From 20c4d6c8593d575dbb8261b31be2490606f8cce4 Mon Sep 17 00:00:00 2001
From: Chris Friesen
Date: Wed, 12 Dec 2018 10:25:10 -0500
Subject: [PATCH] backport some mariadb fixes from upstream

Upstream master has some fixes for the mariadb chart that we hope will
improve behaviour under fault scenarios, so import them into our load.
When we update the repo to the latest upstream we should pick these up
and the patches can be dropped.

Change-Id: I5bb367db76b6d00d9922a4b1bb32d87aaa37cf91
Story: 2004520
Task: 28388
Signed-off-by: Chris Friesen
---
 .../centos/build_srpm.data                    |   2 +-
 .../centos/openstack-helm-infra.spec          |   4 +
 ...ontainer-PID-namespaces-under-docker.patch |  82 +++++
 ...adoption-of-running-single-node-mari.patch | 329 ++++++++++++++++++
 4 files changed, 416 insertions(+), 1 deletion(-)
 create mode 100644 openstack/openstack-helm-infra/files/Mariadb-Share-container-PID-namespaces-under-docker.patch
 create mode 100644 openstack/openstack-helm-infra/files/Mariadb-Support-adoption-of-running-single-node-mari.patch

diff --git a/openstack/openstack-helm-infra/centos/build_srpm.data b/openstack/openstack-helm-infra/centos/build_srpm.data
index 17a6d472..ad6f9579 100644
--- a/openstack/openstack-helm-infra/centos/build_srpm.data
+++ b/openstack/openstack-helm-infra/centos/build_srpm.data
@@ -5,4 +5,4 @@ TAR="$TAR_NAME-$SHA.tar.gz"
 
 COPY_LIST="${CGCS_BASE}/downloads/$TAR $PKG_BASE/files/*"
 
-TIS_PATCH_VER=4
+TIS_PATCH_VER=5
diff --git a/openstack/openstack-helm-infra/centos/openstack-helm-infra.spec b/openstack/openstack-helm-infra/centos/openstack-helm-infra.spec
index 38b3f686..abd9c952 100644
--- a/openstack/openstack-helm-infra/centos/openstack-helm-infra.spec
+++ b/openstack/openstack-helm-infra/centos/openstack-helm-infra.spec
@@ -15,6 +15,8 @@ Source0: %{name}-%{sha}.tar.gz
 BuildArch: noarch
 
 Patch01: 0001-gnocchi-chart-updates.patch
+Patch02: Mariadb-Support-adoption-of-running-single-node-mari.patch
+Patch03: Mariadb-Share-container-PID-namespaces-under-docker.patch
 
 BuildRequires: helm
 
@@ -24,6 +26,8 @@ Openstack Helm Infra charts
 %prep
 %setup -n openstack-helm-infra
 %patch01 -p1
+%patch02 -p1
+%patch03 -p1
 
 %build
 # initialize helm and build the toolkit
diff --git a/openstack/openstack-helm-infra/files/Mariadb-Share-container-PID-namespaces-under-docker.patch b/openstack/openstack-helm-infra/files/Mariadb-Share-container-PID-namespaces-under-docker.patch
new file mode 100644
index 00000000..3be19c53
--- /dev/null
+++ b/openstack/openstack-helm-infra/files/Mariadb-Share-container-PID-namespaces-under-docker.patch
@@ -0,0 +1,82 @@
+From 977c523cef00f7919a82d268da7e55223f1864ce Mon Sep 17 00:00:00 2001
+From: Pete Birley
+Date: Sat, 8 Dec 2018 16:16:11 -0600
+Subject: [PATCH] Mariadb: Share container PID namespaces under docker
+
+This PS shares pid namespaces for containers in pods under docker,
+bringing running in this runtime inline with other runc based container
+backends, allowing the pause process in the pod to act as a reaper.
+
+Change-Id: Ib5fc101d930446d848246eb5ca4d554b756cb91f
+Signed-off-by: Pete Birley
+---
+ mariadb/templates/deployment-error.yaml                               | 1 +
+ mariadb/templates/deployment-ingress.yaml                             | 1 +
+ mariadb/templates/monitoring/prometheus/exporter-deployment.yaml      | 1 +
+ mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml | 1 +
+ mariadb/templates/statefulset.yaml                                    | 1 +
+ 5 files changed, 5 insertions(+)
+
+diff --git a/mariadb/templates/deployment-error.yaml b/mariadb/templates/deployment-error.yaml
+index c310324..78c4b18 100644
+--- a/mariadb/templates/deployment-error.yaml
++++ b/mariadb/templates/deployment-error.yaml
+@@ -42,6 +42,7 @@ spec:
+         configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
+         configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
+     spec:
++      shareProcessNamespace: true
+       serviceAccountName: {{ $serviceAccountName }}
+       affinity:
+ {{ tuple $envAll "mariadb" "ingress-error-pages" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
+diff --git a/mariadb/templates/deployment-ingress.yaml b/mariadb/templates/deployment-ingress.yaml
+index 053a08f..afe9407 100644
+--- a/mariadb/templates/deployment-ingress.yaml
++++ b/mariadb/templates/deployment-ingress.yaml
+@@ -137,6 +137,7 @@ spec:
+         configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
+         configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
+     spec:
++      shareProcessNamespace: true
+       serviceAccountName: {{ $serviceAccountName }}
+       affinity:
+ {{ tuple $envAll "mariadb" "ingress" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
+diff --git a/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml b/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
+index 274a06c..00b3f6e 100644
+--- a/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
++++ b/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
+@@ -38,6 +38,7 @@ spec:
+ {{ tuple $envAll "prometheus_mysql_exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
+       namespace: {{ .Values.endpoints.prometheus_mysql_exporter.namespace }}
+     spec:
++      shareProcessNamespace: true
+       serviceAccountName: {{ $serviceAccountName }}
+       nodeSelector:
+         {{ .Values.labels.prometheus_mysql_exporter.node_selector_key }}: {{ .Values.labels.prometheus_mysql_exporter.node_selector_value }}
+diff --git a/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml b/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
+index df7a147..b9f7b56 100644
+--- a/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
++++ b/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
+@@ -30,6 +30,7 @@ spec:
+       labels:
+ {{ tuple $envAll "prometheus_mysql_exporter" "create-sql-user" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
+     spec:
++      shareProcessNamespace: true
+       serviceAccountName: {{ $serviceAccountName }}
+       restartPolicy: OnFailure
+       nodeSelector:
+diff --git a/mariadb/templates/statefulset.yaml b/mariadb/templates/statefulset.yaml
+index de0fac2..c6aa451 100644
+--- a/mariadb/templates/statefulset.yaml
++++ b/mariadb/templates/statefulset.yaml
+@@ -91,6 +91,7 @@ spec:
+         configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
+         configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
+     spec:
++      shareProcessNamespace: true
+       serviceAccountName: {{ $serviceAccountName }}
+       affinity:
+ {{ tuple $envAll "mariadb" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
+-- 
+1.8.3.1
+
diff --git a/openstack/openstack-helm-infra/files/Mariadb-Support-adoption-of-running-single-node-mari.patch b/openstack/openstack-helm-infra/files/Mariadb-Support-adoption-of-running-single-node-mari.patch
new file mode 100644
index 00000000..1a90c013
--- /dev/null
+++ b/openstack/openstack-helm-infra/files/Mariadb-Support-adoption-of-running-single-node-mari.patch
@@ -0,0 +1,329 @@
+From 896385354e535d68f7ee06074bb8266c0f1b7055 Mon Sep 17 00:00:00 2001
+From: Pete Birley
+Date: Sat, 1 Dec 2018 18:52:39 -0600
+Subject: [PATCH] Mariadb: Support adoption of running single node mariadb
+ deployment
+
+This PS updates the mariadb chart to both support adoption of a
+single instance of mariadb running the bash driven chart, which
+did not support reforming a galera cluster by tracking state using
+a configmap. Additionally basic logic is added for upgrading the
+database as part of the normal rolling update flow.
+
+Change-Id: I412de507112b38d6d2534e89f2a02f84bef3da63
+Signed-off-by: Pete Birley
+---
+ mariadb/templates/bin/_start.py.tpl    | 168 +++++++++++++++++++++++----------
+ mariadb/templates/etc/_00-base.cnf.tpl |   8 +-
+ 2 files changed, 123 insertions(+), 53 deletions(-)
+
+diff --git a/mariadb/templates/bin/_start.py.tpl b/mariadb/templates/bin/_start.py.tpl
+index 8a0236b..4d4428c 100644
+--- a/mariadb/templates/bin/_start.py.tpl
++++ b/mariadb/templates/bin/_start.py.tpl
+@@ -48,6 +48,10 @@ logger.addHandler(ch)
+ local_hostname = socket.gethostname()
+ logger.info("This instance hostname: {0}".format(local_hostname))
+ 
++# Get the instance number
++instance_number = local_hostname.split("-")[-1]
++logger.info("This instance number: {0}".format(instance_number))
++
+ # Setup k8s client credentials and check api version
+ kubernetes.config.load_incluster_config()
+ kubernetes_version = kubernetes.client.VersionApi().get_code().git_version
+@@ -109,6 +113,7 @@ def ensure_state_configmap(pod_namespace, configmap_name, configmap_body):
+     except:
+         k8s_api_instance.create_namespaced_config_map(
+             namespace=pod_namespace, body=configmap_body)
++
+     return False
+ 
+ 
+@@ -351,13 +356,36 @@ def get_cluster_state():
+         except:
+             logger.info("The cluster configmap \"{0}\" does not exist.".format(
+                 state_configmap_name))
++            time.sleep(default_sleep)
++            leader_expiry_raw = datetime.utcnow() + timedelta(
++                seconds=cluster_leader_ttl)
++            leader_expiry = "{0}Z".format(leader_expiry_raw.isoformat("T"))
++            if check_for_active_nodes():
++                # NOTE(portdirect): here we make the assumption that the 1st pod
++                # in an existing statefulset is the one to adopt as leader.
++                leader = "{0}-0".format("-".join(
++                    local_hostname.split("-")[:-1]))
++                state = "live"
++                logger.info(
++                    "The cluster is running already though unmanaged \"{0}\" will be declared leader in a \"{1}\" state".
++                    format(leader, state))
++            else:
++                leader = local_hostname
++                state = "new"
++                logger.info(
++                    "The cluster is new \"{0}\" will be declared leader in a \"{1}\" state".
++                    format(leader, state))
++
+             initial_configmap_body = {
+                 "apiVersion": "v1",
+                 "kind": "ConfigMap",
+                 "metadata": {
+                     "name": state_configmap_name,
+                     "annotations": {
+-                        "openstackhelm.openstack.org/cluster.state": "new"
++                        "openstackhelm.openstack.org/cluster.state": state,
++                        "openstackhelm.openstack.org/leader.node": leader,
++                        "openstackhelm.openstack.org/leader.expiry":
++                        leader_expiry
+                     }
+                 },
+                 "data": {}
+@@ -369,14 +397,11 @@ def get_cluster_state():
+     return state
+ 
+ 
+-def declare_myself_cluser_leader(ttl):
+-    """Declare the current pod as the cluster leader.
+-
+-    Keyword arguments:
+-    ttl -- the ttl for the leader period
+-    """
++def declare_myself_cluser_leader():
++    """Declare the current pod as the cluster leader."""
+     logger.info("Declaring myself current cluster leader")
+-    leader_expiry_raw = datetime.utcnow() + timedelta(seconds=120)
++    leader_expiry_raw = datetime.utcnow() + timedelta(
++        seconds=cluster_leader_ttl)
+     leader_expiry = "{0}Z".format(leader_expiry_raw.isoformat("T"))
+     set_configmap_annotation(
+         key='openstackhelm.openstack.org/leader.node', value=local_hostname)
+@@ -393,10 +418,10 @@ def deadmans_leader_election():
+     if iso8601.parse_date(leader_expiry).replace(
+             tzinfo=None) < datetime.utcnow().replace(tzinfo=None):
+         logger.info("Current cluster leader has expired")
+-        declare_myself_cluser_leader(ttl=cluster_leader_ttl)
++        declare_myself_cluser_leader()
+     elif local_hostname == leader_node:
+         logger.info("Renewing cluster leader lease")
+-        declare_myself_cluser_leader(ttl=cluster_leader_ttl)
++        declare_myself_cluser_leader()
+ 
+ 
+ def get_grastate_val(key):
+@@ -452,43 +477,47 @@ def update_grastate_configmap():
+ def update_grastate_on_restart():
+     """Update the grastate.dat on node restart."""
+     logger.info("Updating grastate info for node")
+-    if get_grastate_val(key='seqno') == '-1':
+-        logger.info(
+-            "Node shutdown was not clean, getting position via wsrep-recover")
+-
+-        def recover_wsrep_position():
+-            """Extract recoved wsrep position from uncleanly exited node."""
+-            wsrep_recover = subprocess.Popen(
+-                [
+-                    'mysqld', '--bind-address=127.0.0.1',
+-                    '--wsrep_cluster_address=gcomm://', '--wsrep-recover'
+-                ],
+-                stdout=subprocess.PIPE,
+-                stderr=subprocess.PIPE)
+-            out, err = wsrep_recover.communicate()
+-            for item in err.split("\n"):
+-                if "WSREP: Recovered position:" in item:
+-                    line = item.strip().split()
+-                    wsrep_rec_pos = line[-1].split(':')[-1]
+-                    return wsrep_rec_pos
+-
+-        set_grastate_val(key='seqno', value=recover_wsrep_position())
+-    else:
+-        logger.info("Node shutdown was clean, using grastate.dat")
++    if os.path.exists('/var/lib/mysql/grastate.dat'):
++        if get_grastate_val(key='seqno') == '-1':
++            logger.info(
++                "Node shutdown was not clean, getting position via wsrep-recover"
++            )
++
++            def recover_wsrep_position():
++                """Extract recoved wsrep position from uncleanly exited node."""
++                wsrep_recover = subprocess.Popen(
++                    [
++                        'mysqld', '--bind-address=127.0.0.1',
++                        '--wsrep_cluster_address=gcomm://', '--wsrep-recover'
++                    ],
++                    stdout=subprocess.PIPE,
++                    stderr=subprocess.PIPE)
++                out, err = wsrep_recover.communicate()
++                for item in err.split("\n"):
++                    if "WSREP: Recovered position:" in item:
++                        line = item.strip().split()
++                        wsrep_rec_pos = line[-1].split(':')[-1]
++                        return wsrep_rec_pos
++
++            set_grastate_val(key='seqno', value=recover_wsrep_position())
++        else:
++            logger.info("Node shutdown was clean, using grastate.dat")
+ 
+-    update_grastate_configmap()
++        update_grastate_configmap()
+ 
++    else:
++        logger.info("No grastate.dat exists I am a new node")
+ 
+-def check_for_active_nodes(endpoints_name=direct_svc_name,
+-                           namespace=pod_namespace):
+-    """Check K8s endpoints to see if there are active Mariadb Instances.
++
++def get_active_endpoints(endpoints_name=direct_svc_name,
++                         namespace=pod_namespace):
++    """Returns a list of active endpoints.
+ 
+     Keyword arguments:
+     endpoints_name -- endpoints to check for active backends
+                       (default direct_svc_name)
+     namespace -- namespace to check for endpoints (default pod_namespace)
+     """
+-    logger.info("Checking for active nodes")
+     endpoints = k8s_api_instance.read_namespaced_endpoints(
+         name=endpoints_name, namespace=pod_namespace)
+     endpoints_dict = endpoints.to_dict()
+@@ -496,6 +525,20 @@ def check_for_active_nodes(endpoints_name=direct_svc_name,
+         i for i, s in enumerate(endpoints_dict['subsets']) if 'addresses' in s
+     ][0]
+     active_endpoints = endpoints_dict['subsets'][addresses_index]['addresses']
++    return active_endpoints
++
++
++def check_for_active_nodes(endpoints_name=direct_svc_name,
++                           namespace=pod_namespace):
++    """Check K8s endpoints to see if there are active Mariadb Instances.
++
++    Keyword arguments:
++    endpoints_name -- endpoints to check for active backends
++                      (default direct_svc_name)
++    namespace -- namespace to check for endpoints (default pod_namespace)
++    """
++    logger.info("Checking for active nodes")
++    active_endpoints = get_active_endpoints()
+     if active_endpoints and len(active_endpoints) >= 1:
+         return True
+     else:
+@@ -608,7 +651,11 @@ def launch_leader_election():
+ 
+ 
+ def run_mysqld(cluster='existing'):
+-    """Launch the mysqld instance for the pod.
++    """Launch the mysqld instance for the pod. This will also run mysql upgrade
++    if we are the 1st replica, and the rest of the cluster is already running.
++    This senario will be triggerd either following a rolling update, as this
++    works in reverse order for statefulset. Or restart of the 1st instance, in
++    which case the comand should be a no-op.
+ 
+     Keyword arguments:
+     cluster -- whether we going to form a cluster 'new' or joining an existing
+@@ -621,18 +668,28 @@ def run_mysqld(cluster='existing'):
+     mysqld_cmd = ['mysqld']
+     if cluster == 'new':
+         mysqld_cmd.append('--wsrep-new-cluster')
++    else:
++        if int(instance_number) == 0:
++            active_endpoints = get_active_endpoints()
++            if active_endpoints and len(active_endpoints) == (
++                    int(mariadb_replicas) - 1):
++                run_cmd_with_logging([
++                    'mysql_upgrade',
++                    '--defaults-file=/etc/mysql/admin_user.cnf'
++                ], logger)
++
+     run_cmd_with_logging(mysqld_cmd, logger)
+ 
+ 
+ def mysqld_reboot():
+     """Reboot a mysqld cluster."""
+-    declare_myself_cluser_leader(ttl=cluster_leader_ttl)
++    declare_myself_cluser_leader()
+     set_grastate_val(key='safe_to_bootstrap', value='1')
+     run_mysqld(cluster='new')
+ 
+ 
+ def sigterm_shutdown(x, y):
+-    """Shutdown the instnace of mysqld on shutdown signal."""
++    """Shutdown the instance of mysqld on shutdown signal."""
+     logger.info("Got a sigterm from the container runtime, time to go.")
+     stop_mysqld()
+ 
+@@ -642,15 +699,26 @@ signal.signal(signal.SIGTERM, sigterm_shutdown)
+ 
+ # Main logic loop
+ if get_cluster_state() == 'new':
+-    set_configmap_annotation(
+-        key='openstackhelm.openstack.org/cluster.state', value='init')
+-    declare_myself_cluser_leader(ttl=cluster_leader_ttl)
+-    launch_leader_election()
+-    mysqld_bootstrap()
+-    update_grastate_configmap()
+-    set_configmap_annotation(
+-        key='openstackhelm.openstack.org/cluster.state', value='live')
+-    run_mysqld(cluster='new')
++    leader_node = get_configmap_value(
++        type='annotation', key='openstackhelm.openstack.org/leader.node')
++    if leader_node == local_hostname:
++        set_configmap_annotation(
++            key='openstackhelm.openstack.org/cluster.state', value='init')
++        declare_myself_cluser_leader()
++        launch_leader_election()
++        mysqld_bootstrap()
++        update_grastate_configmap()
++        set_configmap_annotation(
++            key='openstackhelm.openstack.org/cluster.state', value='live')
++        run_mysqld(cluster='new')
++    else:
++        logger.info("Waiting for cluster to start running")
++        while not get_cluster_state() == 'live':
++            time.sleep(default_sleep)
++        while not check_for_active_nodes():
++            time.sleep(default_sleep)
++        launch_leader_election()
++        run_mysqld()
+ elif get_cluster_state() == 'init':
+     logger.info("Waiting for cluster to start running")
+     while not get_cluster_state() == 'live':
+diff --git a/mariadb/templates/etc/_00-base.cnf.tpl b/mariadb/templates/etc/_00-base.cnf.tpl
+index fc0b079..949d867 100644
+--- a/mariadb/templates/etc/_00-base.cnf.tpl
++++ b/mariadb/templates/etc/_00-base.cnf.tpl
+@@ -21,7 +21,7 @@ collation_server=utf8_unicode_ci
+ skip-character-set-client-handshake
+ 
+ # Logging
+-slow_query_log=on
++slow_query_log=off
+ slow_query_log_file=/var/log/mysql/mariadb-slow.log
+ log_warnings=2
+ 
+@@ -75,9 +75,11 @@ table_definition_cache=1024
+ # TODO(tomasz.paszkowski): This needs to by dynamic based on available RAM.
+ innodb_buffer_pool_size=1024M
+ innodb_doublewrite=0
++innodb_file_format=Barracuda
+ innodb_file_per_table=1
+ innodb_flush_method=O_DIRECT
+ innodb_io_capacity=500
++innodb_locks_unsafe_for_binlog=1
+ innodb_log_file_size=128M
+ innodb_old_blocks_time=1000
+ innodb_read_io_threads=8
+@@ -93,9 +95,9 @@ wsrep_on=1
+ wsrep_provider=/usr/lib/galera/libgalera_smm.so
+ wsrep_provider_options="gmcast.listen_addr=tcp://0.0.0.0:{{ tuple "oslo_db" "direct" "wsrep" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
+ wsrep_slave_threads=12
+-# FIX_ME(portdirect): https://mariadb.com/kb/en/library/mariabackup-overview/#granting-privileges-for-ssts
+ wsrep_sst_auth=root:{{ .Values.endpoints.oslo_db.auth.admin.password }}
+-wsrep_sst_method=mariabackup
++# FIXME(portdirect): use rsync for compatibility between image variations
++wsrep_sst_method=rsync
+ 
+ [mysqldump]
+ max-allowed-packet=16M
+-- 
+1.8.3.1
+