backport some mariadb fixes from upstream

Upstream master has some fixes for the mariadb chart that we hope will improve
behaviour under fault scenarios, so import them into our load.

When we update the repo to the latest upstream we should pick these up, and
the patches can be dropped.

Change-Id: I5bb367db76b6d00d9922a4b1bb32d87aaa37cf91
Story: 2004520
Task: 28388
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Chris Friesen 2018-12-12 10:25:10 -05:00
parent 1643e005ae
commit 20c4d6c859
4 changed files with 416 additions and 1 deletion

@@ -5,4 +5,4 @@ TAR="$TAR_NAME-$SHA.tar.gz"
 COPY_LIST="${CGCS_BASE}/downloads/$TAR $PKG_BASE/files/*"
-TIS_PATCH_VER=4
+TIS_PATCH_VER=5

@@ -15,6 +15,8 @@ Source0: %{name}-%{sha}.tar.gz
 BuildArch: noarch
 Patch01: 0001-gnocchi-chart-updates.patch
+Patch02: Mariadb-Support-adoption-of-running-single-node-mari.patch
+Patch03: Mariadb-Share-container-PID-namespaces-under-docker.patch
 BuildRequires: helm
@@ -24,6 +26,8 @@ Openstack Helm Infra charts
 %prep
 %setup -n openstack-helm-infra
 %patch01 -p1
+%patch02 -p1
+%patch03 -p1
 %build
 # initialize helm and build the toolkit

@@ -0,0 +1,82 @@
From 977c523cef00f7919a82d268da7e55223f1864ce Mon Sep 17 00:00:00 2001
From: Pete Birley <pete@port.direct>
Date: Sat, 8 Dec 2018 16:16:11 -0600
Subject: [PATCH] Mariadb: Share container PID namespaces under docker
This PS shares PID namespaces for containers in pods under docker, bringing
behaviour under this runtime in line with other runc based container
backends and allowing the pause process in the pod to act as a reaper.
Change-Id: Ib5fc101d930446d848246eb5ca4d554b756cb91f
Signed-off-by: Pete Birley <pete@port.direct>
---
mariadb/templates/deployment-error.yaml | 1 +
mariadb/templates/deployment-ingress.yaml | 1 +
mariadb/templates/monitoring/prometheus/exporter-deployment.yaml | 1 +
mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml | 1 +
mariadb/templates/statefulset.yaml | 1 +
5 files changed, 5 insertions(+)
diff --git a/mariadb/templates/deployment-error.yaml b/mariadb/templates/deployment-error.yaml
index c310324..78c4b18 100644
--- a/mariadb/templates/deployment-error.yaml
+++ b/mariadb/templates/deployment-error.yaml
@@ -42,6 +42,7 @@ spec:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
+ shareProcessNamespace: true
serviceAccountName: {{ $serviceAccountName }}
affinity:
{{ tuple $envAll "mariadb" "ingress-error-pages" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
diff --git a/mariadb/templates/deployment-ingress.yaml b/mariadb/templates/deployment-ingress.yaml
index 053a08f..afe9407 100644
--- a/mariadb/templates/deployment-ingress.yaml
+++ b/mariadb/templates/deployment-ingress.yaml
@@ -137,6 +137,7 @@ spec:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
+ shareProcessNamespace: true
serviceAccountName: {{ $serviceAccountName }}
affinity:
{{ tuple $envAll "mariadb" "ingress" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
diff --git a/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml b/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
index 274a06c..00b3f6e 100644
--- a/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
+++ b/mariadb/templates/monitoring/prometheus/exporter-deployment.yaml
@@ -38,6 +38,7 @@ spec:
{{ tuple $envAll "prometheus_mysql_exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
namespace: {{ .Values.endpoints.prometheus_mysql_exporter.namespace }}
spec:
+ shareProcessNamespace: true
serviceAccountName: {{ $serviceAccountName }}
nodeSelector:
{{ .Values.labels.prometheus_mysql_exporter.node_selector_key }}: {{ .Values.labels.prometheus_mysql_exporter.node_selector_value }}
diff --git a/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml b/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
index df7a147..b9f7b56 100644
--- a/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
+++ b/mariadb/templates/monitoring/prometheus/exporter-job-create-user.yaml
@@ -30,6 +30,7 @@ spec:
labels:
{{ tuple $envAll "prometheus_mysql_exporter" "create-sql-user" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
+ shareProcessNamespace: true
serviceAccountName: {{ $serviceAccountName }}
restartPolicy: OnFailure
nodeSelector:
diff --git a/mariadb/templates/statefulset.yaml b/mariadb/templates/statefulset.yaml
index de0fac2..c6aa451 100644
--- a/mariadb/templates/statefulset.yaml
+++ b/mariadb/templates/statefulset.yaml
@@ -91,6 +91,7 @@ spec:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
+ shareProcessNamespace: true
serviceAccountName: {{ $serviceAccountName }}
affinity:
{{ tuple $envAll "mariadb" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
--
1.8.3.1
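
For context, shareProcessNamespace is a standard Kubernetes pod-spec field rather
than anything mariadb-specific. A minimal stand-alone pod using the same setting
might look like the sketch below; the pod name and image are placeholders and are
not taken from the chart:

apiVersion: v1
kind: Pod
metadata:
  name: pid-share-demo             # hypothetical name, for illustration only
spec:
  shareProcessNamespace: true      # all containers in the pod share one PID namespace
  containers:
    - name: db
      image: mariadb:10.2          # placeholder image; the chart supplies its own

With the field set, PID 1 inside every container is the pod's pause process, so
orphaned children of mysqld (or the exporter) are reaped rather than left as
zombies, which is the behaviour the patch above is after.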

@@ -0,0 +1,329 @@
From 896385354e535d68f7ee06074bb8266c0f1b7055 Mon Sep 17 00:00:00 2001
From: Pete Birley <pete@port.direct>
Date: Sat, 1 Dec 2018 18:52:39 -0600
Subject: [PATCH] Mariadb: Support adoption of running single node mariadb
deployment
This PS updates the mariadb chart to support adoption of a single running
instance of mariadb deployed by the bash driven chart, which did not support
reforming a galera cluster by tracking state using a configmap. Additionally,
basic logic is added for upgrading the database as part of the normal rolling
update flow.
Change-Id: I412de507112b38d6d2534e89f2a02f84bef3da63
Signed-off-by: Pete Birley <pete@port.direct>
---
mariadb/templates/bin/_start.py.tpl | 168 +++++++++++++++++++++++----------
mariadb/templates/etc/_00-base.cnf.tpl | 8 +-
2 files changed, 123 insertions(+), 53 deletions(-)
diff --git a/mariadb/templates/bin/_start.py.tpl b/mariadb/templates/bin/_start.py.tpl
index 8a0236b..4d4428c 100644
--- a/mariadb/templates/bin/_start.py.tpl
+++ b/mariadb/templates/bin/_start.py.tpl
@@ -48,6 +48,10 @@ logger.addHandler(ch)
local_hostname = socket.gethostname()
logger.info("This instance hostname: {0}".format(local_hostname))
+# Get the instance number
+instance_number = local_hostname.split("-")[-1]
+logger.info("This instance number: {0}".format(instance_number))
+
# Setup k8s client credentials and check api version
kubernetes.config.load_incluster_config()
kubernetes_version = kubernetes.client.VersionApi().get_code().git_version
@@ -109,6 +113,7 @@ def ensure_state_configmap(pod_namespace, configmap_name, configmap_body):
except:
k8s_api_instance.create_namespaced_config_map(
namespace=pod_namespace, body=configmap_body)
+
return False
@@ -351,13 +356,36 @@ def get_cluster_state():
except:
logger.info("The cluster configmap \"{0}\" does not exist.".format(
state_configmap_name))
+ time.sleep(default_sleep)
+ leader_expiry_raw = datetime.utcnow() + timedelta(
+ seconds=cluster_leader_ttl)
+ leader_expiry = "{0}Z".format(leader_expiry_raw.isoformat("T"))
+ if check_for_active_nodes():
+ # NOTE(portdirect): here we make the assumption that the 1st pod
+ # in an existing statefulset is the one to adopt as leader.
+ leader = "{0}-0".format("-".join(
+ local_hostname.split("-")[:-1]))
+ state = "live"
+ logger.info(
+ "The cluster is running already though unmanaged \"{0}\" will be declared leader in a \"{1}\" state".
+ format(leader, state))
+ else:
+ leader = local_hostname
+ state = "new"
+ logger.info(
+ "The cluster is new \"{0}\" will be declared leader in a \"{1}\" state".
+ format(leader, state))
+
initial_configmap_body = {
"apiVersion": "v1",
"kind": "ConfigMap",
"metadata": {
"name": state_configmap_name,
"annotations": {
- "openstackhelm.openstack.org/cluster.state": "new"
+ "openstackhelm.openstack.org/cluster.state": state,
+ "openstackhelm.openstack.org/leader.node": leader,
+ "openstackhelm.openstack.org/leader.expiry":
+ leader_expiry
}
},
"data": {}
@@ -369,14 +397,11 @@ def get_cluster_state():
return state
-def declare_myself_cluser_leader(ttl):
- """Declare the current pod as the cluster leader.
-
- Keyword arguments:
- ttl -- the ttl for the leader period
- """
+def declare_myself_cluser_leader():
+ """Declare the current pod as the cluster leader."""
logger.info("Declaring myself current cluster leader")
- leader_expiry_raw = datetime.utcnow() + timedelta(seconds=120)
+ leader_expiry_raw = datetime.utcnow() + timedelta(
+ seconds=cluster_leader_ttl)
leader_expiry = "{0}Z".format(leader_expiry_raw.isoformat("T"))
set_configmap_annotation(
key='openstackhelm.openstack.org/leader.node', value=local_hostname)
@@ -393,10 +418,10 @@ def deadmans_leader_election():
if iso8601.parse_date(leader_expiry).replace(
tzinfo=None) < datetime.utcnow().replace(tzinfo=None):
logger.info("Current cluster leader has expired")
- declare_myself_cluser_leader(ttl=cluster_leader_ttl)
+ declare_myself_cluser_leader()
elif local_hostname == leader_node:
logger.info("Renewing cluster leader lease")
- declare_myself_cluser_leader(ttl=cluster_leader_ttl)
+ declare_myself_cluser_leader()
def get_grastate_val(key):
@@ -452,43 +477,47 @@ def update_grastate_configmap():
def update_grastate_on_restart():
"""Update the grastate.dat on node restart."""
logger.info("Updating grastate info for node")
- if get_grastate_val(key='seqno') == '-1':
- logger.info(
- "Node shutdown was not clean, getting position via wsrep-recover")
-
- def recover_wsrep_position():
- """Extract recoved wsrep position from uncleanly exited node."""
- wsrep_recover = subprocess.Popen(
- [
- 'mysqld', '--bind-address=127.0.0.1',
- '--wsrep_cluster_address=gcomm://', '--wsrep-recover'
- ],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- out, err = wsrep_recover.communicate()
- for item in err.split("\n"):
- if "WSREP: Recovered position:" in item:
- line = item.strip().split()
- wsrep_rec_pos = line[-1].split(':')[-1]
- return wsrep_rec_pos
-
- set_grastate_val(key='seqno', value=recover_wsrep_position())
- else:
- logger.info("Node shutdown was clean, using grastate.dat")
+ if os.path.exists('/var/lib/mysql/grastate.dat'):
+ if get_grastate_val(key='seqno') == '-1':
+ logger.info(
+ "Node shutdown was not clean, getting position via wsrep-recover"
+ )
+
+ def recover_wsrep_position():
+ """Extract recoved wsrep position from uncleanly exited node."""
+ wsrep_recover = subprocess.Popen(
+ [
+ 'mysqld', '--bind-address=127.0.0.1',
+ '--wsrep_cluster_address=gcomm://', '--wsrep-recover'
+ ],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = wsrep_recover.communicate()
+ for item in err.split("\n"):
+ if "WSREP: Recovered position:" in item:
+ line = item.strip().split()
+ wsrep_rec_pos = line[-1].split(':')[-1]
+ return wsrep_rec_pos
+
+ set_grastate_val(key='seqno', value=recover_wsrep_position())
+ else:
+ logger.info("Node shutdown was clean, using grastate.dat")
- update_grastate_configmap()
+ update_grastate_configmap()
+ else:
+ logger.info("No grastate.dat exists I am a new node")
-def check_for_active_nodes(endpoints_name=direct_svc_name,
- namespace=pod_namespace):
- """Check K8s endpoints to see if there are active Mariadb Instances.
+
+def get_active_endpoints(endpoints_name=direct_svc_name,
+ namespace=pod_namespace):
+ """Returns a list of active endpoints.
Keyword arguments:
endpoints_name -- endpoints to check for active backends
(default direct_svc_name)
namespace -- namespace to check for endpoints (default pod_namespace)
"""
- logger.info("Checking for active nodes")
endpoints = k8s_api_instance.read_namespaced_endpoints(
name=endpoints_name, namespace=pod_namespace)
endpoints_dict = endpoints.to_dict()
@@ -496,6 +525,20 @@ def check_for_active_nodes(endpoints_name=direct_svc_name,
i for i, s in enumerate(endpoints_dict['subsets']) if 'addresses' in s
][0]
active_endpoints = endpoints_dict['subsets'][addresses_index]['addresses']
+ return active_endpoints
+
+
+def check_for_active_nodes(endpoints_name=direct_svc_name,
+ namespace=pod_namespace):
+ """Check K8s endpoints to see if there are active Mariadb Instances.
+
+ Keyword arguments:
+ endpoints_name -- endpoints to check for active backends
+ (default direct_svc_name)
+ namespace -- namespace to check for endpoints (default pod_namespace)
+ """
+ logger.info("Checking for active nodes")
+ active_endpoints = get_active_endpoints()
if active_endpoints and len(active_endpoints) >= 1:
return True
else:
@@ -608,7 +651,11 @@ def launch_leader_election():
def run_mysqld(cluster='existing'):
- """Launch the mysqld instance for the pod.
+ """Launch the mysqld instance for the pod. This will also run mysql upgrade
+ if we are the 1st replica, and the rest of the cluster is already running.
+ This senario will be triggerd either following a rolling update, as this
+ works in reverse order for statefulset. Or restart of the 1st instance, in
+ which case the comand should be a no-op.
Keyword arguments:
cluster -- whether we going to form a cluster 'new' or joining an existing
@@ -621,18 +668,28 @@ def run_mysqld(cluster='existing'):
mysqld_cmd = ['mysqld']
if cluster == 'new':
mysqld_cmd.append('--wsrep-new-cluster')
+ else:
+ if int(instance_number) == 0:
+ active_endpoints = get_active_endpoints()
+ if active_endpoints and len(active_endpoints) == (
+ int(mariadb_replicas) - 1):
+ run_cmd_with_logging([
+ 'mysql_upgrade',
+ '--defaults-file=/etc/mysql/admin_user.cnf'
+ ], logger)
+
run_cmd_with_logging(mysqld_cmd, logger)
def mysqld_reboot():
"""Reboot a mysqld cluster."""
- declare_myself_cluser_leader(ttl=cluster_leader_ttl)
+ declare_myself_cluser_leader()
set_grastate_val(key='safe_to_bootstrap', value='1')
run_mysqld(cluster='new')
def sigterm_shutdown(x, y):
- """Shutdown the instnace of mysqld on shutdown signal."""
+ """Shutdown the instance of mysqld on shutdown signal."""
logger.info("Got a sigterm from the container runtime, time to go.")
stop_mysqld()
@@ -642,15 +699,26 @@ signal.signal(signal.SIGTERM, sigterm_shutdown)
# Main logic loop
if get_cluster_state() == 'new':
- set_configmap_annotation(
- key='openstackhelm.openstack.org/cluster.state', value='init')
- declare_myself_cluser_leader(ttl=cluster_leader_ttl)
- launch_leader_election()
- mysqld_bootstrap()
- update_grastate_configmap()
- set_configmap_annotation(
- key='openstackhelm.openstack.org/cluster.state', value='live')
- run_mysqld(cluster='new')
+ leader_node = get_configmap_value(
+ type='annotation', key='openstackhelm.openstack.org/leader.node')
+ if leader_node == local_hostname:
+ set_configmap_annotation(
+ key='openstackhelm.openstack.org/cluster.state', value='init')
+ declare_myself_cluser_leader()
+ launch_leader_election()
+ mysqld_bootstrap()
+ update_grastate_configmap()
+ set_configmap_annotation(
+ key='openstackhelm.openstack.org/cluster.state', value='live')
+ run_mysqld(cluster='new')
+ else:
+ logger.info("Waiting for cluster to start running")
+ while not get_cluster_state() == 'live':
+ time.sleep(default_sleep)
+ while not check_for_active_nodes():
+ time.sleep(default_sleep)
+ launch_leader_election()
+ run_mysqld()
elif get_cluster_state() == 'init':
logger.info("Waiting for cluster to start running")
while not get_cluster_state() == 'live':
diff --git a/mariadb/templates/etc/_00-base.cnf.tpl b/mariadb/templates/etc/_00-base.cnf.tpl
index fc0b079..949d867 100644
--- a/mariadb/templates/etc/_00-base.cnf.tpl
+++ b/mariadb/templates/etc/_00-base.cnf.tpl
@@ -21,7 +21,7 @@ collation_server=utf8_unicode_ci
skip-character-set-client-handshake
# Logging
-slow_query_log=on
+slow_query_log=off
slow_query_log_file=/var/log/mysql/mariadb-slow.log
log_warnings=2
@@ -75,9 +75,11 @@ table_definition_cache=1024
# TODO(tomasz.paszkowski): This needs to by dynamic based on available RAM.
innodb_buffer_pool_size=1024M
innodb_doublewrite=0
+innodb_file_format=Barracuda
innodb_file_per_table=1
innodb_flush_method=O_DIRECT
innodb_io_capacity=500
+innodb_locks_unsafe_for_binlog=1
innodb_log_file_size=128M
innodb_old_blocks_time=1000
innodb_read_io_threads=8
@@ -93,9 +95,9 @@ wsrep_on=1
wsrep_provider=/usr/lib/galera/libgalera_smm.so
wsrep_provider_options="gmcast.listen_addr=tcp://0.0.0.0:{{ tuple "oslo_db" "direct" "wsrep" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
wsrep_slave_threads=12
-# FIX_ME(portdirect): https://mariadb.com/kb/en/library/mariabackup-overview/#granting-privileges-for-ssts
wsrep_sst_auth=root:{{ .Values.endpoints.oslo_db.auth.admin.password }}
-wsrep_sst_method=mariabackup
+# FIXME(portdirect): use rsync for compatibility between image variations
+wsrep_sst_method=rsync
[mysqldump]
max-allowed-packet=16M
--
1.8.3.1
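
To make the state handling above easier to follow: the start script keeps the
galera cluster state in a ConfigMap whose annotations drive the new -> init ->
live state machine and the leader lease. A rough runtime sketch is shown below;
only the annotation keys and state values are taken from the patch, while the
object name, pod name and timestamp are illustrative guesses:

apiVersion: v1
kind: ConfigMap
metadata:
  name: mariadb-state                                          # hypothetical; the real name comes from state_configmap_name
  annotations:
    openstackhelm.openstack.org/cluster.state: live            # progresses new -> init -> live
    openstackhelm.openstack.org/leader.node: mariadb-server-0  # hypothetical pod name
    openstackhelm.openstack.org/leader.expiry: "2018-12-12T16:00:00.000000Z"
data: {}                                                       # grastate info is added later by update_grastate_configmap()

The leader.expiry value uses the same isoformat("T") plus "Z" format the script
writes, and deadmans_leader_election() lets another pod take over as leader once
that timestamp is in the past.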