vault-armada-app/helm-charts/custom/vault-manager-helm/vault-manager-helm/vault-manager/templates/vault-init.yaml

apiVersion: v1
data:
init.sh: |
#!/bin/bash
# Get the CA path from environment vars
CERT=$CA_CERT
# Store the cert as a one-liner for curl purposes
CA_ONELINE=$(awk '{printf "%s\\n", $0}' $CERT)
# Template values from helm
VAULT_NS={{ .Release.Namespace }}
VAULT_NAME={{ .Values.vault.name }}
VAULT_FN={{ .Values.vault.fullname }}
HA_REPLICAS={{ .Values.server.ha.replicas }}
# Set the domain for resolving pod names
DOMAIN="${VAULT_NS}.pod.cluster.local"
SVCDOMAIN="${VAULT_NS}.svc.cluster.local"
# define host targets and port
POD_TARGET_BASE="$DOMAIN" # requires 'DNS NAME' of pod
ACTIVE_TARGET="${VAULT_FN}-active.${SVCDOMAIN}" # only the active
TARGET_PORT=8200
# impermanent location to store files while running
WORKDIR=/workdir
mkdir -p $WORKDIR
# Selection of kubectl version from helm override
KUBECTL=kubectl
KUBECTL_HELM_OVERRIDE={{ .Values.manager.k8s.client_version }}
# Trap and trap notification file. When SIGTERM is sent to this pod
# we want to exit promptly and gracefully.
TRAPFILE=$WORKDIR/exit_on_trap
trap "touch $TRAPFILE" SIGTERM
# when specifying a trap for debug, remember it with this variable
# reserve trap '0' for disabling a debugging trap request
DEBUGGING_TRAP=0
# Pause notification file. A debugging option to permit
# vault-manager to be paused at any of the exit_on_trap code points.
# Use cases may include permitting time for a developer to set up
# conditions for debugging and testing.
PAUSEFILE=$WORKDIR/pause_on_trap
PAUSE_RATE=1 # rate at which to test for unpause
EARLY_PAUSE={{ .Values.manager.pause }}
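# Illustrative (namespace and pod name are hypothetical): to pause
# vault-manager at exit_on_trap point 3, a developer could run:
#   kubectl exec -n <namespace> <vault-manager-pod> -- \
#       bash -c 'echo 3 > /workdir/pause_on_trap'
# Removing the pause file unpauses vault-manager.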
# set the default manager mode; modes include
# VAULT_MANAGER (default)
# MOUNT_HELPER
# INTERACTIVE (i.e., when this script is sourced by an author)
if [ -z "$MANAGER_MODE" ]; then
MANAGER_MODE="VAULT_MANAGER"
fi
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
MANAGER_MODE="INTERACTIVE"
fi
# Maximum sleep seconds for mount-helper before exiting
MOUNT_HELPER_MAX_TIME=60
# Maximum seconds to wait for mount-helper pod to start
MAX_POD_RUN_TRIES=10
# Maximum seconds to wait for the previous vault-manager pod to exit.
# The previous vault-manager does not respond to SIGTERM, so this
# will take about 30 seconds
TERMINATE_TRIES_MAX={{ .Values.manager.waitTermination.maxTries }}
TERMINATE_TRIES_SLEEP={{ .Values.manager.waitTermination.sleepTime }}
# Vault key share configuration
KEY_SECRET_SHARES=5
KEY_REQUIRED_THRESHOLD=3
# Enable vault rekey upon conversion of storage from PVC to k8s
# secrets
AUTO_REKEY_CONVERT={{ .Values.manager.rekey.enableOnPVCConversion }}
# Keep track of vault-manager restarting the rekey procedure; if
# this variable is not true (0) and a rekey procedure is in
# progress, then vault-manager was restarted
REKEY_STARTED=1
# Vault manager will rekey the vault at a time when the vault
# servers are stable for a period of time specified by
# REKEY_STABLE_TIME seconds
REKEY_STABLE_TIME=300
# Global variable to share rekey status
REKEY_STATUS_JSON=''
# Keep track of shards that were last successful
SHARDS_LAST_SUCCESSFUL="cluster-key"
# Records for seal status state machine:
PODREC_F="$WORKDIR/previous_pods_status.txt"
PODREC_TMP_F="$WORKDIR/new_pods_status.txt"
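# Each record is one line of the form /<pod>/<dns-name>/<sealed>/[counter]
# e.g. (illustrative): /sva-vault-0/172-16-226-97/true/3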
# Vault server health query timeout during HA recovery scenario
QUERY_TMOUT={{ .Values.manager.api.healthQueryTimeout }}
# Default curl timeout for REST API commands to vault server.
# This value is what testing shows is the default timeout.
# Specifying it explicitly for clarity.
API_TMOUT=120
# API timeout for unseal operations
API_UNSEAL_OP_TMOUT={{ .Values.manager.api.unsealOpTimeout }}
# API timeout values for rekey operations
API_REKEY_QUERY_TMOUT={{ .Values.manager.api.rekeyStatusTimeout }}
API_REKEY_OP_TMOUT={{ .Values.manager.api.rekeyOpTimeout }}
STATEFULSET_RATE=5
INIT_CONVERGE_TIME=10
JOIN_RATE=5
JOIN_CONVERGE_TIME=1
UNSEAL_RATE=10
UNSEAL_CONVERGE_TIME=3
STATUS_RATE={{ .Values.manager.statusCheckRate }}
if [ -z "$STATUS_RATE" ]; then
STATUS_RATE=5
fi
# with STATUS_RATE, the period to delay unseal
# STATUS_RATE * STATEMACH_START seconds
STATEMACH_START={{ .Values.manager.unsealWaitIntervals }}
if [ -z "$STATEMACH_START" ]; then
STATEMACH_START=3
fi
# Log levels
DEBUG=1
INFO=2
WARNING=3
ERROR=4
FATAL=5
# Default log level and the set log level (Initially set as default).
# If the log function detects an override file, then it will switch
# the set log level and then delete it.
DEFAULT_LOG_LEVEL=$INFO
LOG_LEVEL={{ .Values.manager.log.defaultLogLevel }}
LOG_OVERRIDE_FILE="$WORKDIR/log_level"
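# The log level can be changed at runtime by writing a value 1-5
# (DEBUG..FATAL) into the override file, e.g. (illustrative, from
# within the vault-manager container):
#   echo 1 > /workdir/log_level   # switch to DEBUG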
# FUNCTIONS
# takes major/minor version of k8s and compares
# for example: v1.28 > v1.27 > v1.26
#
# Returns:
# 0 left is larger
# 1 equal
# 2 right is larger
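# Illustrative usage:
#   compareK8sVersion v1.28 v1.27   # returns 0 (left is larger)
#   compareK8sVersion v1.27 v1.27   # returns 1 (equal)
#   compareK8sVersion v1.26 v1.27   # returns 2 (right is larger)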
function compareK8sVersion {
local left="$1"
local right="$2"
# strip leading 'v'
left="${left#v}"
right="${right#v}"
# compare the strings
if [ "$left" == "$right" ]; then
return 1
fi
# compare major
if [ "${left%.*}" -gt "${right%.*}" ]; then
return 0
elif [ "${left%.*}" -lt "${right%.*}" ]; then
return 2
fi
# compare the minor
if [ "${left#*.}" -gt "${right#*.}" ]; then
return 0
fi
return 2
}
# Give kubectl an opportunity to express complaints in the log
function k8sComplain {
local result
result="$( $KUBECTL version -o json 2>&1 >/dev/null )"
if [ -n "$result" ]; then
log $WARNING "kubectl: $result"
fi
}
# Double-check that the binary exists before setting the specified
# value of KUBECTL
function switchK8sVersion {
local select="$1"
local fname="kubectl.$select"
local newbin="${KUBECTL_INSTALL_PATH}/$fname"
which "$fname" >/dev/null
if [ $? -ne 0 -o ! -f "$newbin" ]; then
log $ERROR "Missing kubectl version: $select"
k8sComplain
return 1
fi
if [ "$KUBECTL" != "$fname" ]; then
KUBECTL="$fname"
log $INFO "Switching to use kubectl version $select"
fi
k8sComplain
return 0
}
# Select the version of kubectl matching the running server
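# KUBE_VERSIONS is assumed to be a space-separated list of available
# client versions sorted largest to smallest, e.g. (illustrative):
#   KUBE_VERSIONS="v1.29.2 v1.28.4 v1.27.5"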
function pickK8sVersion {
local result
local serverver
local majorver
local minorver
local select=""
local majmin=""
local maxver
local minver
# omit this code if the image does not support kubectl versions
if [ -z "$KUBE_VERSIONS" ]; then
k8sComplain
return
fi
if [ -n "$KUBECTL_HELM_OVERRIDE" ]; then
# pick the binary requested, if it exists
switchK8sVersion "$KUBECTL_HELM_OVERRIDE"
if [ $? -eq 0 ]; then
return
fi
log $ERROR "kubectl version from helm-override not" \
"available: $KUBECTL_HELM_OVERRIDE"
fi
# use -o json for consistent usage, as opposed to --short
result="$( $KUBECTL version -o json 2>/dev/null )"
if [ $? -ne 0 ]; then
log $ERROR "Unable to get k8s server version"
# no change in value of KUBECTL
k8sComplain
return
fi
serverver="$( jq -r '.serverVersion.gitVersion' <<<"$result" \
| grep "[0-9]" )"
majorver="$( jq -r '.serverVersion.major' <<<"$result" \
| grep "[0-9]" )"
minorver="$( jq -r '.serverVersion.minor' <<<"$result" \
| grep "[0-9]" )"
if [ -z "$serverver" -o -z "$majorver" -o -z "$minorver" ]; then
log $ERROR "Unable to detect K8s server version:" \
"["$result"]"
# no change in value of KUBECTL
k8sComplain
return
fi
# pick matching client major/minor version
for select in $KUBE_VERSIONS noverhere; do
majmin="v${majorver}.${minorver}"
if [[ "$select" =~ ^$majmin ]]; then
break
fi
done
if [ "$select" == noverhere ]; then
# Try to pick a near version. We really shouldn't be in
# this situation, but here is a compromise. This algorithm
# assumes that there are no omitted versions in the series
# of KUBE_VERSIONS, and that they are sorted largest to
# smallest in that list
maxver="$( awk '{print $1}' <<<"$KUBE_VERSIONS" )"
minver="$( awk '{print $NF}' <<<"$KUBE_VERSIONS" )"
compareK8sVersion ${serverver%.*} ${maxver%.*}
if [ "$?" -le 1 ]; then
select="$maxver"
else
compareK8sVersion ${minver%.*} ${serverver%.*}
if [ "$?" -le 1 ]; then
select="$minver"
else
log $ERROR "Could not pick nearest version for kubectl"
k8sComplain
return
fi
fi
fi
switchK8sVersion "${select%.*}"
}
# Convert log level to text for log message
function log_to_str {
local level="$1"
local logStr
case "$level" in
$INFO)
logStr="INFO"
;;
$DEBUG)
logStr="DEBUG"
;;
$WARNING)
logStr="WARNING"
;;
$ERROR)
logStr="ERROR"
;;
$FATAL)
logStr="FATAL"
;;
esac
echo "$logStr"
}
# Print the specified message to stdout if the call's specified
# level is at least the configured log level
function log {
local lvl="$1"
local logStr
local newLogLevel
# check if the log override file exists
if [ -f $LOG_OVERRIDE_FILE ] \
&& [ "$MANAGER_MODE" != "INTERACTIVE" ]; then
newLogLevel=$(cat $LOG_OVERRIDE_FILE)
# validation for newLogLevel
if [[ "$newLogLevel" =~ ^[1-5]$ ]]; then
LOG_LEVEL=$newLogLevel
logStr="$( log_to_str "$LOG_LEVEL" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Log level set to $logStr"
else
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level read from $LOG_OVERRIDE_FILE."
fi
rm $LOG_OVERRIDE_FILE
fi
# validate LOG_LEVEL. If it is not valid, then use
# DEFAULT_LOG_LEVEL instead.
if [[ ! "$LOG_LEVEL" =~ ^[1-5]$ ]]; then
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level detected, will be set to" \
"$( log_to_str "$DEFAULT_LOG_LEVEL" )"
LOG_LEVEL=$DEFAULT_LOG_LEVEL
fi
# check if the log level for this call is equal to or higher
# than the set log level
if [ "$lvl" -ge "$LOG_LEVEL" ]; then
# print log
logStr="$( log_to_str "$lvl" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) $logStr ${@:2}"
fi
}
if ! [[ "$QUERY_TMOUT" =~ ^[0-9]+$ ]]; then
log $WARNING ".Values.manager.api.healthQueryTimeout not an integer"
QUERY_TMOUT=""
fi
function pause_on_trap {
local thistrap="$1"
local pausenum
if [ ! -e "$PAUSEFILE" ]; then
# no pause request
return
fi
pausenum="$( cat "$PAUSEFILE" )"
if [ -n "$pausenum" ] \
&& [ "$pausenum" != "$thistrap" ]; then
# not on this trap
return
fi
log $INFO "Vault manager is paused ($thistrap)"
# Pause until the pause file is removed by the author,
# or until the content of the pause_on_trap file is
# non-empty and does not match the current trap.
#
# If the pause_on_trap file containing specific trap number is
# replaced with empty file: the pause state is maintained.
while [ -e "$PAUSEFILE" ]; do
pausenum="$( cat "$PAUSEFILE" )"
if [ -n "$pausenum" ] \
&& [ "$thistrap" != "$pausenum" ]; then
break;
fi
sleep "$PAUSE_RATE"
done
log $INFO "Vault manager is unpaused"
}
function exit_on_trap {
local trap="$1"
local tfnum=""
if [ "$MANAGER_MODE" == "INTERACTIVE" ]; then
# do not interfere with exit_on_trap intended for
# vault-manager pod
return
fi
# Debug option pause_on_trap
pause_on_trap "$trap"
if [ -e "$TRAPFILE" ]; then
tfnum=$(cat $TRAPFILE)
log $DEBUG "exit_on_trap: removing $TRAPFILE"
rm "$TRAPFILE" # for workdir on PVC
if [ -z "$tfnum" ]; then
# an empty trap file is the default expected behaviour
log $INFO "exit_on_trap: ($trap)"
exit
# handle trap debugging feature - a developer specifies the
# trap number to target a specific exit_on_trap call.
# Setting a value of 0 (zero) disables the debugging trap
elif [ "$tfnum" -eq 0 ]; then
log $DEBUG "exit_on_trap: ($trap):" \
"disable debug trap ($DEBUGGING_TRAP)"
DEBUGGING_TRAP=0
# there is no trap with value zero
return
else
DEBUGGING_TRAP="$tfnum"
log $DEBUG "exit_on_trap: ($trap): " \
"enable debug trap ($DEBUGGING_TRAP)"
# check now just in case it matches
if [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
fi
fi
# check if there is a matching debug trap set
elif [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
else
log $DEBUG "exit_on_trap: ($trap): no trap file, no exit"
fi
}
# Selects a single key shard by index as a JSON document containing
# the key and its base64 encoded version, so each shard can be
# stored separately. The root token will be stored separately.
function splitShard {
local index="$1"
jq '{"keys": [.keys['$index']], "keys_base64": [.keys_base64['$index']]}'
}
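# Illustrative usage of splitShard ('init_response' is a hypothetical
# variable holding vault's initialization response):
#   echo "$init_response" | splitShard 1
#   # -> {"keys": ["<key-1>"], "keys_base64": ["<key-1-b64>"]}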
# Merges two split-key JSON documents into one
function mergeKeyJson {
# the two parameters are the names of shell variables holding the documents (passed by name, not by value)
local jstr1="$1"
local jstr2="$2"
mkfifo "$WORKDIR"/s1
mkfifo "$WORKDIR"/s2
(
jq -Mn --argfile file1 $WORKDIR/s1 --argfile file2 $WORKDIR/s2 '
def mergek: ($file1, $file2) | .keys as $k | $k;
def mergeb: ($file1, $file2) | .keys_base64 as $b | $b;
{keys: (reduce mergek as $x ([]; . + $x)),
keys_base64: (reduce mergeb as $x ([]; . + $x))}
' & ) 2>/dev/null
echo -n "${!jstr1}" > "$WORKDIR"/s1
echo -n "${!jstr2}" > "$WORKDIR"/s2
rm -f "$WORKDIR"/s1 "$WORKDIR"/s2
}
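# Illustrative usage of mergeKeyJson (variable names are passed):
#   doc1='{"keys": ["k1"], "keys_base64": ["b1"]}'
#   doc2='{"keys": ["k2"], "keys_base64": ["b2"]}'
#   merged="$( mergeKeyJson doc1 doc2 )"
#   # -> {"keys":["k1","k2"],"keys_base64":["b1","b2"]}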
# Prepare a json document from the k8s secrets prefixed with
# prefix, and the root token
#
# Required parameter: The prefix of the k8s secrets containing
# the shards
#
# Outputs the json document which is comparable to the original
# response for vault initialization. The calling function is
# responsible for validating the document content.
#
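# Illustrative output shape (values redacted):
#   {"keys": ["<shard-0>", ...],
#    "keys_base64": ["<shard-0-b64>", ...],
#    "root_token": "<root-token>"}
#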
function reconstructInitResponse {
local prefix="$1"
local index
local keys
local mkeys
# pull secrets from k8s and merge into one json file.
for index in $( seq 0 $(( KEY_SECRET_SHARES - 1 )) ); do
keys="$( get_secret "${prefix}-$index" )"
if [ "$index" -eq 0 ]; then
mkeys="$keys"
continue
fi
mkeys=$( mergeKeyJson mkeys keys )
done
# append the root secret and echo the document
echo "$mkeys" | jq -c '{keys: .keys,
keys_base64: .keys_base64,
root_token: "'$( get_secret "cluster-key-root" )'"}'
}
# Check the structure of json data and confirm equivalence of
# the stdin with stored secrets
#
# Required parameter: The prefix of the k8s secrets containing
# the shards in stored secrets
#
# Returns the normal linux success=0, failure!=0
function validateSecrets {
local keyprefix="$1"
local text
local keys
local keys_base64
local root_token
local count
local saved
local shaA
local shaB
text=$( cat )
keys=$( echo "$text" | jq '.keys' )
keys_base64=$( echo "$text" | jq '.keys_base64' )
root_token=$( echo "$text" | jq -r '.root_token' )
# response is 'null' if the dict key is missing
# response is empty (-z) if the source document is empty
if [ -z "$keys" -o "$keys" == "null" \
-o -z "$keys_base64" -o "$keys_base64" == "null" \
-o -z "$root_token" -o "$root_token" == "null" ]; then
log $ERROR "one or more missing keys"
return 1
fi
count=$( echo "$keys" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
count=$( echo "$keys_base64" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys_base64 length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys_base64:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
saved="$( reconstructInitResponse "${keyprefix}" )"
# finally ensure that the saved secrets are the same as the
# supplied text
shaA=$( echo "$text" | sha256sum )
shaB=$( echo "$saved" | sha256sum )
if [ "$shaA" != "$shaB" ]; then
log $ERROR "saved data differs from source data"
return 1
fi
log $INFO "Verified stored secrets are the same as supplied data"
return 0
}
# Creates a list of all k8s vault pods and stores in text file.
# Converts ips from X.X.X.X or a:b:c::d to X-X-X-X for use as pod
# dns names
#
# Optional parameter:
# --ha : append vault server active/standby status (boolean)
#
# Example output with --ha
# sva-vault-0 172-16-226-97 true
function getVaultPods {
local ha="$1"
local jpath
local meta='{.metadata.name}'
local ip='{.status.podIPs[].ip}'
local active='{.metadata.labels.vault-active}'
local jfields=${meta}'{"\t"}'${ip}
if [ "$ha" == "--ha" ]; then
jfields=${jfields}'{"\t"}'${active}
fi
jpath='{range .items[*]}'"$jfields"'{"\n"}{end}'
$KUBECTL get pods \
-n "$VAULT_NS" \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jpath" \
| sed 's/\.\|:/-/g'
}
# Wait for the vault servers in the stateful set to be
# created before initializing
function waitForPods {
local jsonPath='{range .items[*]}{.metadata.name}{"\t"} \
{.status.podIPs[].ip}{"\t"}{.status.phase}{"\n"} \
{end}'
CURRENT_PODS=$($KUBECTL get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
DESIRED_PODS=$1
if ! [[ "$CURRENT_PODS" =~ ^[0-9]+$ ]]; then
log $ERROR "Invalid Running pod number ($CURRENT_PODS) from kubectl get pods"
CURRENT_PODS=0
fi
while [ $CURRENT_PODS -lt $DESIRED_PODS ]; do
sleep "$STATEFULSET_RATE"
log $INFO "Waiting for ${VAULT_FN}" \
"statefulset running pods ($CURRENT_PODS) to equal" \
"desired pods ($DESIRED_PODS)"
CURRENT_PODS=$($KUBECTL get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
done
}
# Takes the json document output from vault initialization
# and stores it into secrets for key shards and the root token
#
# Required parameter: The prefix of the k8s secrets into which to
# store the shards
#
# This only works if the secrets are not pre-existing. An error
# is printed by set_secret.
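#
# Illustrative: with prefix 'cluster-key' and KEY_SECRET_SHARES=5,
# the secrets created are cluster-key-0 .. cluster-key-4, plus
# cluster-key-root when the input includes a root_token.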
function storeVaultInitSecrets {
local keyprefix="$1"
local secrets
local index
local split_json
secrets=$( cat )
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
split_json=$( echo -n "$secrets" | splitShard "$index" )
set_secret "${keyprefix}-$index" /dev/stdin <<< "$split_json"
done
# if the data contains root_token, save it as well
split_json=$( echo "$secrets" | jq -r '.root_token' )
if [ -n "$split_json" -a "$split_json" != 'null' ]; then
set_secret "${keyprefix}-root" /dev/stdin <<< "$split_json"
fi
}
# Address a vault server with REST API request. Capture stderr,
# stdout and result of curl commands. Print error and debug logs
#
# Required positional parameters, in order:
# Response variable : variable in which to store the response
# from vault
# http request type : GET, POST, DELETE
# vault server : FQDN
# vault REST API path : e.g., /sys/health
#
# Optional final parameter : a quoted string of data
#
# Examples:
# # get health status query for the active vault status
# vaultAPI myvar GET $ACTIVE_TARGET /sys/health
#
# # post rekey initialization with shares 5 and threshold 3
# data='{"secret_shares": 5,"secret_threshold": 3}'
# vaultAPI myvar POST $ACTIVE_TARGET /sys/rekey/init "$data"
#
# Overridable ENV variables:
# API_TMOUT: the curl timeout
# NO_HEADER: omit header (the root token) if not empty
#
# Output:
# Return the stdout and command result code
#
# Print log messages for errors. The responses from vault are
# restricted to DEBUG level log in case there's secret information
# in them. But a non-specific ERROR message is printed in all
# cases of errors.
function vaultAPI {
local answer="$1"
local reqarg="$2"
local server="$3"
local apipath="$4"
local data="$5"
local cmderr=""
local cmdout=""
local cmdres=1
local header=""
local errors=""
if [ -z "$NO_HEADER" ]; then
header="X-Vault-Token:$( get_secret cluster-key-root )"
fi
log $DEBUG "Executing: [curl -s -S --cacert \""$CERT"\"" \
${API_TMOUT:+"--connect-timeout" "$API_TMOUT"} \
${header:+"--header" "xxxx"} \
"--request \"$reqarg\"" \
${data:+"--data" "xxxx"} \
"\"https://${server}:${TARGET_PORT}/v1${apipath}\"]"
# Capture stderr and stdout copied from google search example
# on stack overflow. Add capture of the command result code
{
IFS=$'\n' read -r -d '' cmderr;
IFS=$'\n' read -r -d '' cmdout;
cmdres="$( echo "$cmdout" | tail -n1 )"
cmdout="$( echo "$cmdout" | head -n-1 )"
} < <((printf '\0%s\0' "$(
curl -s -S --cacert "$CERT" \
${API_TMOUT:+"--connect-timeout" "$API_TMOUT"} \
${header:+"--header" "$header"} \
--request "$reqarg" \
${data:+"--data" "$data"} \
"https://${server}:${TARGET_PORT}/v1${apipath}"
echo "$?"
)" 1>&2) 2>&1)
if [ "$cmdres" -ne 0 ]; then
log $ERROR "curl returns non-zero result: $cmdres"
fi
if [ -n "$cmderr" ]; then
log $ERROR "curl returns stderr"
log $DEBUG "curl returns stderr: [$cmderr]"
fi
if [ -n "$cmdout" ]; then
# errors from the REST API
errors=$( echo "$cmdout" | jq -cr '.errors' )
if [[ "$errors" != 'null' ]] && [ -n "$errors" ]; then
log $ERROR "vault REST API error"
log $DEBUG "vault REST API error: $errors"
if [ "$cmdres" -eq 0 ]; then
# this code wants to know if there was an error
cmdres=1
fi
fi
fi
eval "$answer"='$cmdout'
return $cmdres
}
# Initializes the first vault pod, only needs to be performed once
# after deploying the helm chart
# Stores the root token and master key shards in k8s secrets
function initVault {
local V0 # the zeroeth vault pod
local keys
local key_error
local shares
local threshold
V0=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
log $INFO "Initializing $V0"
shares='"secret_shares": '$KEY_SECRET_SHARES
threshold='"secret_threshold": '$KEY_REQUIRED_THRESHOLD
NO_HEADER=true \
vaultAPI keys POST $V0.$POD_TARGET_BASE \
/sys/init "{$shares, $threshold}"
key_error=$(echo -n "$keys"| jq -r '.errors[]?')
if [ -n "$key_error" ]; then
log $ERROR "vault init request failed: $key_error"
fi
echo "$keys" | storeVaultInitSecrets cluster-key
# check if the secrets match vault's REST API response
echo "$keys" | validateSecrets cluster-key
}
# Uses the master key shards to unseal vault
function unsealVault {
local server="$1"
local prefix="$2"
local index
local b64key
local data
local response
local value
local autherror
if [ -z "$prefix" ]; then
prefix='cluster-key'
fi
# always abort an unseal in progress
data='{"reset": true}'
NO_HEADER=true \
API_TMOUT=$API_UNSEAL_OP_TMOUT \
vaultAPI response POST $server.$POD_TARGET_BASE \
/sys/unseal "$data"
if [ $? -ne 0 ]; then
# error is already printed
# Including if vault is already unsealed.
if [[ "$response" == *"vault is unsealed"* ]]; then
log $WARNING "unsealVault: server $server is" \
"already unsealed"
fi
return 1
fi
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
b64key=$( get_secret "${prefix}-$index" \
| jq -r '.keys_base64[]' )
data="{\"key\": \"$b64key\"}"
NO_HEADER=true \
API_TMOUT=$API_UNSEAL_OP_TMOUT \
vaultAPI response POST $server.$POD_TARGET_BASE \
/sys/unseal "$data"
if [ $? -ne 0 ]; then
# error is already printed, including errors from the
# vault REST API; but for debugging purposes, highlight
# the authentication error
autherror="cipher: message authentication failed"
if [[ "$response" == *"$autherror"* ]]; then
log $ERROR "Failed to authenticate /sys/unseal" \
"with $prefix"
# perhaps use this info in the future
return 2
fi
log $DEBUG "Unknown failure authenticating unseal" \
"$response"
return 1
fi
# when the unseal completes with KEY_REQUIRED_THRESHOLD then
# the response will indicate sealed=false
value="$( echo "$response" | jq -r ".sealed" )"
if [ "$value" == "false" ]; then
log $DEBUG "Success authenticating unseal"
return 0
fi
value="$( echo "$response" | jq -r ".progress" )"
log $DEBUG "Success authenticating unseal" \
"(${value}/${KEY_REQUIRED_THRESHOLD})"
# Some sleep is required to allow Raft convergence
sleep "$UNSEAL_CONVERGE_TIME"
done
log $ERROR "unsealVault completes without unseal or error"
return 1
}
# Unseal a vault server under conditions of recovery,
# including selecting and remembering alternate shard
# secrets.
#
# This algorithm remembers the last shards used to unseal the vault,
# to prioritize using those again the next time.
function unsealVaultRecover {
local server="$1"
local attempted
local use_secrets=""
if [ -n "$SHARDS_LAST_SUCCESSFUL" ]; then
# double check the keys we were using are not deleted
if assertShardSecrets "$SHARDS_LAST_SUCCESSFUL"; then
use_secrets="$SHARDS_LAST_SUCCESSFUL"
fi
fi
use_secrets="$use_secrets $( \
getOtherShardSecrets "$SHARDS_LAST_SUCCESSFUL" )"
for attempted in $use_secrets; do
log $INFO "Attempt unseal with $attempted"
unsealVault "$server" "$attempted"
case $? in
0)
SHARDS_LAST_SUCCESSFUL="$attempted"
return 0
;;
2)
# an error is already printed
# try a different set of shards
continue
;;
*)
# failure is not clear, try again later
log $ERROR "Fail to unseal $server with" \
"$attempted; try later"
return 1
;;
esac
done
log $ERROR "No set of shards unseal the server $server:" \
"attempted: $use_secrets"
return 1
}
# Takes the address of the active vault server as the raft leader
# and joins the other nodes to raft
function joinRaft {
local dnsname="$1"
local activeLink="https://${ACTIVE_TARGET}:${TARGET_PORT}"
local dataJson="{\"leader_api_addr\": \"$activeLink\", \"leader_ca_cert\": \"$CA_ONELINE\"}"
RAFT_STATUS=""
while [ "$RAFT_STATUS" != "true" ]; do
vaultAPI RAFT_STATUS POST $dnsname.$POD_TARGET_BASE \
/sys/storage/raft/join "$dataJson"
log $INFO "$dnsname $RAFT_STATUS"
RAFT_STATUS=$(echo $RAFT_STATUS | jq -r .joined)
sleep "$JOIN_CONVERGE_TIME"
done
}
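# Seal-status state machine for a single vault server pod
#
# Appends the pod's current status record to PODREC_TMP_F and, when
# a pod remains sealed for STATUS_RATE * STATEMACH_START seconds,
# attempts to unseal it via unsealVaultRecover
#
# Required positional parameters, in order:
# pod name, pod DNS name, sealed status ("true"/"false")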
function runStateMachine {
local host="$1"
local dns_name="$2"
local sealed="$3"
local status_rec
local old_rec
local counter
status_rec="/$host/$dns_name/$sealed/"
# log compression: do not print logs when status is unchanged
# omit counter when checking vault server state change
old_rec="$( grep "$status_rec" "$PODREC_F" )"
if [ $? -ne 0 ]; then
log $DEBUG "$( grep "$dns_name" $WORKDIR/pods.txt )"
log $INFO "Sealed status of $dns_name is now: $sealed"
# reread the record by hostname only
old_rec="$( grep "^/$host/" "$PODREC_F" )"
else
log $DEBUG "There is no change in pod seal status"
fi
if [ "$sealed" != "true" ]; then
# There is nothing more to do: the vault is unsealed
# or the sealed status is unclear
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# The vault is sealed
#
# Check if there is a countdown in progress
#
# else -z old_rec: "the pod didn't have an IP address the last
# iteration, but now it does" - treat the same as "sealed
# without a countdown"
counter=""
if [ -n "$old_rec" ]; then
counter="$( echo "$old_rec" | awk -F/ '{print $5}' )"
fi
if [ -z "$counter" ]; then
# sealed without a countdown: start counting
log $DEBUG "Sealed vault $host: begin unseal delay:" \
"$( expr "$STATUS_RATE" \* "$STATEMACH_START" )s"
echo "${status_rec}${STATEMACH_START}" >> "$PODREC_TMP_F"
return
fi
# Check for end of period: 1 means "zero at this interval"
# "less than 1" for resilience
if [ "$counter" -le 1 -o "$STATEMACH_START" -eq 0 ]; then
# We've waited (STATUS_RATE * STATEMACH_START) seconds
# Or, STATEMACH_START == 0 means do not delay
log $INFO "Unsealing $dns_name"
unsealVaultRecover "$dns_name"
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# finally, continue to countdown
counter="$( expr "$counter" - 1 )"
echo "${status_rec}${counter}" >> "$PODREC_TMP_F"
}
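# Query the first listed vault server pod for initialization status
#
# Returns 1 if vault reports initialized == false, otherwise
# returns 0 (including when the status is unclear)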
function vaultInitialized {
local response
local dnsname
local initialized
local text
# Wait for the pod to respond with a positive vault API response
# (i.e., not just a curl failure, and not a vault API failure)
while true; do
dnsname=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
if [ -z "$dnsname" ]; then
log $INFO "waiting..."
sleep $STATUS_RATE
getVaultPods > $WORKDIR/pods.txt
continue
fi
log $INFO "Query server $dnsname for initialization status"
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI response GET $dnsname.$POD_TARGET_BASE /sys/health
if [ $? -ne 0 ]; then
log $INFO "waiting..."
sleep $STATUS_RATE
getVaultPods > $WORKDIR/pods.txt
continue
fi
break
done
echo -n "$response" > $WORKDIR/healthcheck.txt
initialized=$( echo "$response" | jq -r .initialized )
text="$( grep $dnsname $WORKDIR/pods.txt )"
if [ $? -eq 0 ]; then
log $DEBUG "$text"
log $DEBUG "Initialized status is $initialized"
fi
# The empty check is here as an extra safety net; an
# investigation into the exact conditions under which the result
# would be empty would be helpful.
if [ -n "$initialized" ] && [ "$initialized" = "false" ]; then
return 1
else
return 0
fi
}
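# Store the content of the specified file in a k8s secret under the
# data key 'strdata'
#
# Returns the normal linux success=0, failure!=0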
function set_secret {
local secret="$1"
local contentf="$2"
local output
local result
output="$( $KUBECTL create secret generic -n "$VAULT_NS" \
"$secret" "--from-file=strdata=$contentf" 2>&1 )"
result=$?
if [ "$result" -ne 0 ]; then
log $ERROR "Failed to create secret $secret"
log $DEBUG "Output: [$output]"
fi
return $result
}
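# Print the base64-decoded 'strdata' content of the specified
# k8s secret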
function get_secret {
local secret="$1"
$KUBECTL get secrets -n "$VAULT_NS" "$secret" \
-o jsonpath='{.data.strdata}' \
| base64 -d
}
# When vault-manager is run in "MOUNT_HELPER" mode, this function
# will not return. Instead the function will exit_on_trap or exit
# when it times-out.
#
# Basically: this function doesn't do anything except wait to be
# terminated.
#
# Vault-manager in MOUNT_HELPER has PVC mounted, allowing the real
# vault-manager to read secrets from cluster_keys.json
function mountHelper {
local count
# omit this function if this pod is not the mount helper
if [ -z "$MANAGER_MODE" -o "$MANAGER_MODE" != "MOUNT_HELPER" ]; then
log $INFO "Mode is VAULT_MANAGER"
return
fi
# When vault-manager is running in this mode, it should be
# deleted by vault-manager running in the default mode, which
# is using this pod to read secrets from mounted PVC
log $INFO "Mode is $MANAGER_MODE"
# start with some debug/error logs
if [ -f "$PVC_DIR/cluster_keys.json" ]; then
log $DEBUG "Successfully mounted secrets file"
else
log $WARNING "Secrets file not found"
fi
# sleep for MOUNT_HELPER_MAX_TIME, expecting SIGTERM signal
log $INFO "Waiting for termination request via SIGTERM"
count=0
while [ "$count" -lt "$MOUNT_HELPER_MAX_TIME" ]; do
exit_on_trap
count=$((count+1))
sleep 1
done
# Normally should exit by exit_on_trap, but here we timeout
# waiting for the real vault-manager to delete this job/pod.
log $INFO "Exiting without receiving SIGTERM request"
exit 0
}
# Check if a secret exists
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the secret
function secretExists {
local name="$1"
$KUBECTL get secrets -n "$VAULT_NS" "$name" \
-o jsonpath='{.metadata.name}' 2>/dev/null \
| grep "$name"
}
# Return linux success=0 if any of the secrets exist
function secretsExistAny {
local list="$@"
local name
for name in $list; do
secretExists $name >/dev/null
if [ $? -eq 0 ]; then
return 0
fi
done
return 1
}
# Assert that the shard secrets starting with prefix exist
#
# Parameter: prefix for k8s secrets, such as 'cluster-key'
#
# Optional second parameter:
# --nokeys : fails if at least one exists
#
# Returns the normal linux success=0, failure!=0
#
# When --nokeys is selected, the failure return code is the number
# of secrets found. Zero secrets were expected.
#
# When --nokeys is omitted, the failure return code is either the
# number of secrets found or if the number of secrets found was
# zero, KEY_SECRET_SHARES is returned as error code
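#
# Illustrative usage:
#   assertShardSecrets cluster-key            # 0 when cluster-key-0..4 all exist
#   assertShardSecrets cluster-rekey --nokeys # 0 when no cluster-rekey-N exists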
function assertShardSecrets {
local prefix="$1"
local nokey="$2"
local i
local count=0
for i in $( seq 0 $((KEY_SECRET_SHARES-1)) ); do
secretExists "${prefix}-$i" >/dev/null
if [ $? -eq 0 ]; then
count=$((count+1))
fi
done
if [ "$nokey" == "--nokeys" ]; then
# 0 secrets == true (0)
# Else return the number of secrets
return $count
fi
if [ "$count" -eq "$KEY_SECRET_SHARES" ]; then
return 0
elif [ "$count" -eq 0 ]; then
return "$KEY_SECRET_SHARES" # an error result
fi
return "$count"
}
# Return a list of other existing Shard secrets other than the set
# specified
#
# Sort by priority order:
# cluster-key
# cluster-rekey
# cluster-key-bk
#
function getOtherShardSecrets {
local omit="$1"
local secrets="cluster-key cluster-rekey cluster-key-bk"
local secret
local others=""
for secret in $secrets; do
if [ "$secret" == "$omit" ]; then
continue
fi
if assertShardSecrets $secret; then
others="$others $secret"
fi
done
echo $others
}
# Delete the specified list of secrets
#
# Uses a single kubectl command
function deleteSecrets {
local secrets="$@"
local text
text="$( $KUBECTL delete secrets -n "$VAULT_NS" \
$secrets 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting secrets: ["$text"]"
return 1
fi
log $INFO $text
return 0
}
# Check if the PVC resource exists
#
# Returns 0 if pvc does not exist
# Returns 1 if pvc exists but is terminating
# Returns 2 if pvc exists and is not terminating
# Prints the name of the PVC resource
function pvcRemoved {
local text
local jqscript
jqscript='.items
| map(select(.metadata.name | test("^manager-pvc")))
| "\(.[0].metadata.name) \(.[0].status.phase)"'
# using jq since kubectl's jsonpath does not support regex
# the grep makes sure the result contains the 'manager-pvc'
# string (as opposed to 'null' for example)
text="$(
$KUBECTL get persistentvolumeclaims -n "$VAULT_NS" -o json \
| jq -r "$jqscript" 2>/dev/null \
| grep manager-pvc )"
if [ -n "$text" ]; then
readarray -d " " -t pvcInfo <<< "$text"
pvcName="${pvcInfo[0]}"
pvcStatus="${pvcInfo[1]}"
echo "$pvcName"
if [ "$pvcStatus" = "Terminating" ]; then
return 1
else
return 2
fi
fi
return 0
}
# Check if the PVC is mounted to any pod in vault namespace
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the PVC resource
function testPVCMount {
local result
local cspec
local vspec
cspec=".items[*].spec.containers[*]"
vspec="volumeMounts[?(@.name=='manager-pvc')].name"
# this kubectl query returns zero whether manager-pvc is
# found or not
# result variable is either empty or 'manager-pvc'
result="$( $KUBECTL get pods -n "$VAULT_NS" \
-o jsonpath="{${cspec}.${vspec}}" )"
if [ -n "$result" ]; then
return 0
fi
return 1 # assertion 'fails'
}
# This function prints a DEBUG log of kubectl delete
function deleteMountHelper {
local text
local result
log $DEBUG "Waiting for delete of mount-helper job"
text="$( $KUBECTL delete --ignore-not-found=true --wait=true \
-f /opt/yaml/pvc-attach.yaml 2>&1 )"
result=$?
log $DEBUG "Output of deleting mount-helper: [$text]"
return $result
}
# Run shred on the file content of PVC
#
# All files are shredded, and the result is an error if
# - command return code is non-zero
# - file comparison shows unchanged file(s)
#
# A warning is issued if shred/kubectl command has any stdout or
# stderr
#
# Returns the normal linux success=0, failure!=0
function securelyWipePVC {
local helper="$1"
if [ -z "$helper" ]; then
log $ERROR "No pod specified for shredding"
return 1
fi
# get profile of the files before shredding
$KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_before.txt 2>&1
log $DEBUG "Original files: [$( cat /tmp/shred_before.txt )]"
# run the shred command
#
# Shred all the files in mounted /mnt/data/
#
# The shred by default has three randomized passes, and with -z
# option will finalize with zeros. -f prompts shred to work
# around any unexpected file permissions
text="$( $KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c '\
result=0; \
while read fname; do \
shred -f -z "$fname"; \
[ $? -ne 0 ] && result=1; \
done <<<"$(find /mnt/data -type f )"; \
exit $result' 2>&1 )"
result=$?
# get profile of the files after shredding
$KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_after.txt 2>&1
log $DEBUG "Shredded files: [$( cat /tmp/shred_after.txt )]"
# compare the profiles for error reporting
#
# If the file lists, pushed through wc, have files with the same
# character, word, and line counts then report an error: a file
# has not been shred
#
# Ignore files that were empty
difftext="$( diff -wuU100000 /tmp/shred_before.txt \
/tmp/shred_after.txt )"
unchanged="$( echo "$difftext" | grep "^ " \
| grep -v "^\([ ]\{1,\}0\)\{3\} /" )"
# Report the errors/success
if [ "$result" -ne 0 ]; then
log $ERROR "Error on shred: [$text]"
if [ -n "$unchanged" ]; then
log $ERROR "Unchanged: [$unchanged]"
fi
return 1
fi
if [ -n "$text" ]; then
log $WARNING "Output of shred is not empty: [$text]"
fi
if [ -n "$unchanged" ]; then
log $ERROR "Shred did not shred some files"
log $ERROR "Unchanged: [$unchanged]"
return 1
fi
log $INFO "Shredding of PVC data verified"
return 0
}
# Delete the PVC resource
#
# The delete will succeed even if attached to a pod, such as a
# terminating vault-manager or mount-helper - the PVC remains
# in terminating status until the pod is also terminated.
function deletePVC {
local text
local name
name="$( pvcRemoved )"
if [ $? -eq 2 ] && [[ "$name" =~ ^manager-pvc ]]; then
text="$( $KUBECTL delete persistentvolumeclaims \
-n "$VAULT_NS" "$name" 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting PVC: [$text]"
else
log $INFO "$text"
fi
else
log $WARNING "Request to delete PVC but PVC not found"
fi
}
# Run a job/pod, to mount the PVC resource, and retrieve the secrets
# from PVC.
#
# See also the function mountHelper and the ConfigMap named:
# {{ .Values.vault.name }}-mount-helper
#
# This function does not support overwriting an existing
# cluster-key-* secret, but it does support validating those secrets
# if they exist
function convertPVC {
local output
local pod
local count
local text
local PVCtext
local result
local waitPVCterm
if testPVCMount; then
log $ERROR "Cannot mount PVC already mounted"
return 1
fi
# run the pod
output="$( $KUBECTL apply -f /opt/yaml/pvc-attach.yaml 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Failed to apply mount-helper"
log $DEBUG "Output: [$output]"
deleteMountHelper
return 1
fi
# wait for pod
pod=''
count=0
log $INFO "Waiting for mount-helper pod to run"
while [ -z "$pod" -a "$count" -le "$MAX_POD_RUN_TRIES" ]; do
count=$((count+1))
text="$( $KUBECTL get pods -n "$VAULT_NS" \
| grep "mount-helper" )"
pod="$( echo "$text" | grep "Running" | awk '{print $1}' )"
if [ -z "$pod" ]; then
sleep 1
fi
done
if [ -z "$pod" ]; then
log $ERROR "Failed to run mount-helper pod"
log $DEBUG "Pod state: [$( echo $text )]"
deleteMountHelper
return 1
fi
# get the pvc data
PVCtext="$( $KUBECTL exec -n "$VAULT_NS" "$pod" \
-- cat /mnt/data/cluster_keys.json )"
if [ $? -ne 0 -o -z "$PVCtext" ]; then
log $ERROR "Failed to read cluster_keys.json"
deleteMountHelper
return 1
fi
log $INFO "Data retrieved from PVC"
# if the Root secret is pre-existing, compare the existing
# shard secrets and root secret before deleting the PVC
$KUBECTL get secrets -n "$VAULT_NS" \
cluster-key-root >/dev/null 2>&1
if [ $? -eq 0 ]; then
log $INFO "Cluster secrets exist:" \
"validating"
else
# create a secret from the data
echo "$PVCtext" | storeVaultInitSecrets cluster-key
fi
# verify the data stored versus text from PVC
echo "$PVCtext" | validateSecrets cluster-key
result=$?
if [ "$result" -eq 0 ]; then
securelyWipePVC "$pod"
# omit deleting the PVC for manual analysis and shred
# when the wipe fails
if [ $? -eq 0 ]; then
deletePVC
fi
fi
# clean up but do not care about the result
deleteMountHelper
# Sleep before finishing conversion, so that the PVC termination process has started
waitPVCterm=5
sleep $waitPVCterm
return $result
}
function convertBootstrapSecrets {
local text
local count
text="$( get_secret cluster-key-bootstrap )"
echo "$text" | storeVaultInitSecrets cluster-key
# verify the split secrets versus the bootstrap text
echo "$text" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
deleteSecrets cluster-key-bootstrap
# Also validate and delete the PVC resource
# This procedure depends on waiting for the old version
# of vault-manager pod to exit
count="$TERMINATE_TRIES_MAX"
log $INFO "Waiting for vault-manager pod to exit"
while testPVCMount && [ "$count" -gt 0 ]; do
sleep "$TERMINATE_TRIES_SLEEP"
count=$((count-1))
done
if [ $count -eq 0 ]; then
log $WARNING "Maximum time reached waiting" \
"for the previous pod to be terminated."
fi
convertPVC
}
# When enabled, after conversion of storage from PVC to k8s secrets,
# Vault-manager will prompt itself to rekey the vault server
# storage.
function requestRekey {
local value
if [ "$AUTO_REKEY_CONVERT" != "true" ]; then
return
fi
log $INFO "Auto rekey enabled: [$AUTO_REKEY_CONVERT]"
secretExists cluster-rekey-request >/dev/null
if [ $? -eq 0 ]; then
value="$( get_secret cluster-rekey-request )"
log $WARNING "Auto rekey: rekey request exists: $value"
return
fi
value=$( uuidgen )
set_secret cluster-rekey-request /dev/stdin <<<"$value"
if [ $? -eq 0 ]; then
log $INFO "Rekey requested: $value"
else
log $ERROR "Failed to request rekey: $value"
fi
return
}
function runConversion {
if [ -n "$K8S_SECRETS_PREEXIST" ]; then
log $INFO "Cluster secrets exist"
return
elif [ -n "$BOOTSTRAP_PREEXISTS" ]; then
# this is the normal application update procedure; the
# lifecycle code retrieved the secrets from previous version
# of the application.
log $INFO "Using secrets provided in $BOOTSTRAP_PREEXISTS"
convertBootstrapSecrets
requestRekey
return
elif [ -z "$PVC_PREEXISTS" ]; then
log $INFO "No pre-existing secrets exist"
return
fi
# Finally, read the pre-existing PVC. This occurs if the
# application updates outside of application-update. For
# example if the old application is removed and deleted, and the
# new application is uploaded and applied.
convertPVC
requestRekey
}
# Test whether the specified vault server(s) agree with the
# specified status of the specified endpoint
#
# Print DEBUG logs when status is non-conforming (the function will
# be used to wait for conformance).
#
# The first parameter is the vault API endpoint to check status
# of, either /sys/rekey/init or /sys/rekey/verify
# The second parameter is the quoted string of json data returned
# from vault REST API call. The data should include these fields,
# which are tested for conformance:
# {"nonce": "S", "started": B, "progress": N,
# "verification_required": B}
#
# The other parameters are the servers to test, specified as
# dash-separated IP address output of getVaultPods (XX-XX-XX-XX)
#
# Returns the normal linux success=0, failure!=0
function assertRekeyStatus {
local endpoint="$1"
local data="$2"
shift 2
local -a servers=($@)
local -a key_arr
local required
local jscript
local key
local index
local error
local server
local response
local record
required="nonce progress started verification_required"
jscript=".nonce, .progress, .started, .verification_required"
if [ "$endpoint" == "/sys/rekey/verify" ]; then
required="nonce progress started"
jscript=".nonce, .progress, .started"
fi
# quick check to assure the data parameter is sane
key_arr=($(echo "$data" | jq -r 'keys[]' | sort))
for key in $required; do
if [[ " ${key_arr[*]} " != *" $key "* ]]; then
log $ERROR "assertRekeyStatus requires: [$required]," \
"received: ${key_arr[*]}"
return 1
fi
done
required="$( echo "$data" | jq -r "$jscript" )"
index=0
error=0
while [ "$index" -lt "${#servers[@]}" ]; do
server="${servers[$index]}"
index=$((index+1))
server="${server}.$POD_TARGET_BASE"
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET "$server" "$endpoint"
if [ $? -ne 0 -o -z "$response" ]; then
# failing the REST API call is not the same
# as non-conformance
return 2
fi
record="$( echo "$response" | jq -r "$jscript" )"
if [ "$record" != "$required" ]; then
log $ERROR "$server does not conform to:" \
"$( echo "$data" | jq -c '.' )"
log $DEBUG "$server does not confirm: $response"
error=1
continue
fi
log $DEBUG "$server conforms: $response"
done
return $error
}
# Test whether the vault server(s) agree about rekey status
#
# The parameter is the quoted string of json data to pass to
# assertRekeyStatus
#
# Returns the normal linux success=0, failure!=0
function assertServerStatus {
local reference="$1"
local pods
local count
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/init" "$reference" $pods
}
# Test whether the vault server(s) agree about rekey validation
# status. Warn when the active vault server changes
#
# The parameter is the quoted string of json data to pass to
# assertRekeyStatus
#
# Returns the normal linux success=0, failure!=0
function assertVerifyStatus {
local reference="$1"
local response
local pods
local result
local count
# first assert the rekey status; /sys/rekey/verify returns
# error if a server does not have rekey in progress
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
result=$?
if [ "$result" -ne 0 ]; then
return $result
fi
assertServerStatus "$response"
result=$?
if [ $result -ne 0 ]; then
return $result
fi
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/verify" "$reference" $pods
}
# Assert that the /sys/rekey/init endpoint reports no
# rekey procedure in progress on any server
#
# Returns the normal linux success=0, failure!=0
function assertNoRekey {
local data
data='{"nonce": "", "started": false, "progress": 0'
data="$data"', "verification_required": false}'
assertServerStatus "$data"
}
# Retrieve the rekey status from active vault server
# and assert that all servers conform to the status
#
# Returns the normal linux success=0, failure!=0
function assertServersConform {
local response
local value
local result
local pods
local count
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# cannot check conformance
log $ERROR "Cannot check server conformance to" \
"/sys/rekey/init"
return 2
fi
assertServerStatus "$response"
result="$?"
if [ "$result" -ne 0 ]; then
return $result
fi
value="$( echo "$response" | jq -r '.verification_nonce' )"
if [ -z "$value" -o "$value" == "null" ]; then
return 0
fi
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/verify
if [ $? -ne 0 ]; then
# cannot check conformance
log $ERROR "Cannot check server conformance to" \
"/sys/rekey/verify"
return 2
fi
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/verify" "$response" $pods
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is not running.
function allServersRunning {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( awk -F/ '{print $2}' <<<"$records" | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is sealed
function allServersUnsealed {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( grep "/false/" <<<"$records" \
| awk -F/ '{print $2}' | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is missing an IP address
function allServersHaveIP {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( echo "$records" | awk -F/ '{print $3}' | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# Test the 'started' status of the rekey procedure during pre-rekey
# tests for procedure progress selection (sharing a single vaultAPI
# call to GET /sys/rekey/init)
#
# Return linux true (0) if the status of /sys/rekey/init includes
# started == true
#
# Optional argument --not inverts the logic, but maintains
# error response 2
function assertRekeyStarted {
local started
local not="$1"
# assert that a rekey is in progress
started="$( echo "$REKEY_STATUS_JSON" | jq -r '.started' )"
if [ "$started" == "true" ]; then
started=0
elif [ "$started" != "false" ]; then
# the rekey status is unclear
# an error is probably printed
log $DEBUG "unclear response for /sys/rekey/init:" \
"$( jq -c <<<"$REKEY_STATUS_JSON" )"
return 2
else
started=1
fi
if [ "$started" -eq 0 ]; then
if [ "$not" == "--not" ]; then
return 1
fi
return 0
fi
if [ "$not" == "--not" ]; then
return 0
fi
return 1
}
# Delete the shard secrets with the specified prefix
#
# The secrets are deleted with a single kubectl command
function deleteShardSecrets {
local prefix="$1"
local i
local list=''
for i in $( seq 0 $((KEY_SECRET_SHARES-1)) ); do
if [ -n "$( secretExists "${prefix}-$i" )" ]; then
list="$list ${prefix}-$i"
fi
done
if [ -n "$list" ]; then
deleteSecrets $list
return $?
fi
return 0
}
# Make a copy of the shard secrets with specified prefix
#
# The calling function needs to verify the result
function copyShardSecrets {
local from="$1"
local to="$2"
local i
for i in $( seq 0 $((KEY_SECRET_SHARES-1))); do
get_secret "${from}-$i" \
| set_secret "${to}-$i" /dev/stdin
if [ $? -ne 0 ]; then
# don't try anything else
log $ERROR "Failed to copy ${from}-$i to ${to}-$i"
break
fi
done
}
# Just log the content of cluster-rekey-request again
#
# Keeps track of whether vault-manager has been restarted
# with REKEY_STARTED variable, so that the rekey procedure
# status is documented in log
function rekeyResuming {
if [ "$REKEY_STARTED" -ne 0 ]; then
log $INFO "Resuming rekey:" \
"$( get_secret cluster-rekey-request )"
REKEY_STARTED=0
fi
}
# Return linux true (0) if a rekey is requested and the vault
# server pods are in a stable condition
#
# If the vault servers are not "stable" then the rekey operation
# needs that stability first. vault-manager's main runStateMachine
# will monitor pods and restore unsealed status.
function needsRekey {
local pods
local sealed
local response
# the first milestone to be created is cluster-rekey-request;
# the last milestone to be deleted is cluster-rekey-audit;
# proceed if any exists
secretsExistAny cluster-rekey-request \
cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -ne 0 ]; then
# rekey is not requested
return 1
fi
# progress the rekey procedure only if the servers are all
# running
if ! allServersRunning; then
log $INFO "Rekey: wait for vault servers to equal" \
"$HA_REPLICAS"
return 1
fi
# progress the rekey procedure only if the servers were
# previously unsealed.
if ! allServersUnsealed; then
log $INFO "Rekey: wait for unsealed vault servers to" \
"equal $HA_REPLICAS"
return 1
fi
# progress the rekey procedure only if the servers all have
# DNS names (IP addresses) provided by k8s
if ! allServersHaveIP; then
log $INFO "Rekey: wait for $HA_REPLICAS vault servers" \
"to have IP addresses"
return 1
fi
# The above three tests are based on output of kubectl get pods
# command. Doublecheck with REST API call to each server
pods="$( getVaultPods | grep "^$VAULT_FN" | awk '{print $2}' )"
for pod in $pods; do
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI response GET ${pod}.$POD_TARGET_BASE /sys/health
if [ $? -ne 0 ]; then
log $ERROR "$pod fails health check during rekey"
return 1
fi
sealed="$( echo "$response" | jq -r '.sealed' )"
if [ "$sealed" != "false" ]; then
log $ERROR "$pod is sealed during rekey"
return 1
fi
done
assertServersConform
return $?
}
# Return linux true (0) if the current step of the rekey procedure
# is to send the initialize request to /sys/rekey/init
#
# Initialize is the first step
#
# Will not begin initialization if there are stale cluster-rekey or
# cluster-key-bk secrets
function needsInitialization {
local progress
local count
local error=0
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
# make assertions about the artifacts left behind by previous
# rekey procedure attempts
# assert that there are no stale keys before starting rekey
assertShardSecrets cluster-rekey --nokeys
count=$?
if [ "$count" -ne 0 ]; then
log $ERROR "Stale cluster-rekey secrets ($count) present"
# there was a possibility that vault had cancelled the rekey
# due to active server failure, so fall through to
# rekeyRecovery
return 1
fi
assertShardSecrets cluster-key-bk --nokeys
count=$?
if [ "$count" -ne 0 ]; then
log $ERROR "cluster-key-bk secrets ($count) present"
return 2
fi
return 0
}
# Start the rekey procedure
#
# Send the initialize request to /sys/rekey/init
#
# Initialize is the first step
#
# Will not begin initialization if there are stale cluster-rekey or
# cluster-key-bk secrets
function rekeyInitialize {
local shares
local threshold
local verify
local data
local response
local value
log $INFO "Initializing vault rekey"
REKEY_STARTED=0
shares='"secret_shares": '$KEY_SECRET_SHARES
threshold='"secret_threshold": '$KEY_REQUIRED_THRESHOLD
verify='"require_verification": true'
data="{$shares,$threshold,$verify}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET /sys/rekey/init "$data"
if [ $? -ne 0 ]; then
return 1
fi
value="$( echo "$response" | jq -r ".started" )"
if [ 'false' == "$value" ]; then
log $ERROR "Rekey not started"
return 1
fi
# log the nonce
value="$( echo "$response" | jq -r ".nonce" )"
verify="$( echo "$response" | jq -r ".verification_required" )"
log $INFO "Rekey started: $value" \
"(verification_required==$verify)"
# just a sanity check
if [ 'true' != "$verify" ]; then
log $ERROR "Rekey started without verification_required:" \
"aborting"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 1
fi
assertServerStatus "$response"
return $?
}
# The rekey authentication should happen when
# - there is a rekey in progress
# - there is no verification_nonce yet
#
# Authentication of the rekey request is the second step
#
# Omit rekey authentication if:
# - there are existing cluster-rekey secrets
# - Verification is complete: cluster-rekey-verified or any later
# stage is complete
#
# Return linux true (0) if the current stage of rekey
# is to authenticate the rekey request
function needsAuthentication {
local progress
assertRekeyStarted
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
progress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if ! [ -z "$progress" -o "$progress" == "null" ]; then
# There is a rekey in progress with a verification nonce
# pass through to recovery
return 1
fi
# this represents a recovery path
assertShardSecrets cluster-rekey --nokeys
if [ $? -ne 0 ]; then
# There are already cluster-rekey secrets
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
return 0
}
# Submits a keyshard for the rekey procedure
# Returns 0 on success
# Returns 1 on failure
# Returns KEY_SECRET_SHARES when authentication completes
function rekeySubmitShard {
local nonce="$1"
local index="$2"
local verifyauth="$3"
local prefix="$4"
local shard
local dnonce
local key
local data
local response
local progress
local root_token
local new_doc
if [ -z "$prefix" ]; then
prefix=cluster-key
fi
shard="$( get_secret "${prefix}-$index" | jq -r .keys[0] )"
dnonce='"nonce": "'$nonce'"'
key='"key": "'$shard'"'
data="{$dnonce,$key}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET /sys/rekey/update "$data"
if [ $? -ne 0 ]; then
return 1
fi
# Check the response for verification_nonce, which
# indicates completion
progress="$( echo "$response" | jq -r '.verification_nonce' )"
if [ -n "$progress" -a "$progress" != 'null' ]; then
log $INFO "Success authenticating:" \
"$((index+1)) of $KEY_REQUIRED_THRESHOLD"
if [ "$verifyauth" == "--verify-auth" ]; then
# delete the rekey and return success
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return "$KEY_SECRET_SHARES"
fi
# Procedure to ensure that the old and new shards are
# secured in k8s secrets. Deletion of old shards will only
# occur when verification is successful.
root_token="$( get_secret cluster-key-root )"
new_doc="$( echo "$response" \
| jq -c '{"keys": .keys,
"keys_base64": .keys_base64,
"root_token": "'"$root_token"'"}' )"
# store the new shards
echo "$response" \
| jq -c '{"keys": .keys, "keys_base64": .keys_base64}' \
| storeVaultInitSecrets cluster-rekey
# check that the secrets match vault's rekey response
echo "$new_doc" | validateSecrets cluster-rekey
if [ $? -ne 0 ]; then
# calling function will abort the rekey
# and any cluster-rekey secrets
log $ERROR "Failed to store and verify shards" \
"after rekey authentication complete"
return 1
fi
# authentication of the rekey request is completed
# successfully
log $INFO "Rekey authentication successful"
return "$KEY_SECRET_SHARES"
fi
# Otherwise verify the response
progress="$( echo "$response" | jq -r '.progress' )"
index="$((index+1))"
if [ "$progress" -ne "$index" ]; then
log $ERROR "Authentication sequence mismatching" \
"($progress, $index)"
return 1
fi
# assert that the servers agree
assertServerStatus "$response"
if [ $? -ne 0 ]; then
log $ERROR "Vault server rekey status fails during" \
"authentication at $index of $KEY_REQUIRED_THRESHOLD"
return 1
fi
log $INFO "Success authenticating:" \
"$index of $KEY_REQUIRED_THRESHOLD"
return 0
}
# Authenticate the rekey request by submitting the key shards
# via rekeySubmitShard
#
# Authentication of the rekey request is the second step
#
# Returns the normal linux success=0, failure!=0
#
function rekeyAuthenticate {
local verifyauth="$1"
local prefix="$2"
local response
local index
local value
local nonce
local progress
local result
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
value="$( echo "$response" | jq -r '.started' )"
if [ 'true' != "$value" ]; then
log $ERROR "Rekey authentication, but rekey not in progress"
return 1
fi
nonce="$( echo "$response" | jq -r '.nonce' )"
progress="$( echo "$response" | jq -r '.progress' )"
if ! [[ "$progress" =~ ^[0-9]{1,}$ ]]; then
log $ERROR "Rekey authentication progress not integer:" \
"$response"
return 1
elif [ "$progress" -ge "$KEY_SECRET_SHARES" ]; then
log $ERROR "Rekey authentication progress out of range:" \
"$response"
return 1
fi
if [ "$progress" -ne 0 ]; then
log $WARNING "Continue authenticating rekey at: $progress"
fi
# authenticate and store the new keys
for index in $( seq $progress $((KEY_SECRET_SHARES-1)) ); do
rekeySubmitShard "$nonce" "$index" $verifyauth $prefix
result="$?"
if [ "$result" -eq "$KEY_SECRET_SHARES" ]; then
# start the verify procedure now
if [ "$verifyauth" != "--verify-auth" ]; then
log $INFO "Starting rekey verify"
fi
break
elif [ "$result" -ne 0 ]; then
return $result
fi
done
return 0
}
# The rekey verification should happen when
# - there is a rekey in progress
# - there is a verification_nonce
#
# Omit rekey verification if:
# - the cluster-rekey shard secrets are absent (unexpected)
# - Verification is complete: cluster-rekey-verified or any later
# stage is complete
#
# Return linux true (0) if the current stage of rekey
# is to complete the rekey verification
function needsVerify {
local progress
assertRekeyStarted
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
progress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if [ -z "$progress" -o "$progress" == "null" ]; then
# There is a rekey in progress, but not with a
# verification nonce
return 1
fi
# Assert that the nonce is UUID-ish
if ! [[ "$progress" =~ ^[a-f0-9-]{36}$ ]]; then
log $ERROR "The verification_nonce is not UUID-ish:" \
"$REKEY_STATUS_JSON"
return 2
fi
assertShardSecrets cluster-rekey
if [ $? -ne 0 ]; then
# this should not happen: verify in progress but no
# cluster-rekey secrets
log $ERROR "rekey verify in progress but no cluster-rekey"
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
return 0
}
# Submits a keyshard for the rekey verification procedure
# Returns 0 on success
# Returns 1 on failure
# Returns KEY_REQUIRED_THRESHOLD when verification completes
function rekeyVerifySubmitShard {
local nonce="$1"
local index="$2"
local shard
local dnonce
local key
local data
local response
local progress
shard="$( get_secret cluster-rekey-$index \
| jq -r .keys[0] )"
dnonce='"nonce": "'$nonce'"'
key='"key": "'$shard'"'
data="{$dnonce,$key}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET \
/sys/rekey/verify "$data"
if [ $? -ne 0 ]; then
# an error is printed
return 1
fi
progress="$( echo "$response" | jq -r ".complete" )"
if [ "$progress" == 'true' ]; then
log $INFO "Success verifying: using new shards"
set_secret cluster-rekey-verified /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return $KEY_REQUIRED_THRESHOLD
fi
progress="$( echo "$response" | jq -r ".progress" )"
if [ -z "$progress" -o "$progress" == "null" ]; then
log $ERROR "Expecting rekey verify progress" \
"[$((index+1))] instead of [$progress]"
return 1
fi
# Print the progress of rekey verify.
if [ "$((index+1))" -eq "$progress" ]; then
log $INFO "Success verifying:" \
"$progress of $KEY_REQUIRED_THRESHOLD"
elif [ "$((index+1))" -gt "$progress" ]; then
# A sanity check only
log $WARNING "Verify progress [$progress] less" \
"than expected [$((index+1))]"
else
# A sanity check only
log $WARNING "Verify progress [$progress]" \
"greater than expected [$((index+1))]"
fi
assertVerifyStatus "$response"
if [ $? -ne 0 ]; then
log $ERROR "Vault server verify status fails during" \
"verification at" \
"$index of $KEY_REQUIRED_THRESHOLD"
return 1
fi
return 0
}
# Return linux true (0) if the current step of the rekey procedure
# is to verify shard secrets
#
# This step confirms that vault manager has correctly stored the
# shards received from the vault server. This allows failures of
# the procedure to be recovered:
# - receive the shards from vault
# - store the shards in k8s secrets
# - play the shards back to vault
# - upon successful verification the new shards are effective
#
# Verification of the rekey request is the third step
#
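# For reference, GET /sys/rekey/verify returns a status document with
# at least the started, nonce and progress fields, which are the only
# fields used here.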
function rekeyVerify {
local value
local nonce
local progress
local response
local shard
local dnonce
local key
local data
local index
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/verify
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
value="$( echo "$response" | jq -r '.started' )"
if [ 'true' != "$value" ]; then
log $ERROR "Rekey verify, but rekey not in progress"
return 1
fi
nonce="$( echo "$response" | jq -r '.nonce' )"
progress="$( echo "$response" | jq -r '.progress' )"
if ! [[ "$progress" =~ ^[0-9]{1,}$ ]]; then
log $ERROR "Rekey authentication progress not integer:" \
"$response"
return 1
elif [ "$progress" -ge "$KEY_SECRET_SHARES" ]; then
log $ERROR "Rekey authentication progress out of range:" \
"$response"
return 1
fi
if [ "$progress" -ne 0 ]; then
log $WARNING "Continue verifying rekey at: $progress"
fi
# assert that the servers agree on verify status
assertVerifyStatus "$response"
if [ $? -ne 0 ]; then
return 1
fi
# authenticate the verify procedure
for index in $( seq $progress $((KEY_SECRET_SHARES-1)) ); do
rekeyVerifySubmitShard "$nonce" "$index"
result=$?
if [ "$result" -eq "$KEY_REQUIRED_THRESHOLD" ]; then
# rekeyVerifySubmitShard returns KEY_REQUIRED_THRESHOLD
# when .complete == true was received
return 0
elif [ "$result" -ne 0 ]; then
# any other non-zero result is a failure
return 1
fi
done
log $ERROR "Verify procedure ended without completion"
return 1
}
# The shuffling of key shards in k8s secrets should happen when
# the cluster-rekey-verified procedure step is completed.
#
# Omit shuffling if:
# - vault server reports rekey in progress (unclear status)
# - shuffling is already complete: cluster-rekey-shuffle or later
# stage is complete
# - there are no cluster-rekey secrets
# - there are cluster-key-bk secrets
#
# Return linux true (0) if the current stage of rekey
# is to complete the swapping of validated shards
function needsShuffle {
local progress
# assert that a rekey is not in progress
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
# 1 - maintain the status of rekey in progress
# 2 - api error, try again later
return "$progress"
fi
secretExists cluster-rekey-verified >/dev/null
if [ $? -ne 0 ]; then
# proceeds to next procedure step
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
assertShardSecrets cluster-rekey
case $? in
0)
# There is no rekey in progress, and there is a set
# of cluster-rekey shards recorded
;;
$KEY_SECRET_SHARES)
# There is no rekey in progress, and there are no
# cluster-rekey shards recorded
return 1
;;
*)
# with cluster-rekey-verified, an incomplete set of
# cluster-rekey indicates partial deletion after copying
# to cluster-key
# will want to audit the cluster-key secrets before
# deleting cluster-rekey
log $WARNING "The number key shard secrets for" \
"cluster-rekey is not complete"
return 1
;;
esac
# otherwise allow rekeyShuffleKeys to be re-entrant with respect
# to the existence or absence of cluster-key and cluster-key-bk;
# cluster-rekey is only deleted when confirmed to be copied to
# cluster-key
return 0
}
# This procedure shuffles the shard secrets from cluster-rekey to
# cluster-key to cluster-key-bk
#
# The function is intended to resolve failures of the vault-manager
# process when it is interrupted abruptly, such as with kill -9.
# In combination with needsShuffle it can be re-run until it
# completes the shuffle:
# - cluster-key shards are copied to cluster-key-bk
# - cluster-key shards are deleted
# - cluster-rekey is copied to cluster-key
# - cluster-rekey is deleted
#
# A subsequent step audits the new keys before deleting the
# cluster-key-bk secrets
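# The normal (failure-free) sequence is:
#   copy cluster-key   -> cluster-key-bk, then delete cluster-key
#   copy cluster-rekey -> cluster-key,    then delete cluster-rekey
#   record the cluster-rekey-shuffle milestone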
function rekeyShuffleKeys {
local key_exists
local rekey_exists
local bk_exists
local key_doc=""
local rekey_doc=""
assertShardSecrets cluster-key
key_exists=$?
assertShardSecrets cluster-rekey
rekey_exists=$?
assertShardSecrets cluster-key-bk
bk_exists=$?
if [ "$key_exists" -eq 0 ]; then
key_doc="$( reconstructInitResponse cluster-key )"
echo "$key_doc" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to read cluster-key"
return 1
fi
fi
if [ "$rekey_exists" -eq 0 ]; then
rekey_doc="$( reconstructInitResponse cluster-rekey )"
echo "$rekey_doc" | validateSecrets cluster-rekey
if [ $? -ne 0 ]; then
log $ERROR "Failed to read cluster-rekey"
return 1
fi
else
# this is recovery path
if [ -n "key_doc" ]; then
log $WARNING "Progress cluster-rekey-shuffle without" \
"cluster-rekey"
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return
fi
log $ERROR "No cluster-key or cluster-rekey"
return 1
fi
if [ "$bk_exists" -lt "$KEY_SECRET_SHARES" \
-a "$bk_exists" -ne 0 ]; then
# this is a recovery path
# an incomplete copy of cluster-key secrets
if [ -n "$key_doc" ]; then
deleteShardSecrets cluster-key-bk
assertShardSecrets cluster-key-bk
bk_exists=$?
if [ "$bk_exists" -lt "$KEY_SECRET_SHARES" ]; then
log $ERROR "Failed to delete incomplete" \
"cluster-key-bk"
return 1
fi
else
# this shouldn't happen;
# either failure is anticipated, but not both at once
log $ERROR "Sanity: incomplete both cluster-key-bk" \
"and missing/incomplete cluster-key secrets"
return 1
fi
fi
if [ "$bk_exists" -eq 0 ]; then
# this is a recovery path
if [ -n "$key_doc" ]; then
# Assert that cluster-key and cluster-key-bk are the
# same
log $INFO "Recovering from pre-existing cluster-key-bk"
echo "$key_doc" | validateSecrets cluster-key-bk
if [ $? -eq 0 ]; then
# cluster-key-bk == cluster-key
deleteShardSecrets cluster-key
assertShardSecrets cluster-key
key_exists=$?
key_doc=""
else
echo "$key_doc" | validateSecrets cluster-rekey
if [ $? -eq 0 ]; then
# Recovering cluster-key == cluster-rekey
log $INFO "Recovering with cluster-key"
deleteShardSecrets cluster-rekey
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return 0
else
log $ERROR "Three different sets of keys" \
"in k8s secrets"
return 1
fi
fi
fi
# else: there is no cluster-key to backup
else
# this is the normal procedure path
log $INFO "Copying cluster-key secrets to cluster-key-bk"
copyShardSecrets cluster-key cluster-key-bk
echo "$key_doc" | validateSecrets cluster-key-bk
if [ $? -ne 0 ]; then
log $ERROR "Failed to copy cluster-key to cluster-key-bk"
deleteShardSecrets cluster-key-bk
return 1
fi
deleteShardSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to delete cluster-key secrets"
return 1
fi
assertShardSecrets cluster-key
key_exists=$?
key_doc=""
fi
# cluster-key-bk exists here
# cluster-rekey rekey_doc is valid here
# if any cluster-key secrets still exist (for example, fewer than
# KEY_SECRET_SHARES of them), delete them; deleteShardSecrets is a
# no-op if there are none
deleteShardSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to delete cluster-key"
return 1
# try again later
fi
log $INFO "Copying cluster-rekey secrets to cluster-key"
copyShardSecrets cluster-rekey cluster-key
echo "$rekey_doc" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to copy cluster-rekey to cluster-key"
return 1
fi
deleteShardSecrets cluster-rekey
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return 0
}
# The audit of cluster-key should happen when these other procedure
# steps are completed:
# - cluster-rekey-verified
# - cluster-rekey-shuffle
#
# Omit audit if:
# - vault server reports rekey in progress (failed previous audit?)
# - audit is already complete: cluster-rekey-audit exists
#
# Return linux true (0) if the current stage of rekey
# is to run the audit
function needsAudit {
local progress
# assert that a rekey is not in progress
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
# Select recovery path with response '3'
secretExists cluster-rekey-audit >/dev/null
if [ $? -eq 0 ]; then
# this path indicates a failure to complete
# finalizeRekey. cluster-rekey-audit is the last
# milestone to be deleted
log $INFO "rekey audit already completed"
return 3
fi
secretExists cluster-rekey-request >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
secretExists cluster-rekey-verified >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
secretExists cluster-rekey-shuffle >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
assertShardSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "rekey audit requested but cluster-keys absent"
return 1
fi
return 0
}
# Audit that the active vault server authenticates with the cluster
# keys specified by prefix
#
# Returns 0 on success
# Returns 1 if the audit fails
# Returns 2 if there was a failure unrelated to authentication
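#
# The audit works by initializing a throw-away rekey (with
# verification required), authenticating it with the shards stored
# under $prefix, and then cancelling it; a successful authentication
# shows the stored shards are the ones the vault servers are keyed
# with, without actually changing the keys.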
function rekeyAudit {
local prefix="$1"
local value
local response
if [ -z "$prefix" ]; then
prefix="cluster-key"
fi
log $INFO "Auditing the shards in $prefix secrets"
assertNoRekey
if [ $? -ne 0 ]; then
log $ERROR "Cannot audit with rekey in progress"
return 2
fi
assertShardSecrets "$prefix"
if [ $? -ne 0 ]; then
log $ERROR "Audit fails with absent $prefix secrets"
return 1
fi
rekeyInitialize
if [ $? -ne 0 ]; then
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 2
fi
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# There's no reason to believe this one will succeed where
# the previous call didn't
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 2
fi
value="$( echo "$response" | jq -r ".verification_required" )"
if [ "$value" != "true" ]; then
log $ERROR "Audit sanity: verification_required not set:" \
"$response"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 1
fi
rekeyAuthenticate --verify-auth "$prefix"
result="$?"
if [ "$result" -eq 0 ]; then
log $INFO "Audit of cluster-key secrets passes"
else
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
fi
return $result
}
# clean up the artifacts from the rekey procedure
# The audit procedure proves the shards in cluster-key
# secrets will unseal the vault.
#
# If vault-manager is killed during this procedure step it should
# continue to try to delete the artifacts until finally deleting
# cluster-rekey-audit
function finalizeRekey {
local secrettext
secrettext="$( get_secret cluster-rekey-audit )"
log $INFO "removing artifacts of the rekey procedure:" \
"$secrettext"
assertShardSecrets cluster-rekey --nokeys
if [ $? -ne 0 ]; then
log $WARNING "removing cluster-rekey secrets" \
"after audit"
deleteShardSecrets cluster-rekey
fi
deleteShardSecrets cluster-key-bk
deleteSecrets cluster-rekey-verified
deleteSecrets cluster-rekey-shuffle
deleteSecrets cluster-rekey-request
deleteSecrets cluster-rekey-audit
log $INFO "Rekey request complete: $secrettext"
}
# This procedure handles a few cases where the active vault server
# or vault-manager was killed.
#
# - rekey authentication completed, but vault-manager was killed
# before the shards could be stored
# - rekey verification may be cancelled by the failure of the active
# vault server
#
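# Roughly, the recovery decisions below are keyed off which milestone
# secrets and shard sets exist:
#   audit or shuffle milestone present -> nothing to recover here
#   verified milestone present         -> re-record the shuffle
#                                         milestone (audit first if
#                                         cluster-rekey is partial)
#   no milestone, full cluster-rekey   -> audit to find which shard
#                                         set the servers are using
#   no milestone, partial/no rekey set -> audit cluster-key and
#                                         restart the procedure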
function rekeyRecovery {
local key_exists
local rekey_exists
local bk_exists
local verified_exists
local shuffle_exists
local audit_exists
local inprogress
local verifyprogress
log $INFO "Recovering the rekey procedure"
# assert that the vault servers are all up and agree
# about the rekey status
allServersRunning \
&& allServersHaveIP \
&& allServersUnsealed \
|| return 1
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI REKEY_STATUS_JSON GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is printed
# wait for recovery
REKEY_STATUS_JSON=''
return 1
fi
assertServerStatus "$REKEY_STATUS_JSON"
if [ $? -ne 0 ]; then
# wait for the vault servers to sync
return 1
fi
inprogress="$( echo "$REKEY_STATUS_JSON" | jq -r '.started' )"
verifyprogress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if [ "$inprogress" == "true" ]; then
# If a rekey is in progress, then cancel it
# - an authentication will reinitialize
# - a verification will reinitialize
# - a rekeyAudit will retry
log $INFO "Cancelling rekey in progress"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# retry later
return 1
fi
fi
assertShardSecrets cluster-key
key_exists=$?
assertShardSecrets cluster-rekey
rekey_exists=$?
assertShardSecrets cluster-key-bk
bk_exists=$?
secretExists cluster-rekey-verified >/dev/null
verified_exists=$?
secretExists cluster-rekey-shuffle >/dev/null
shuffle_exists=$?
secretExists cluster-rekey-audit >/dev/null
audit_exists=$?
# review each of the milestones to discern the failure point
if [ "$audit_exists" -eq 0 ]; then
true
# no recovery options here
# pass through
elif [ "$shuffle_exists" -eq 0 ]; then
true
# no recovery options here
# pass through
elif [ "$verified_exists" -eq 0 ]; then
if [ "$rekey_exists" -gt 0 ]; then
if [ "$rekey_exists" -lt "$KEY_SECRET_SHARES" ]; then
# with verified_exists, indicates partial deletion
# of the cluster-rekey secrets after copying to
# cluster-key. Audit the cluster-key secrets before
# deleting rekey
rekeyAudit cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Audit cluster-key fails with a" \
"partial set of cluster-rekey"
return 1
fi
deleteShardSecrets cluster-rekey
fi
# Handle condition where secrets were shuffled but
# vault-manager failed before recording the
# milestone cluster-rekey-shuffle
# auditRekey will double-check that cluster-key is
# in use
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
log $INFO "Continuing rekey procedure with audit" \
"of cluster-key"
return 0
fi
# else: pass through
else
if [ "$rekey_exists" -eq 0 ]; then
# Handle condition where an active server fails during
# verification: vault may have cancelled the rekey procedure
# The question is: which shards are the vault servers
# using?
log $INFO "Recovering from mismatch of cluster-rekey" \
"and verified status"
# Audit the existing shards to see which ones the
# vault servers are keyed for.
# Most likely that the verification failed due to
# active server failing, start with cluster-key
rekeyAudit cluster-key
if [ $? -eq 0 ]; then
# The rekey verification did not complete
# remove cluster-rekey secrets
# The rekey procedure should restart
deleteShardSecrets cluster-rekey
log $INFO "Restart rekey procedure"
return 0
fi
# this happens when vault-manager process is killed
rekeyAudit cluster-rekey
if [ $? -eq 0 ]; then
set_secret cluster-rekey-verified /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
log $INFO "Continue rekey procedure with cluster-rekey"
return 0
fi
# else: pass through
elif [ "$rekey_exists" -eq 5 ]; then
# There are no cluster-rekey secrets; and the rekey is
# cancelled: the rekey procedure will restart
log $INFO "Continue rekey procedure with initialization"
return 0
else # cluster-rekey secrets are incomplete
# Handle condition where verification is needed but
# vault-manager did not store shards. The rekey was
# canceled above
# assert cluster-key before deleting cluster-rekey
rekeyAudit cluster-key
if [ $? -eq 0 ]; then
# the rekey procedure will restart
log $INFO "Deleting partial set of" \
"cluster-rekey secrets"
deleteShardSecrets cluster-rekey
return 0
fi
# else: pass through
fi
fi
log $ERROR "Did not recover from current rekey status"
}
# The state machine for rekeying the vault server
#
# The overall procedure for a rekey request includes:
# - wait for stability of vault servers
# - initialize the procedure
# - authenticate the rekey procedure by supplying shards
# - store the new shards
# - verify the rekey with the new shards read from k8s secrets
# - rotate the shard secrets:
# cluster-rekey - cluster-key - cluster-key-bk
# - Audit the new shards with active vault server
# - Remove artifacts of rekey procedure:
# cluster-key-bk, milestone secrets
#
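# The milestone secrets used to track progress map to the steps
# roughly as follows:
#   cluster-rekey-request  - a rekey has been requested
#   cluster-rekey-verified - the new shards have been verified
#   cluster-rekey-shuffle  - shards rotated into cluster-key
#   cluster-rekey-audit    - cluster-key audited against the servers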
function vaultRekey {
local records
local count
local result
local secrettext
if ! needsRekey; then
return
fi
# Retrieve and record the rekey status once for the tests that
# follow
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI REKEY_STATUS_JSON GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is printed
REKEY_STATUS_JSON=''
return
fi
needsAudit
case $? in
0)
rekeyResuming
rekeyAudit
if [ $? -eq 0 ]; then
set_secret cluster-rekey-audit /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
finalizeRekey
fi
return
;;
1) # continue to procedure step
;;
3) # audit is already completed
secretExists cluster-rekey-audit >/dev/null
if [ $? -eq 0 ]; then
# the cluster-key secrets were audited, but vault
# manager didn't get a chance to set
# cluster-rekey-audit milestone
finalizeRekey
return
fi
log $ERROR "Discrepancy between needsAudit and" \
"rekeyVault"
return
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsShuffle
case $? in
0)
rekeyResuming
rekeyShuffleKeys
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsVerify
case $? in
0)
rekeyResuming
rekeyVerify
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsAuthentication
case $? in
0)
rekeyResuming
rekeyAuthenticate
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsInitialization
case $? in
0)
secrettext="$( get_secret cluster-rekey-request )"
log $INFO "Rekey request started: $secrettext"
rekeyInitialize
return
;;
1) # continue to failure
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
# falling through the case statements requires remediation
rekeyResuming
rekeyRecovery
}
#
# LOGIC
#
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
# This script was sourced
return 0
fi
if [ -n "$EARLY_PAUSE" ]; then
echo -n "$EARLY_PAUSE" > $PAUSEFILE
fi
exit_on_trap 1
# Match the kubectl client version to the k8s server version
pickK8sVersion
# check if this pod is helping to convert storage from pvc to k8s
# secrets
mountHelper
exit_on_trap 15
# check if there are existing key shard secrets, a bootstrap secret,
# or a pre-existing PVC resource
K8S_SECRETS_PREEXIST="$( secretExists cluster-key-root )"
exit_on_trap 16
BOOTSTRAP_PREEXISTS="$( secretExists cluster-key-bootstrap )"
exit_on_trap 17
PVC_PREEXISTS="$( pvcRemoved )"
exit_on_trap 18
runConversion
exit_on_trap 19
# check if the PVC still persists after conversion, and if so issue a warning
PVC_PREEXISTS="$( pvcRemoved )"
PVC_STATUS=$?
if [ $PVC_STATUS -eq 1 ]; then
log $DEBUG "PVC storage $PVC_PREEXISTS is currently terminating"
elif [ $PVC_STATUS -eq 2 ]; then
log $WARNING "PVC storage $PVC_PREEXISTS deletion has failed during conversion"
fi
# Wait for at least one vault server in order to check initialization
waitForPods 1
exit_on_trap 2
log $DEBUG "Putting a list of vault pods and ip in $WORKDIR/pods.txt"
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 3
vaultInitialized
IS_VAULT_INITIALIZED=$?
if [ $IS_VAULT_INITIALIZED -eq 1 ]; then
exit_on_trap 4
desired_pods=$HA_REPLICAS
# Waiting for vault servers to come up
waitForPods $desired_pods
exit_on_trap 5
log $INFO "Putting a list of vault pods and IPs in $WORKDIR/pods.txt"
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 6
log $DEBUG "Initializing the vault on vault-0 and" \
"storing keys in k8s secrets"
initVault
# Some sleep is required to allow convergence
sleep "$INIT_CONVERGE_TIME"
log $DEBUG "Unsealing vault-0 using the init shards"
for row in $(awk 'NR==1{print $2}' $WORKDIR/pods.txt); do
unsealVault "$row"
done
log $DEBUG "Joining other vault servers to the HA Raft cluster"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
joinRaft "$row"
sleep "$JOIN_RATE"
done
exit_on_trap 7
log $INFO "Unsealing the remaining vaults"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
unsealVault "$row"
sleep "$UNSEAL_RATE"
exit_on_trap 8
done
else
log $INFO "Vault is initialized"
fi
exit_on_trap 9
# initialize the state machine - vault server status records
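# each record has the form /<host>/<dns_name>//, with the two empty
# trailing fields reserved for the seal-status tracking done by
# runStateMachine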
echo "" > "$PODREC_F"
while read host dns_name; do
if [ -z "$host" ]; then
continue
fi
status_rec="/$host/$dns_name//"
echo "$status_rec" >> "$PODREC_F"
done <$WORKDIR/pods.txt
# Loop forever to check the seal status of vaults and
# unseal if required
log $INFO "Checking vault pods seal status in perpetuity..."
while true; do
exit_on_trap 10
sleep "$STATUS_RATE"
exit_on_trap 20
pickK8sVersion # check if the k8s server version is changed
count=$( $KUBECTL get pods -n "${VAULT_NS}" \
-o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
| grep "^${VAULT_FN}-manager" | wc -w )
if [ "$count" -gt 1 ]; then
log $ERROR "Multiple instances of vault manager detected. Waiting until one left"
exit_on_trap 21
continue
fi
rm $WORKDIR/pods.txt
echo "" > "$PODREC_TMP_F"
exit_on_trap 11
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 12
while read host dnsname; do
if [ -z "$dnsname" ]; then
# probably a recovering pod waiting for an IP address
log $DEBUG "pod list has empty data: [$host] [$dnsname]"
continue
fi
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI server_status GET $dnsname.$POD_TARGET_BASE \
/sys/health
echo -n "$server_status" > $WORKDIR/healthcheck.txt
TEMP=$( echo "$server_status" | jq -r .sealed )
exit_on_trap 13
# Decide when to unseal the vault server; this also
# adds records to new_pods_status.txt
runStateMachine "$host" "$dnsname" "$TEMP"
exit_on_trap 14
done <$WORKDIR/pods.txt
mv "$PODREC_TMP_F" "$PODREC_F"
vaultRekey
done
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:init.sh: {}
manager: vault-init-unseal
name: vault-init-unseal-3
namespace: {{ .Release.Namespace }}
---
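# ConfigMap holding the manifest for the mount-helper Job. The
# vault-manager pod mounts this manifest at /opt/yaml and applies it
# when the legacy PVC storage is converted to k8s secrets; the Job
# runs init.sh in MOUNT_HELPER mode with the PVC mounted at /mnt/data.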
apiVersion: v1
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:pvc-attach.yaml: {}
manager: {{ .Values.vault.name }}-mount-helper
name: {{ .Values.vault.name }}-mount-helper
namespace: {{ .Release.Namespace }}
data:
pvc-attach.yaml: |
---
apiVersion: batch/v1
kind: Job
metadata:
name: {{ .Values.vault.fullname }}-mount-helper
namespace: {{ .Release.Namespace }}
spec:
activeDeadlineSeconds: 600
completions: 1
parallelism: 1
ttlSecondsAfterFinished: 0
template:
spec:
restartPolicy: Never
serviceAccountName: "{{ .Values.vault.fullname }}-manager-1"
{{- if .Values.manager.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.manager.imagePullSecrets | nindent 12 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 12 }}
{{- end }}
containers:
- name: mount
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.manager.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: MANAGER_MODE
value: MOUNT_HELPER
- name: PVC_DIR
value: /mnt/data
volumeMounts:
- name: mount-helper
mountPath: /opt/script
readOnly: true
- name: manager-pvc
mountPath: /mnt/data
readOnly: false
volumes:
- name: mount-helper
configMap:
name: vault-init-unseal-3
- name: manager-pvc
persistentVolumeClaim:
claimName: manager-pvc-sva-vault-manager-0
---
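# RBAC shared by vault-manager and the mount-helper Job: list pods
# and exec into them, manage the key-shard secrets, create/delete the
# mount-helper Job, and remove the converted PVC.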
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: {{ .Release.Namespace }}
name: {{ .Values.vault.fullname }}-manager-1
rules:
- apiGroups: [""] # "" indicates the core API group
resources: ["pods"]
verbs: ["get", "watch", "list"]
- apiGroups: [""] # "" indicates the core API group
resources: ["pods/exec"]
verbs: ["create"]
- apiGroups: [""] # "" indicates the core API group
resources: ["secrets"]
verbs: ["get", "create", "delete"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["get", "create", "delete"]
- apiGroups: [""] # "" indicates the core API group
resources: ["persistentvolumeclaims"]
verbs: ["list", "delete"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Values.vault.fullname }}-manager-1
namespace: {{ .Release.Namespace }}
labels:
helm.sh/chart: {{ .Values.manager.chart }}
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ .Values.vault.fullname }}-manager-1
namespace: {{ .Release.Namespace }}
subjects:
- kind: ServiceAccount
name: {{ .Values.vault.fullname }}-manager-1
roleRef:
kind: Role
name: {{ .Values.vault.fullname }}-manager-1
apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ .Values.vault.fullname }}-manager-3
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
component: webhook
spec:
serviceName: {{ .Values.vault.fullname }}
replicas: 1
selector:
matchLabels:
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
template:
metadata:
labels:
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
{{- if .Values.manager.extraLabels }}
{{- toYaml .Values.manager.extraLabels | nindent 8 -}}
{{- end }}
spec:
serviceAccountName: "{{ .Values.vault.fullname }}-manager-1"
{{- if .Values.manager.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.manager.imagePullSecrets | nindent 8 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 8 }}
{{- end }}
containers:
- name: manager
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.manager.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: CA_CERT
value: /mnt/data/ca/tls.crt
volumeMounts:
- name: vault-init-unseal-3
mountPath: /opt/script
readOnly: false
- name: mount-helper-yaml
mountPath: /opt/yaml
readOnly: true
- name: vault-ca
mountPath: /mnt/data/ca
readOnly: true
volumes:
- name: vault-init-unseal-3
configMap:
name: vault-init-unseal-3
- name: mount-helper-yaml
configMap:
name: {{ .Values.vault.name }}-mount-helper
- name: vault-ca
secret:
secretName: vault-ca