metal/mtce/src/scripts/wipedisk

#! /bin/bash
#
# Copyright (c) 2013-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
usage ()
{
echo "Usage: `basename $0` [-h|--force]"
echo "Erases the master boot record on the hard drive."
echo "WARNING: All data on this hard drive will be lost."
echo ""
echo "Options:"
echo " -h display this help"
echo " --force do not ask for confirmation"
exit 1
}
OPTS=`getopt -o h -l force -- "$@"`
if [ $? != 0 ]
then
    exit 1
fi
eval set -- "$OPTS"
while true ; do
case "$1" in
-h) usage; shift;;
--force) FORCE=1; shift;;
--) shift; break;;
esac
done
if [ $# != 0 ]
then
echo "Invalid argument. Use -h for help."
exit 1
fi
declare WIPE_HDD=
# Only wipe the rootfs and boot device disks
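# The rootfs and /boot partition devices are resolved back to their parent disks by
# stripping the "-partN" suffix from their /dev/disk/by-path symlink names.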
rootfs_part=$(df --output=source / | tail -1)
rootfs=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $rootfs_part | sed 's/-part[0-9]*//'))
boot_disk_part=$(df --output=source /boot | tail -1)
boot_disk=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $boot_disk_part | sed 's/-part[0-9]*//'))
WIPE_HDD=$rootfs
if [ "$rootfs" != "$boot_disk" ]
then
WIPE_HDD="$WIPE_HDD $boot_disk"
fi
# Due to dynamic partitioning, volume groups can have PVs across multiple disks.
# When deleting the rootfs, we should also delete all PVs (across all disks) that
# are part of volume groups that are also present on the rootfs.
rootfs_vgs=$(pvdisplay -C --separator ' | ' -o pv_name,vg_name | grep $rootfs | awk '{print $3}' | sort -u)
pvs_to_delete=""
for vg in $rootfs_vgs
do
    pv=$(pvdisplay --select "vg_name=$vg" | awk '/PV Name/{print $3}')
    pvs_to_delete="$pvs_to_delete $pv"
done
WIPE_HDD="$pvs_to_delete $WIPE_HDD"
# During host reinstalls ceph journals also require wiping, so we also gather information
# on journal partitions. Although this script is also called during upgrades, no issues were
# observed during that operation, so we skip wiping the journals during upgrades.
JOURNAL_DISKS=""
HOST_IN_UPGRADE=$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)
# The "ceph-disk list" command works even if the ceph cluster is not operational (for example if
# too many monitors are down) so we can grab journal info from the node, even in such scenarios.
# As a safety measure, we also wrap the command in a timeout command; it should never take long
# for the command to return, but if it does it's safer to just time it out after 15 seconds.
CEPH_DISK_OUTPUT=$(timeout 15 ceph-disk list 2>/dev/null)
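# Wipe journals only when ceph-disk ran successfully and the host is not being upgraded.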
if [[ $? == 0 && "$HOST_IN_UPGRADE" != "true" ]]; then
    JOURNAL_DISKS=$(echo "$CEPH_DISK_OUTPUT" | grep "ceph journal" | awk '{print $1}')
fi
WIPE_HDD="$JOURNAL_DISKS $WIPE_HDD"
if [ ! $FORCE ]
then
echo "This will result in the loss of all data on the hard drives and"
echo "will require this node to be re-installed."
echo "The following disks will be wiped:"
for dev in $WIPE_HDD
do
echo " $dev"
done | sort
echo
read -p "Are you absolutely sure? [y/n] " -r
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
echo "Aborted"
exit 1
fi
read -p "Type 'wipediskscompletely' to confirm: " -r
if [[ ! $REPLY = "wipediskscompletely" ]]
then
echo "Aborted"
exit 1
fi
fi
# Note that BA5EBA11-0000-1111-2222- is the partition GUID prefix used by STX; it is defined
# in sysinv's constants.py. Since the 000000000001 suffix is used by custom stx LVM partitions,
# the next suffix (000000000002) is used for the persistent backup partition.
BACKUP_PART_GUID="BA5EBA11-0000-1111-2222-000000000002"
part_type_guid_str="Partition GUID code"
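# "Partition GUID code" is the field label in sgdisk -i output from which the partition
# type GUID is parsed below.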
for dev in $WIPE_HDD
do
    if [[ -e $dev ]]
    then
        if [ "$dev" == "$rootfs" ]
        then
            part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
            for part_number in "${part_numbers[@]}"; do
                part=$dev$part_number
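                # NVMe devices name partitions with a "p" separator (e.g. nvme0n1p1),
                # so rebuild the partition path for those devices.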
                case $part in
                    *"nvme"*)
                        part=${dev}p${part_number}
                        ;;
                esac
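                # flock serializes access to $dev while sgdisk reads the partition info,
                # so the query does not race with other tools touching the device.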
                sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev)
                part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
                if [ "$part_type_guid" == "$BACKUP_PART_GUID" ]; then
                    echo "Skipping wipe of backup partition $part..."
                    continue
                fi
                echo "Wiping partition $part..."
                wipefs -f -a $part
                # Zero out the first and last few sectors of the partition. This is required with
                # GPT partitions, which store partition info at both the start and the end of the block.
                # Skip /, or we will lose access to the tools on the system.
                if [[ $part != $rootfs_part ]]
                then
                    dd if=/dev/zero of=$part bs=512 count=34
                    dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
                fi
                echo "Removing partition $part..."
                sgdisk $dev --delete $part_number
            done
            # Wipe the bootloader signature (the first 440 bytes, i.e. the MBR boot code area)
            # to allow reboot from secondary boot devices (e.g. PXE)
            dd if=/dev/zero of=$dev bs=440 count=1
        else
            echo "Wiping $dev..."
            wipefs -f -a $dev
            echo "$JOURNAL_DISKS" | grep -qw "$dev"
            if [[ $? == 0 ]]; then
                # Journal partitions require additional wiping. Based on the ceph-manage-journal.py
                # script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
                # wiping 100MB of data at the beginning of the partition should be enough. We also
                # wipe 100MB at the end, just to be safe.
                dd if=/dev/zero of=$dev bs=1M count=100
                # blockdev --getsz reports 512-byte sectors, while seek= with bs=1M counts 1MiB
                # blocks, so convert the device size to MiB before seeking to the last 100MB.
                dd if=/dev/zero of=$dev bs=1M count=100 seek=$((`blockdev --getsz $dev` / 2048 - 100))
            else
                # Clear any previous GPT tables or LVM data by zeroing the first and last few
                # sectors of the disk; GPT stores partition info at both the start and the end
                # of the block device.
                dd if=/dev/zero of=$dev bs=512 count=34
                dd if=/dev/zero of=$dev bs=512 count=34 seek=$((`blockdev --getsz $dev` - 34))
            fi
        fi
    fi
done
if [[ -z $WIPE_HDD ]]
then
echo "No disks were detected."
else
sync
echo "The disk(s) have been wiped."
fi