metal/mtce/src/scripts/wipedisk

#! /bin/bash
#
# Copyright (c) 2013-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
usage ()
{
echo "Usage: `basename $0` [-h|--force]"
echo "Erases the master boot record on the hard drive."
echo "WARNING: All data on this hard drive will be lost."
echo ""
echo "Options:"
echo " -h display this help"
echo " --force do not ask for confirmation"
exit 1
}
OPTS=`getopt -o h -l force -- "$@"`
if [ $? != 0 ]
then
    exit 1
fi
eval set -- "$OPTS"
while true ; do
case "$1" in
-h) usage; shift;;
--force) FORCE=1; shift;;
--) shift; break;;
esac
done
if [ $# != 0 ]
then
echo "Invalid argument. Use -h for help."
exit 1
fi
declare WIPE_HDD=
# Only wipe the rootfs and boot device disks
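# The rootfs and /boot partition devices are resolved back to their parent disks by
# stripping the "-partN" suffix from their /dev/disk/by-path symlink names.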
rootfs_part=$(df --output=source / | tail -1)
rootfs=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $rootfs_part | sed 's/-part[0-9]*//'))
boot_disk_part=$(df --output=source /boot | tail -1)
boot_disk=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $boot_disk_part | sed 's/-part[0-9]*//'))
WIPE_HDD=$rootfs
if [ "$rootfs" != "$boot_disk" ]
then
WIPE_HDD="$WIPE_HDD $boot_disk"
fi
# Due to dynamic partitioning, volume groups can have PVs across multiple disks.
# When deleting the rootfs, we should also delete all PVs (across all disks) that
# are part of volume groups that are also present on the rootfs.
rootfs_vgs=$(pvdisplay -C --separator ' | ' -o pv_name,vg_name | grep $rootfs | awk '{print $3}' | sort -u)
pvs_to_delete=""
for vg in $rootfs_vgs
do
    pv=$(pvdisplay --select "vg_name=$vg" | awk '/PV Name/{print $3}')
    pvs_to_delete="$pvs_to_delete $pv"
done
WIPE_HDD="$pvs_to_delete $WIPE_HDD"
# During host reinstalls ceph journals also require wiping, so we also gather information
# on journal partitions. Although this script is also called during upgrades, no issues were
# observed during that operation, so we skip wiping the journals during upgrades.
JOURNAL_DISKS=""
HOST_IN_UPGRADE=$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)
# The "ceph-disk list" command works even if the ceph cluster is not operational (for example if
# too many monitors are down) so we can grab journal info from the node, even in such scenarios.
# As a safety measure, we also wrap the command in a timeout command; it should never take long
# for the command to return, but if it does it's safer to just time it out after 15 seconds.
CEPH_DISK_OUTPUT=$(timeout 15 ceph-disk list 2>/dev/null)
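# Wipe journals only when ceph-disk ran successfully and the host is not being upgraded.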
if [[ $? == 0 && "$HOST_IN_UPGRADE" != "true" ]]; then
    JOURNAL_DISKS=$(echo "$CEPH_DISK_OUTPUT" | grep "ceph journal" | awk '{print $1}')
fi
WIPE_HDD="$JOURNAL_DISKS $WIPE_HDD"
if [ ! $FORCE ]
then
echo "This will result in the loss of all data on the hard drives and"
echo "will require this node to be re-installed."
echo "The following disks will be wiped:"
for dev in $WIPE_HDD
do
echo " $dev"
done | sort
echo
read -p "Are you absolutely sure? [y/n] " -r
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
echo "Aborted"
exit 1
fi
read -p "Type 'wipediskscompletely' to confirm: " -r
if [[ ! $REPLY = "wipediskscompletely" ]]
then
echo "Aborted"
exit 1
fi
fi
# Note that BA5EBA11-0000-1111-2222- is the partition GUID prefix used by STX; it is defined
# in sysinv's constants.py. Since the 000000000001 suffix is used by custom stx LVM partitions,
# the next suffix (000000000002) is used for the persistent backup partition.
BACKUP_PART_GUID="BA5EBA11-0000-1111-2222-000000000002"
part_type_guid_str="Partition GUID code"
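# "Partition GUID code" is the field label in sgdisk -i output from which the partition
# type GUID is parsed below.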
for dev in $WIPE_HDD
do
    if [[ -e $dev ]]
    then
        if [ "$dev" == "$rootfs" ]
        then
            part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
            for part_number in "${part_numbers[@]}"; do
                part=$dev$part_number
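                # NVMe devices name partitions with a "p" separator (e.g. nvme0n1p1),
                # so rebuild the partition path for those devices.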
                case $part in
                    *"nvme"*)
                        part=${dev}p${part_number}
                        ;;
                esac
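                # flock serializes access to $dev while sgdisk reads the partition info,
                # so the query does not race with other tools touching the device.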
                sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev)
                part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
                if [ "$part_type_guid" == "$BACKUP_PART_GUID" ]; then
                    echo "Skipping wipe of backup partition $part..."
                    continue
                fi
                echo "Wiping partition $part..."
                wipefs -f -a $part
                # Zero out the first and last few sectors of the partition. This is required with
                # GPT partitions, which store partition info at both the start and the end of the block.
                # Skip /, or we will lose access to the tools on the system.
                if [[ $part != $rootfs_part ]]
                then
                    dd if=/dev/zero of=$part bs=512 count=34
                    dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
                fi
                echo "Removing partition $part..."
                sgdisk $dev --delete $part_number
            done
            # Wipe the bootloader signature (the first 440 bytes, i.e. the MBR boot code area)
            # to allow reboot from secondary boot devices (e.g. PXE)
            dd if=/dev/zero of=$dev bs=440 count=1
        else
            echo "Wiping $dev..."
            wipefs -f -a $dev
            echo "$JOURNAL_DISKS" | grep -qw "$dev"
            if [[ $? == 0 ]]; then
                # Journal partitions require additional wiping. Based on the ceph-manage-journal.py
                # script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
                # wiping 100MB of data at the beginning of the partition should be enough. We also
                # wipe 100MB at the end, just to be safe.
                dd if=/dev/zero of=$dev bs=1M count=100
                # blockdev --getsz reports 512-byte sectors, while seek= with bs=1M counts 1MiB
                # blocks, so convert the device size to MiB before seeking to the last 100MB.
                dd if=/dev/zero of=$dev bs=1M count=100 seek=$((`blockdev --getsz $dev` / 2048 - 100))
            else
                # Clear any previous GPT tables or LVM data by zeroing the first and last few
                # sectors of the disk; GPT stores partition info at both the start and the end
                # of the block device.
                dd if=/dev/zero of=$dev bs=512 count=34
                dd if=/dev/zero of=$dev bs=512 count=34 seek=$((`blockdev --getsz $dev` - 34))
            fi
        fi
    fi
done
if [[ -z $WIPE_HDD ]]
then
echo "No disks were detected."
else
sync
echo "The disk(s) have been wiped."
fi