[2/2] core168: Add script to automatically repair MDRAID arrays
Commit Message
Please see the header of the script for more details.
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
---
config/rootfiles/common/aarch64/stage2 | 1 +
config/rootfiles/common/armv6l/stage2 | 1 +
config/rootfiles/common/x86_64/stage2 | 1 +
config/rootfiles/core/168/update.sh | 3 +
src/scripts/repair-mdraid | 169 +++++++++++++++++++++++++
5 files changed, 175 insertions(+)
create mode 100644 src/scripts/repair-mdraid
@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
+usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime
@@ -97,6 +97,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
+usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime
@@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces
usr/local/bin/makegraphs
usr/local/bin/qosd
usr/local/bin/readhash
+usr/local/bin/repair-mdraid
usr/local/bin/run-parts
usr/local/bin/scanhd
usr/local/bin/settime
@@ -125,6 +125,9 @@ if ! grep -q rd.auto /etc/default/grub; then
sed -e "s/panic=10/& rd.auto/" -i /etc/default/grub
fi
+# Repair any broken MDRAID arrays
+/usr/local/bin/repair-mdraid
+
# Start services
/etc/init.d/fcron restart
/etc/init.d/sshd restart
new file mode 100644
@@ -0,0 +1,169 @@
+#!/bin/bash
+###############################################################################
+# #
+# IPFire.org - A linux based firewall #
+# Copyright (C) 2022 IPFire Team <info@ipfire.org> #
+# #
+# This program is free software: you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or #
+# (at your option) any later version. #
+# #
+# This program is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
+# GNU General Public License for more details. #
+# #
+# You should have received a copy of the GNU General Public License #
+# along with this program. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+#
+# This script is supposed to repair any broken RAID installations
+# where the system has been booted from only one of the RAID devices
+# without the software RAID being activated first.
+#
+# This script does as follows:
+#
+# * It tries to find an inactive RAID called "ipfire:0"
+# * It will then destroy any devices that are still part of this RAID.
+# This is required because if the RAID is being assembled correctly,
+# data from the disk that has NOT been mounted will be replicated
+# back to the device that has been changed. That causes that any
+# data that has been written to the mounted disk will be lost.
+# To avoid this, we will partially destroy the RAID.
+# * We will then erase any partition tables and destroy any filesystems
+# on the devices so that they do not get accidentially mounted again.
+# * The system will then need to be rebooted where the RAID will be
+# mounted again in a degraded state which might take some extra
+# time at boot (the system stands still for about a minute).
+# * After the system has been booted up correctly, we will re-add
+# the devices back to the RAID which will resync and the system
+# will be back to its intended configuration.
+
+find_inactive_raid() {
+ local status
+ local device
+ local arg
+ local args
+
+ while read -r status device args; do
+ if [ "${status}" = "INACTIVE-ARRAY" ]; then
+ for arg in ${args}; do
+ case "${arg}" in
+ name=ipfire:0)
+ echo "${device}"
+ return 0
+ ;;
+ esac
+ done
+ fi
+ done <<< "$(mdadm --detail --scan)"
+
+ return 1
+}
+
+find_root() {
+ local device
+ local mp
+ local fs
+ local args
+
+ while read -r device mp fs args; do
+ if [ "${mp}" = "/" ]; then
+ echo "${device:0:-1}"
+ return 0
+ fi
+ done < /proc/mounts
+
+ return 1
+}
+
+find_raid_devices() {
+ local raid="${1}"
+
+ local IFS=,
+
+ local device
+ for device in $(mdadm -v --detail --scan "${raid}" | awk -F= '/^[ ]+devices/ { print $2 }'); do
+ echo "${device}"
+ done
+
+ return 0
+}
+
+destroy_everything() {
+ local device="${1}"
+ local part
+
+ # Destroy the RAID superblock
+ mdadm --zero-superblock "${device}"
+
+ # Wipe the partition table
+ wipefs -a "${device}"
+
+ # Wipe any partition signatures
+ for part in ${device}*; do
+ wipefs -a "${part}"
+ done
+}
+
+raid_rebuild() {
+ local devices=( "$@" )
+
+ cat > /etc/rc.d/rcsysinit.d/S99fix-raid <<EOF
+#!/bin/bash
+
+case "\${1}" in
+ start)
+ if [ -e "/dev/md/ipfire:0" ]; then
+ for device in ${devices[@]}; do
+ mdadm --add "/dev/md/ipfire:0" "\${device}"
+ done
+
+ # Delete this script
+ rm "\${0}"
+ fi
+ ;;
+esac
+EOF
+
+ chmod a+x /etc/rc.d/rcsysinit.d/S99fix-raid
+}
+
+main() {
+ local raid="$(find_inactive_raid)"
+
+ # Nothing to do if no RAID device found
+ if [ -z "${raid}" ]; then
+ return 0
+ fi
+
+ echo "Fixing RAID ${raid}..."
+
+ local root="$(find_root)"
+
+ # Finding any devices in this RAID
+ local devices=(
+ $(find_raid_devices "${raid}")
+ )
+
+ # Stop the RAID
+ mdadm --stop "${raid}" &>/dev/null
+
+ # Destroy any useful data on all remaining RAID devices
+ local device
+ for device in ${devices[@]}; do
+ # Skip root
+ [ "${device}" = "${root}" ] && continue
+
+ destroy_everything "${device}"
+ done &>/dev/null
+
+ # Re-add devices to the RAID
+ raid_rebuild "${device}"
+
+ return 0
+}
+
+main "$@" || return $?