#!/bin/bash

# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Author: Neil Brown
# Email: <neilb@suse.com>

# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation can allow the 'check' to continue.
#
# Options are:
#   --continue    Don't start new checks, only continue old ones.
#   --duration    This is passed to "date --date=$duration" to find out
#                 when to finish
#
# To support '--continue', arrays are identified by UUID and the 'sync_completed'
# value is stored in /var/lib/mdcheck/MD_UUID_$UUID

# Convert a /dev/md name into its /sys/.../md equivalent.
# Arguments: $1 - path of a device node (symbolic links are followed)
# Outputs:   the resolved /sys/dev/block/MAJ:MIN path on stdout
# Returns:   non-zero (printing nothing) if the node cannot be listed;
#            otherwise the exit status of readlink
sysname() {
  local listing maj min
  local -a fields

  # Bail out when ls fails: feeding an empty listing to a bare 'set'
  # would dump every shell variable into the caller's capture.
  listing=$(ls -lLd -- "$1") || return

  # For a device node 'ls -l' prints "major, minor" in the fifth and
  # sixth fields (where a regular file would show its size).
  read -r -a fields <<<"$listing"
  maj=${fields[4]%,}
  min=${fields[5]}
  readlink -f "/sys/dev/block/$maj:$min"
}

# Parse the command line.
# Arguments: the command-line words to parse
# Globals:   cont    (written) "yes" when -c/--continue was given, else empty
#            endtime (written) epoch seconds at which to stop, taken from
#                              -d/--duration via "date --date"; else empty
# Exits:     with getopt's status on a usage error, 0 after printing help,
#            1 when the duration is not understood by date.
parse_options() {
  local args rv dur

  args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
  rv=$?
  if [[ $rv -ne 0 ]]; then exit "$rv"; fi

  # getopt emits shell-quoted words; keep $args quoted so that eval sees
  # them exactly as getopt wrote them.
  eval set -- "$args"

  cont=
  endtime=
  while [[ "$1" != "--" ]]; do
    # getopt passes short options through in their short spelling, so
    # both spellings have to be matched here.
    case "$1" in
      -h | --help)
        echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
        echo >&2 ' time-offset must be understood by "date --date"'
        exit 0
        ;;
      -c | --continue)
        cont=yes
        ;;
      -d | --duration)
        shift
        dur=$1
        # An unparsable duration must not silently become "no time
        # budget", so stop before any check is started.
        if ! endtime=$(date --date "$dur" "+%s"); then
          echo >&2 "mdcheck: cannot interpret duration '$dur'"
          exit 1
        fi
        ;;
    esac
    shift
  done
}

parse_options "$@"

# We need a temp file occasionally.  It is removed when the script exits
# and whenever SIGINT, SIGQUIT or SIGTERM is delivered.
# NOTE(review): the signal traps only remove the file; they do not end
# the script - confirm that this is intended.
tmp="/var/lib/mdcheck/.md-check-$$"
trap 'rm -f "$tmp"' EXIT INT QUIT TERM

# Firstly, clean out really old state files: regular files named
# MD_UUID* that find's "-mtime +180" test selects.
mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -type f -name 'MD_UUID*' -mtime +180 -exec rm -f -- {} +

# Now look at each md device.
#
# Start (or, with --continue, resume) a 'check' on every idle array among
# the given device nodes.
# Arguments: paths of candidate device nodes
# Globals:   cont (read)     - non-empty means only resume saved checks
#            cnt (written)   - incremented for each array started
#            MD_<n>_fl, MD_<n>_sys, MD_<n>_dev (written) - state file,
#                              sysfs directory and device node of the
#                              n-th array started
#            MD_UUID (written)
start_checks() {
  local dev sys fl start key value

  for dev in "$@"; do
    [[ -e "$dev" ]] || continue
    sys=$(sysname "$dev") || continue

    # cannot check this array
    [[ -f "$sys/md/sync_action" ]] || continue

    # This array is busy
    [[ "$(<"$sys/md/sync_action")" == idle ]] || continue

    # Pick the UUID out of mdadm's KEY=value listing directly, rather
    # than writing the listing to a file and sourcing it as shell code.
    MD_UUID=
    while IFS='=' read -r key value; do
      if [[ "$key" == MD_UUID ]]; then MD_UUID=$value; fi
    done < <(mdadm --detail --export "$dev")
    [[ -n "$MD_UUID" ]] || continue

    fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
    if [[ -z "$cont" ]]; then
      start=0
      logger -p daemon.info mdcheck start checking "$dev"
    elif [[ ! -f "$fl" ]]; then
      # Nothing to continue here
      continue
    else
      start=$(<"$fl")
      logger -p daemon.info mdcheck continue checking "$dev" from "$start"
    fi

    # Remember this array under numbered variable names, which is how the
    # rest of the script looks the arrays up again.
    cnt=$((cnt + 1))
    printf -v "MD_${cnt}_fl" '%s' "$fl"
    printf -v "MD_${cnt}_sys" '%s' "$sys"
    printf -v "MD_${cnt}_dev" '%s' "$dev"

    echo "$start" > "$fl"
    echo "$start" > "$sys/md/sync_min"
    echo check > "$sys/md/sync_action"
  done
}

cnt=0
start_checks /dev/md?*

# No end time was set, so there is no deadline to enforce: leave any
# checks that were started running and finish here.
if [[ -z "$endtime" ]]; then
  exit 0
fi

# Poll the arrays being checked every two minutes until the end time is
# reached, saving each array's progress in its state file.
# Globals: endtime (read)    - epoch seconds at which to stop waiting
#          cnt (read)        - number of arrays recorded as MD_<n>_*
#          MD_<n>_sys (read) - sysfs directory of array n
#          MD_<n>_fl (read)  - state file of array n; cleared once the
#                              array is found to be no longer checking
# Exits the script with status 0 as soon as no array is checking any more;
# returns normally once the end time has passed.
wait_for_checks() {
  local i any fl sys fl_var sys_var pos rest

  while [[ "$(date +%s)" -lt "$endtime" ]]; do
    any=
    # A counting loop runs zero times when cnt is 0; the brace range
    # {1..0} would have produced the words "1 0".
    for ((i = 1; i <= cnt; i++)); do
      fl_var="MD_${i}_fl"
      sys_var="MD_${i}_sys"
      fl=${!fl_var}
      sys=${!sys_var}

      [[ -n "$fl" ]] || continue

      if [[ "$(<"$sys/md/sync_action")" != check ]]; then
        # This array stopped checking: forget it and drop its state file.
        printf -v "$fl_var" '%s' ''
        rm -f "$fl"
        continue
      fi

      pos=
      read -r pos rest < "$sys/md/sync_completed"
      # Only a number is usable as sync_min when resuming, so keep the
      # previous checkpoint if the first word is anything else
      # (presumably a status word while no position is available -
      # confirm against the kernel md documentation).
      if [[ "$pos" =~ ^[0-9]+$ ]]; then
        echo "$pos" > "$fl"
      fi
      any=yes
    done

    if [[ -z "$any" ]]; then
      # mdcheck_continue.timer is started by mdcheck_start.timer.
      # When the check action can be finished in mdcheck_start.service,
      # it doesn't need mdcheck_continue anymore.
      systemctl stop mdcheck_continue.timer
      exit 0
    fi
    sleep 120
  done
}

wait_for_checks

# We've waited, and there are still checks running.
# Time to stop them.
#
# Globals: cnt (read)        - number of arrays recorded as MD_<n>_*
#          MD_<n>_sys (read) - sysfs directory of array n
#          MD_<n>_dev (read) - device node of array n (for the log line)
#          MD_<n>_fl (read)  - state file of array n; cleared when the
#                              array turns out to have stopped already
pause_checks() {
  local i fl sys dev fl_var sys_var dev_var

  # A counting loop runs zero times when cnt is 0; the brace range
  # {1..0} would have produced the words "1 0".
  for ((i = 1; i <= cnt; i++)); do
    fl_var="MD_${i}_fl"
    sys_var="MD_${i}_sys"
    dev_var="MD_${i}_dev"
    fl=${!fl_var}
    sys=${!sys_var}
    dev=${!dev_var}

    [[ -n "$fl" ]] || continue

    if [[ "$(<"$sys/md/sync_action")" != check ]]; then
      # No longer checking: nothing to resume, so drop the state file.
      printf -v "$fl_var" '%s' ''
      rm -f "$fl"
      continue
    fi

    echo idle > "$sys/md/sync_action"
    # Save sync_min as the point to resume from.
    # NOTE(review): this presumes the kernel updates sync_min to the
    # position reached when a check is interrupted - confirm.
    cat "$sys/md/sync_min" > "$fl"
    logger -p daemon.info pause checking "$dev" at "$(<"$fl")"
  done
}

pause_checks