[BACK]Return to xfscrash CVS log [TXT][DIR] Up to [Development] / xfs-cmds / xfstests / crash

File: [Development] / xfs-cmds / xfstests / crash / xfscrash (download)

Revision 1.6, Wed Nov 9 02:50:19 2005 UTC (11 years, 11 months ago) by nathans.longdrop.melbourne.sgi.com
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +0 -28 lines

Update copyright annotations and license boilerplates to correspond with SGI Legals preferences.
Merge of master-melb:xfs-cmds:24329a by kenmcd.

#!/bin/sh
#
# Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
#
# xfscrash - control the XFS crash tests
#

  #######################
### configuration stuff ########################################################
  #######################

# Test mode: remount, repair or corrupt.
MODE="remount"
# Where to find xfscrash (its control files live here too).
XFSCRASH="/xfscrash"
# Directory that receives the log files.
LOG="${XFSCRASH}"
# Space separated list of places every log message is appended to.
OUTPUT="${LOG}/xfscrash.log /dev/tty1 /dev/console"
# awk implementation used by the df helpers.
AWK_PROG="gawk"
# Clear the FS if it is >= this percent full at the start of a run.
# 100 is a good number - only used on the corrupt test so far.
FULL_LIMIT="80"
  
# Per-host settings, selected by the short host name:
#   TEST_MNT      - mount the test partition here
#   TEST_DEV      - build the test partition here
#   BACKUP_DEV    - back the test partition up to here (or empty)
#   BACKUP_BS     - backup block size for dd
#   STRESS_TIME   - base stress time
#   STRESS_RANDOM - stress random time
# Hosts without an entry abort the script.
case "$(hostname -s)" in
    leesa)
        TEST_MNT=/mnt/arch0
        TEST_DEV=/dev/hda6
        BACKUP_DEV=/dev/hda8
        BACKUP_BS=1024k
        STRESS_TIME=60
        STRESS_RANDOM=60
        ;;
    lumpy)
        TEST_MNT=/mnt/scratch_0
        TEST_DEV=/dev/sdc5
        # empty: no backup on this host (/dev/sdc6 is left commented out)
        BACKUP_DEV=
        BACKUP_BS=10240k
        STRESS_TIME=360
        STRESS_RANDOM=360
        ;;
    *)
        echo "!!! no configuration data for host $(hostname -s)"
        exit 1
        ;;
esac

# Extra options appended to both stress commands below; they set the
# resvsp and unresvsp frequencies to 0 (presumably so those operations
# are avoided - confirm against fsstress usage).
AVOID="-f resvsp=0 -f unresvsp=0"

# DIY stress command
STRESS="/usr/local/bin/fsstress -d ${TEST_MNT}/stress -n 10000000 -p 1 ${AVOID}"
#STRESS="/usr/local/bin/randholes -l 10000000 -c 100000 -b 512 $TEST_MNT/stress/holes"

# Stress command for the corrupt test
CORRUPT_STRESS="/usr/local/bin/fsstress -d ${TEST_MNT}/stress -n 10000 -p 1 ${AVOID}"

###########################################################################

# PID of the background reboot timer; -1 while none has been started.
reboot="-1"

# _log - append everything read on stdin to each destination listed in
# $OUTPUT; nothing is written to stdout.
_log()
{
    # $OUTPUT is deliberately left unquoted: it is a space separated list.
    set -- $OUTPUT
    tee -a "$@" > /dev/null
}

# _echo - log one message line (all arguments joined by spaces) via _log.
_echo()
{
    # printf rather than echo: echo would treat a message such as "-n" or
    # "-e" as an option and, in some shells, interpret backslashes.
    printf '%s\n' "$*" | _log
}

# _mount - mount $TEST_DEV on $TEST_MNT as XFS.
# Exits the script with status 1 if the mount fails.
_mount()
{
    _echo "   *** Mounting $TEST_DEV on $TEST_MNT"
    # Quoted so a device or mount point containing whitespace stays one word.
    if ! mount -t xfs "$TEST_DEV" "$TEST_MNT"
    then
        _echo "   !!! unable to mount"
        exit 1
    fi
}

# _unmount - unmount $TEST_DEV, discarding umount's own output.
# Exits the script with status 1 if the unmount fails.
_unmount()
{
    _echo "   *** Unmounting $TEST_DEV"
    # "> file 2>&1" instead of the bash-only "&>": under a POSIX /bin/sh
    # "cmd &> file" is parsed as "cmd &" followed by "> file".
    if ! umount "$TEST_DEV" > /dev/null 2>&1
    then
        _echo "   !!! unable to unmount"
        exit 1
    fi
}

# _check - verify the (unmounted) filesystem on $TEST_DEV.
#
#   $1 = 0 : a clean FS is expected.  xfs_check must succeed and print
#            nothing, then "xfs_repair -n" must succeed; otherwise the
#            script exits with status 1.
#   $1 != 0: a dirty FS is expected.  xfs_check is run and its result is
#            only reported, never fatal.
#
# Tool output goes to $LOG/check_clean.out, $LOG/check_dirty.out and
# $LOG/repair_clean.out.  Sets the globals "expect" and "fail".
_check()
{
    expect=$1
    fail=0

    if [ "$expect" -eq 0 ]
    then
        _echo "   *** Checking FS (expecting clean fs)"
        _echo "      *** xfs_check ($LOG/check_clean.out)"
        xfs_check "$TEST_DEV" > "$LOG/check_clean.out" 2>&1 || fail=1
        # Any output at all on a supposedly clean FS counts as a failure.
        # (Look at the file just written, not at a stale /tmp path.)
        [ -s "$LOG/check_clean.out" ] && fail=1
    else
        _echo "   *** Checking FS (expecting dirty fs)"
        _echo "      *** xfs_check ($LOG/check_dirty.out)"
        xfs_check "$TEST_DEV" > "$LOG/check_dirty.out" 2>&1 || fail=1
    fi

    # Second opinion, only worth asking for while everything still looks clean.
    if [ "$fail" -eq 0 ] && [ "$expect" -eq 0 ]
    then
        _echo "      *** xfs_repair -n ($LOG/repair_clean.out)"
        xfs_repair -n "$TEST_DEV" > "$LOG/repair_clean.out" 2>&1 || fail=1
    fi

    if [ "$fail" -eq 0 ]
    then
        _echo "         *** FS checks ok"
    else
        if [ "$expect" -eq 0 ]
        then
            _echo "         !!! FS check failed - inconsistent FS"
            _echo "         !!! (see $LOG/*.out for details)"
            exit 1
        else
            _echo "         *** inconsistent fs (as expected)"
        fi
    fi
}

# _check_core - abort the script (status 1) when a file named "core"
# exists in the current directory.
_check_core()
{
    # Nothing to report: carry on.
    [ -e core ] || return 0

    _echo "   !!! core file found!"
    exit 1
}

# _repair - run xfs_repair on $TEST_DEV in three passes:
#   1. read-only (-n): errors are expected, the outcome is only logged
#   2. read-write    : must succeed
#   3. read-only (-n): must find nothing left to fix
# Output of each pass goes to $LOG/repair_<n>.out.  After every pass
# _check_core is called.  A failure in pass 2 or 3 exits with status 1.
_repair()
{
    rm -f core
    _echo "   *** repair"
    _echo "      *** repair pass 1 (RO)"
    # A real if/else: with "a && b || c" the "errors found" message would
    # also be logged whenever logging the first message itself failed.
    if xfs_repair -n "$TEST_DEV" > "$LOG/repair_1.out" 2>&1
    then
        _echo "         !!! no errors found (eh?)"
    else
        _echo "         *** errors found (expected)"
    fi

    _check_core

    _echo "      *** repair pass 2 (RW)"

    if xfs_repair "$TEST_DEV" > "$LOG/repair_2.out" 2>&1
    then
        _echo "         *** FS checks ok (now)"
    else
        _echo "         !!! xfs_repair returned error code"
        _echo "         !!! (see $LOG/repair_*.out for details)"
        exit 1
    fi

    _check_core

    _echo "      *** repair pass 3 (RO)"
    if xfs_repair -n "$TEST_DEV" > "$LOG/repair_3.out" 2>&1
    then
        _echo "         *** FS checks ok"
    else
        _echo "         !!! errors found after repair (unexpected)"
        _echo "         !!! (see $LOG/repair_*.out for details)"
        exit 1
    fi

    _check_core
}

# _cleanup - remove the control files kept under $XFSCRASH and, when a
# reboot timer has been started ($reboot is not -1), kill it.
_cleanup()
{
    rm -f \
        $XFSCRASH/counter \
        $XFSCRASH/start \
        $XFSCRASH/stop \
        $XFSCRASH/active

    if test $reboot != -1
    then
        kill $reboot
    fi
}

# _random - print a random integer in the range 0 .. $1-1.
# Prints 0 when $1 is missing, zero or not a positive number.
_random()
{
    # /dev/urandom never blocks waiting for entropy; the value only
    # jitters a delay.  "-A n" drops the offset column so the number is
    # field 1, and "-t u4" pins the word size to 4 bytes.
    od -A n -N 4 -t u4 /dev/urandom \
        | ${AWK_PROG:-gawk} -v v="$1" '
            NR == 1 {
                r = 0
                # guard against a modulo by zero
                if (v + 0 > 0)
                    r = $1 % (v + 0)
                print r
            }'
}

# _backup - copy $TEST_DEV to $BACKUP_DEV with dd (block size $BACKUP_BS).
# The copy is skipped when $count is 1 or $BACKUP_DEV is empty.
# dd's output goes to $LOG/dd.out; a failed copy exits with status 1.
_backup()
{
    if [ "$count" -ne 1 ] && [ -n "$BACKUP_DEV" ]
    then
        _echo "   *** Backing up $TEST_DEV to $BACKUP_DEV"
        if ! dd if="$TEST_DEV" of="$BACKUP_DEV" bs="$BACKUP_BS" \
                > "$LOG/dd.out" 2>&1
        then
            _echo "   !!! unable to backup fs"
            _echo "   !!! (see $LOG/dd.out)"
            exit 1
        fi
    else
        _echo "   *** skipping back up step"
    fi
}

# _logprint_dump - run xfs_logprint on $TEST_DEV once.
#   $1 = file that receives stdout and stderr
#   $2 = extra xfs_logprint options (may be empty)
# If a file named "core" shows up afterwards, that is logged and a
# "*** CORE DUMPED ***" marker is appended to the output file.
_logprint_dump()
{
    rm -f core
    # $2 is deliberately unquoted: it holds a space separated option list.
    xfs_logprint $2 "$TEST_DEV" > "$1" 2>&1
    if [ -e core ]
    then
        _echo "      !!! xfs_logprint dumped core"
        {
            echo ""
            echo "*** CORE DUMPED ***"
            echo ""
        } >> "$1"
    fi
}

# _logprint - dump the log of $TEST_DEV three ways (plain, "-t -i" and
# "-t -b") into $LOG/logprint.out, $LOG/logprint_inode.out and
# $LOG/logprint_buf.out.
_logprint()
{
    _echo "   *** dumping log to $LOG/logprint.out"
    _logprint_dump "$LOG/logprint.out" ""

    _echo "   *** dumping log (-t -i) to $LOG/logprint_inode.out"
    _logprint_dump "$LOG/logprint_inode.out" "-t -i"

    _echo "   *** dumping log (-t -b) to $LOG/logprint_buf.out"
    _logprint_dump "$LOG/logprint_buf.out" "-t -b"
}
#
# _df_device : get an IRIX style df line for a given device
#
#       - prints nothing if the device is not listed by df
#       - prints fs type in field two (ala IRIX)
#       - joins line together if split by fancy df formatting
#       - strips header etc
#
#       $1 = device name; it must equal df's first column exactly
#

_df_device()
{
    if [ $# -ne 1 ]
    then
        echo "Usage: _df_device device" >&2
        exit 1
    fi

    # Exact string comparison: a regex/substring match would let
    # /dev/sda1 pick up the line for /dev/sda10.
    df -T 2> /dev/null | ${AWK_PROG:-gawk} -v what="$1" '
        $1 != what { next }
        NF == 1 {
            # long device name: df put the remaining columns on the next line
            v = $1
            getline
            print v, $0
            exit
        }
        {
            print
            exit
        }
    '
}

#
# _df_dir : get an IRIX style df line for device where a directory resides
#
#       - prints fs type in field two (ala IRIX)
#       - joins line together if split by fancy df formatting
#       - strips header etc
#
#       $1 = directory to look up
#

_df_dir()
{
    if [ $# -ne 1 ]
    then
        echo "Usage: _df_dir directory" >&2
        exit 1
    fi

    # "$1" is quoted so a path containing whitespace reaches df as one
    # argument.  Only the second line of df's output (the first one after
    # the header) is of interest.
    df -T "$1" 2> /dev/null | ${AWK_PROG:-gawk} '
        NR != 2 { next }
        NF == 1 {
            # long device name: the remaining columns are on the next line
            v = $1
            getline
            print v, $0
            exit 0
        }
        {
            print
            exit 0
        }
    '
    # otherwise, nada
}

# _used - print the percentage of disk space in use on a mounted device
# (field six of its _df_device line, with the "%" removed).
#       $1 = device

_used()
{
    [ $# -eq 1 ] || {
        echo "Usage: _used device" >&2
        exit 1
    }

    _df_device $1 | $AWK_PROG '{ sub(/%/, "", $0); print $6 }'
}

# _check_free - delete $TEST_MNT/stress when $TEST_DEV is at least
# $FULL_LIMIT percent full.  Sets the global "used".
_check_free()
{
    used=$(_used "$TEST_DEV")

    # No usable figure (e.g. the device is not listed by df): there is
    # nothing to compare, so leave the files alone.
    case "$used" in
        ''|*[!0-9]*) return 0 ;;
    esac

    if [ "$used" -ge "$FULL_LIMIT" ]
    then
        _echo "      *** $used % used on $TEST_DEV - deleting files"
        # ":?" aborts rather than running "rm -rf /stress" should
        # TEST_MNT ever be empty.
        rm -rf "${TEST_MNT:?}/stress"
    fi
}

# loop, stressing, unmounting and checking
# no (expected) rebooting...
#
# Each pass: _check 0, _mount, _check_free, run $CORRUPT_STRESS with its
# output sent to _log, _unmount.  The loop only ends, with exit status 0,
# once $XFSCRASH/stop exists.  Uses the global "count" as run number.
_corrupt()
{
    count=0

    # don't want to restart if we reboot...
    _cleanup

    while true
    do

        if [ -e "$XFSCRASH/stop" ]
        then
            _echo "### XFS Crash stopped "
            exit 0
        fi

        _echo "*** run $count"
        # POSIX arithmetic; "let" is a bash builtin a plain /bin/sh lacks.
        count=$((count + 1))

        _check 0
        _mount

        _check_free

        # Unquoted on purpose: the variable holds a command plus arguments.
        $CORRUPT_STRESS | _log

        _unmount
    done
}

###########################################################################
    
_echo ""
_echo ""
echo "XFSCRASH [output to $OUTPUT]"
_echo ""

# "start" and "stop" on the command line only drop a marker file under
# $XFSCRASH; the checks further down act on those files.
case "$1" in
    start)
        touch $XFSCRASH/start
        ;;
    stop)
        touch $XFSCRASH/stop
        ;;
esac


# Run _cleanup exactly once on the way out and keep the exit status the
# script was leaving with ($status was never assigned before, so it is
# captured here).  The signals simply turn into a failing exit, which in
# turn fires the exit trap.
trap 'status=$?; _cleanup; exit $status' 0
trap 'exit 1' 1 2 3 15


# A stop marker wins over everything else.
if [ -e "$XFSCRASH/stop" ]
then
    _echo "### XFS Crash stopped "
    exit 0
fi

# A start marker begins a fresh series: reset the control files, drop
# old logs, mark the test active and rebuild the test filesystem.
if [ -e "$XFSCRASH/start" ]
then
    _echo "### XFS Crash started "
    _cleanup
    rm -f "$LOG"/*.out "$LOG"/*.log core

    touch "$XFSCRASH/active"

    _echo "   *** Building fresh XFS FS"
    # Best effort: the device may well not be mounted at this point.
    umount "$TEST_DEV" > /dev/null 2>&1
    if ! mkfs -t xfs -f "$TEST_DEV" > "$LOG/mkfs.out" 2>&1
    then
        _echo "   !!! unable to mkfs"
        _echo "   !!! (see $LOG/mkfs.out)"
        exit 1
    fi
fi

# Without the active marker there is nothing to do.
if [ ! -e "$XFSCRASH/active" ]
then
    _echo "### XFS Crash inactive "
    exit 0
fi


# The run number is kept in $XFSCRASH/counter between invocations.
if [ -r "$XFSCRASH/counter" ]
then
    count=$(cat "$XFSCRASH/counter")
else
    count=0
fi
# An empty or garbled counter file (possible, presumably, after an
# unclean reboot) restarts the numbering instead of breaking the
# arithmetic below.
case "$count" in
    ''|*[!0-9]*) count=0 ;;
esac
_echo "### Crash test run $count (mode=$MODE, log=$LOG/{*.out,*.log})"

# POSIX arithmetic; "let" is a bash builtin a plain /bin/sh lacks.
count=$((count + 1))
echo "$count" > "$XFSCRASH/counter"

# real test starts here

_echo "   *** Checking for R/O root"
# Accept "ro" anywhere in the option list, e.g. "(ro)" or "(ro,noatime)";
# a literal "(ro)" only matches when ro is the sole option.
if ! mount | grep "on / type" | grep -q '[(,]ro[,)]'
then
    _echo "   !!! root not mounted readonly"
    exit 1
fi

_echo "   *** Loading XFS modules"
if ! modprobe xfs
then
    _echo "   !!! unable to modprobe xfs"
    exit 1
fi

_echo "   *** Unmounting $TEST_DEV"
# Best effort: failure (e.g. not mounted) is ignored on purpose.
umount "$TEST_DEV" > /dev/null 2>&1

_logprint
if [ "$MODE" != "corrupt" ]
then
    _backup
fi

# Mode specific step.  remount and repair fall through to the common
# stress/reboot part below; corrupt loops inside _corrupt and never
# gets there.
case "$MODE" in
    remount)
        _check 1 # expect errors
        _mount
        _unmount
        ;;
    repair)
        _repair
        ;;
    corrupt)
        _corrupt
        exit 0
        ;;
    *)
        _echo "xfscrash: MODE must be remount, repair or corrupt"
        exit 1
        ;;
esac

_check 0 # don't expect errors
_mount

_echo "   *** Cleaning XFS FS"
# ":?" aborts rather than deleting from / should TEST_MNT ever be empty.
if ! rm -rf "${TEST_MNT:?}/stress" "${TEST_MNT:?}/lost+found" \
        > "$LOG/clean.out" 2>&1
then
    _echo "   !!! unable to clean XFS FS"
    _echo "   !!! (see $LOG/clean.out)"
    exit 1
fi

_echo "   *** Making stress directory"
if ! mkdir "$TEST_MNT/stress"
then
    _echo "   !!! unable to mkdir stress"
    exit 1
fi

# Delay before the forced reboot: the base time plus a random part.
# An empty result from _random counts as 0.
jitter=$(_random "$STRESS_RANDOM")
bang=$((STRESS_TIME + ${jitter:-0}))

_echo "   *** Preparing random reboot (in $bang seconds)"
(
    sleep "$bang"
    _echo "      *** BANG ****"
    reboot -fn
) &
# Remembered so that _cleanup can kill the timer.
reboot=$!

_echo "   *** Causing stress & waiting for the inevitable"
# Unquoted on purpose: the variable holds a command plus arguments.
$STRESS | _log

exit 0