xfs
[Top] [All Lists]

XFS file corruption bug - test script

To: linux-xfs@xxxxxxxxxxx
Subject: XFS file corruption bug - test script
From: James Foris <jforis@xxxxxxxxx>
Date: Thu, 17 Mar 2005 20:14:13 -0600
In-reply-to: <137F7B9449E219489D5C64ED60FA68E803F8ABBE@MKEMLVEM08.e2k.ad.ge.com>
References: <137F7B9449E219489D5C64ED60FA68E803F8ABBE@MKEMLVEM08.e2k.ad.ge.com>
Sender: linux-xfs-bounce@xxxxxxxxxxx
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.3) Gecko/20041116

Here is a script (developed by Rich Coe) that demonstrates the file corruption problem found on our systems.


Jim Foris

--

#!/bin/bash

# shell script to fill a partition with files of a known pattern.
# this demonstrates a problem found on xfs raid0 partition.
#
# this script makes no assumptions about the format or implementation of the
# destination partition.
#
# this script assumes that you already have made the destination
# partition that you are testing on.
#
# In our configuration, the raidtab looks like this:
# raiddev /dev/md0
#        raid-level 0
#        nr-raid-disks 2
#        nr-spare-disks 0
#        persistent-superblock 1
#        chunk-size 256
#        device /dev/sdb1
#        raid-disk 0
#        device /dev/sdc1
#        raid-disk 1
#
# when we reduced the chunk size to 128, corruption occurred in all chunks
# except the first.
#
# Here's what this script does:
#
#   - makes a source directory of 1023 files with the test pattern
#     the file size is 129 * 4K, or (2 * 256K) + 4K
#     on each following run, I always change the pattern
#
#   - creates 6 writer processes which fill the partition
#
#   - shows the current state of the f/s (percent full)
#
#   - waits for the writer processes to exit
#
#   - unmounts/mounts the file system
#
#   - checksums the target data to verify correct file output

# Tested configurations:
# linux 2.6.8-rc2-bk5 smp dual i686 (xeon)
# linux 2.6.11 smp dual i686 (xeon)
# linux 2.6.11 smp dual x86_64 (opteron)
#
# xfs /dev/md0 raid0 (2 74G partitions) ext journal 256k                failed
# xfs /dev/md0 raid0 (2 74G partitions) int journal 256k                failed
# xfs /dev/md0 raid0 (2 74G partitions) ext journal 128k                failed
# xfs /dev/sdb (74G partition) ext journal                              PASSED
# xfs /dev/md0 raid0 (2 37G partitions) ext journal 256k                failed
# ext3 /dev/md0 raid0 (2 74G partitions) ext journal 256k               PASSED
# reiserfs /dev/md0 raid0 (2 74G partitions) ext journal 256k           PASSED

# NOTES:
#    - there's no indication of the amount done while checking for corruption
#    - this should have been written in perl
#    - is sum as fast as it could be ?

# Print the command-line synopsis for both invocation modes (the fill run
# and the -check run) on stdout, then terminate the script.
#
# Arguments: none ($0 is interpolated as the program name)
# Exit status: always 1
function usage
{
    # One printf with every line double-quoted: $0 is never word-split or
    # glob-expanded, whatever the path of the script looks like.
    printf '%s\n' \
        "usage: $0 srcdir destdir pattern [mount-pt]" \
        "     $0 \$PWD /data/dest a5a5" \
        "     where srcdir = source directory to write pattern files" \
        "           destdir = destination directory under test, /dev/md0" \
        "           pattern = pattern for this round (4 chars)" \
        "           mount-pt = mount point of /dev/md0" \
        "usage: $0 -check destdir chksum" \
        "     where destdir = destination dir to be checked" \
        "           chksum is value supplied by program"
    exit 1
}

# Writer: copy the source tree into numbered directories under the
# destination until the file system is nearly full, a copy fails, or the
# loop limit is reached.
#
# Arguments:
#   $1 - source directory tree to replicate (copied with cp -r)
#   $2 - destination directory; copies are created as $2/t<N>
#   $3 - numeric offset; the first copy is numbered offset + 1
#   $4 - loop limit; at most ($4 - 1) copies are made
# Outputs: one progress line on stdout.
function vfill
{
    local srcd=$1
    local desd=$2
    local offset=$3
    local cnt=$4
    local i j

    echo "begin filling space $desd/t$offset"
    for ((i = 1; i < cnt; i++)); do
        j=$((i + offset))
        # Stop as soon as df reports the file system 90-99% used.
        if df -k -- "$desd" | tail -n 1 | grep -Eq '9[0-9]%'; then
            break
        fi
        # A failing cp is expected once the disk fills up, so its error
        # output is deliberately discarded and the loop simply ends.
        cp -r -- "$srcd" "$desd/t$j" 2>/dev/null || break
    done
}

# Write the given pattern 1024 times, back to back with no separators, into
# a file.  With the 4-character pattern the script asks for, the result is
# a 4096-byte file.
#
# Arguments:
#   $1 - pattern string to repeat
#   $2 - output file (created or truncated)
# Outputs: one progress line on stdout.
function mkpat
{
    local pattern=$1
    local outfile=$2
    local i

    echo "making pattern $pattern"
    for ((i = 0; i < 1024; i++)); do
        # printf '%s' emits the pattern verbatim: no globbing, no word
        # splitting, and no risk of it being taken for an echo option.
        printf '%s' "$pattern"
    done > "$outfile"
}

# Build the directory of identical pattern files that the writers copy.
#
# File i1 is the seed file concatenated `reps` times; files i2 .. i<nfiles>
# are copies of i1.  With the defaults and a 4K seed file, every file is
# 129 * 4K bytes and the directory holds 1023 files.
#
# Arguments:
#   $1 - seed file whose contents are repeated
#   $2 - directory to create (with parents) and populate
#   $3 - optional: number of files to create (default 1023)
#   $4 - optional: how many times the seed is repeated per file (default 129)
# Outputs: one progress line on stdout.
# Returns: non-zero as soon as the directory cannot be created or a file
#          cannot be written.
function crpat
{
    local src=$1
    local dest=$2
    local nfiles=${3:-1023}
    local reps=${4:-129}
    local i j

    echo "creating pattern directory $dest from $src"

    mkdir -p -- "$dest" || return 1

    for ((j = 0; j < reps; j++)); do
        cat -- "$src"
    done > "$dest/i1" || return 1

    # i1 already exists, so the copies start at i2.
    for ((i = 2; i <= nfiles; i++)); do
        cp -- "$dest/i1" "$dest/i$i" || return 1
    done
}

# Checksum every regular file below a directory with `sum` and report the
# files whose checksum differs from the expected one.  This function always
# terminates the script.
#
# Arguments:
#   $1 - directory to scan; the script's callers and its printed rerun
#        instructions pass <destdir>/test, so the directory is scanned as
#        given (no further "/test" is appended)
#   $2 - expected checksum, i.e. the first field of a `sum` output line
# Outputs: the `sum` output line of every mismatching file, on stdout.
# Exit status: 0 if every file matches, 1 if at least one file differs,
#              2 if $1 is not a directory.
function chkdest
{
    local dir=$1
    local want=$2

    if [[ ! -d $dir ]]; then
        echo "chkdest: $dir is not a directory" >&2
        exit 2
    fi

    # Compare only the checksum field, so the block count or a file name
    # that happens to contain the same digits cannot hide a bad file.
    # awk's exit status becomes the result: 1 once any line mismatched.
    find "$dir" -type f -exec sum {} + |
        awk -v want="$want" '$1 != want { print; bad = 1 } END { exit bad }'
    exit $?
}

# ---- main flow --------------------------------------------------------------

# Both invocation modes need at least three arguments.
if [ $# -lt 3 ]; then
    usage
fi

# Verification-only mode; chkdest ends the script itself.
if [ "$1" = "-check" ]; then
    chkdest "$2" "$3"
fi

SRCDIR=$1
DEST=$2
PATTERN=$3
MNTPT=$4

# Build the source tree of pattern files; the scratch pattern file is only
# needed while the tree is being created.
patfile=pat.$$
mkpat "$PATTERN" "$patfile"
crpat "$patfile" "$SRCDIR/top/slvl/tlvl"
rm -f -- "$patfile"

# The reference checksum is the first field of sum's output.
chksum=$(sum "$SRCDIR/top/slvl/tlvl/i1" | cut -f1 -d' ')
if [ -z "$chksum" ]; then
    echo "cannot checksum $SRCDIR/top/slvl/tlvl/i1" >&2
    exit 1
fi

if ! mkdir -p "$DEST/test"; then
    echo "cannot create $DEST/test" >&2
    exit 1
fi

# Start the six writers in the background and remember their process ids.
pids=()
for U in 100 200 300 400 500 600 ; do
    vfill "$SRCDIR/top" "$DEST/test" "$U" 100 &
    pids+=("$!")
done

# Report df status every 30 seconds until no writer is left or the file
# system is 90-99% full.
while : ; do
    running=0
    for pid in "${pids[@]}"; do
        # kill -0 only probes for existence; it sends no signal.
        if kill -0 "$pid" 2>/dev/null; then
            running=1
            break
        fi
    done
    if [ "$running" -eq 0 ]; then break; fi
    status=$(df -k "$DEST/test" | tail -n 1)
    echo "$status"
    if [[ $status =~ 9[0-9]% ]]; then break; fi
    sleep 30
done

# Make sure every writer has really finished before verifying.
for pid in "${pids[@]}"; do
    wait "$pid"
done

# Without a mount point the remount has to be done by hand.
if [ -z "$MNTPT" ]; then
    echo "writing files now complete"
    echo "unmount and remount /dev/md0, and rerun script with:"
    echo "    $0 -check $DEST/test $chksum"
    exit 0
fi

if ! umount "$MNTPT"; then
    echo "cannot unmount $MNTPT (/dev/md0)"
    echo "unmount and remount /dev/md0, and rerun script with:"
    echo "    $0 -check $DEST/test $chksum"
    exit 11
fi
# Verifying an unmounted mount point would say nothing about the data, so a
# failed remount stops the run.
if ! mount "$MNTPT"; then
    echo "cannot mount $MNTPT (/dev/md0)"
    echo "mount /dev/md0, and rerun script with:"
    echo "    $0 -check $DEST/test $chksum"
    exit 12
fi
chkdest "$DEST/test" "$chksum"


<Prev in Thread] Current Thread [Next in Thread>