Here is a script (developed by Rich Coe) that demonstrates the file
corruption problem found on our systems.
Jim Foris
--
#!/bin/bash
# shell script to fill a partition with files of a known pattern.
# this demonstrates a problem found on xfs raid0 partition.
#
# this script makes no assumptions about the format or implementation of the
# destination partition.
#
# this script assumes that you already have made the destination
# partition that you are testing on.
#
# In our configuration, the raidtab looks like this:
# raiddev /dev/md0
# raid-level 0
# nr-raid-disks 2
# nr-spare-disks 0
# persistent-superblock 1
# chunk-size 256
# device /dev/sdb1
# raid-disk 0
# device /dev/sdc1
# raid-disk 1
#
# when we reduced the chunk size to 128, corruption occurred in all chunks
# except the first.
#
# Here's what this script does:
#
# - makes a source directory of 1023 files with the test pattern
# the file size is 129 * 4K, or (2 * 256K) + 4K
# on each following run, I always change the pattern
#
# - creates 6 writer processes which fill the partition
#
# - shows the current state of the f/s (percent full)
#
# - waits for the writer processes to exit
#
# - unmounts/mounts the file system
#
# - checksums the target data to verify correct file output
# Tested configurations:
# linux 2.6.8-rc2-bk5 smp dual i686 (xeon)
# linux 2.6.11 smp dual i686 (xeon)
# linux 2.6.11 smp dual x86_64 (opteron)
#
# xfs /dev/md0 raid0 (2 74G partitions) ext journal 256k failed
# xfs /dev/md0 raid0 (2 74G partitions) int journal 256k failed
# xfs /dev/md0 raid0 (2 74G partitions) ext journal 128k failed
# xfs /dev/sdb (74G partition) ext journal PASSED
# xfs /dev/md0 raid0 (2 37G partitions) ext journal 256k failed
# ext3 /dev/md0 raid0 (2 74G partitions) ext journal 256k PASSED
# reiserfs /dev/md0 raid0 (2 74G partitions) ext journal 256k PASSED
# NOTES:
# - there's no indication of the amount done while checking for corruption
# - this should have been written in perl
# - is sum as fast as it could be ?
# Print the synopsis for both invocation modes (fill and -check) and
# terminate the script.
# Arguments: none; the program name is taken from $0
# Outputs:   nine lines of usage text on stdout
# Exits:     always, with status 1
usage() {
  # Every line is passed as one quoted argument so the program name is never
  # word-split or glob-expanded.
  printf '%s\n' \
    "usage: $0 srcdir destdir pattern [mount-pt]" \
    " $0 \$PWD /data/dest a5a5" \
    " where srcdir = source directory to write pattern files" \
    " destdir = destination directory under test, /dev/md0" \
    " pattern = pattern for this round (4 chars)" \
    " mount-pt = mount point of /dev/md0" \
    "usage: $0 -check destdir chksum" \
    " where destdir = destination dir to be checked" \
    " chksum is value supplied by program"
  exit 1
}
# Replicate a source tree into numbered directories until the target fills.
# Copies land in <dest>/t<N> with N starting at offset+1. The loop makes at
# most cnt-1 copies and stops early when df reports 90-99% use for the
# destination or when a copy fails.
# Arguments:
#   $1 - source directory tree to copy
#   $2 - destination directory that receives the t<N> copies
#   $3 - numeric offset added to the copy index (keeps directory names of
#        concurrent writers apart)
#   $4 - loop bound; the copy index runs from 1 to cnt-1
# Outputs: one "begin filling" line on stdout
vfill() {
  # Locals keep this function from clobbering the caller's i/j/cnt when it
  # is invoked in the foreground.
  local srcd=$1 desd=$2 offset=$3 cnt=$4
  local i j

  echo "begin filling space $desd/t$offset"
  for ((i = 1, j = 1 + offset; i < cnt; i++, j++)); do
    # Only the last df line carries the figures for this filesystem.
    if df -k "$desd" | tail -n 1 | grep -Eq '9[0-9]%'; then
      break
    fi
    # A failed copy (typically "no space left") ends the run; cp's
    # diagnostics are discarded on purpose, as in the original.
    cp -r -- "$srcd" "$desd/t$j" 2>/dev/null || break
  done
}
# Write a file consisting of the pattern repeated 1024 times.
# Arguments:
#   $1 - pattern string (the usage text asks for 4 characters, which gives
#        a 4096-byte file)
#   $2 - output file; created or truncated
# Outputs: one "making pattern" line on stdout
mkpat() {
  local pattern=$1 outfile=$2
  local i

  echo "making pattern $pattern"
  for ((i = 0; i < 1024; i++)); do
    # printf '%s' emits the pattern verbatim; echo -n would treat patterns
    # such as "-n" or "-e" as options and collapse embedded whitespace.
    printf '%s' "$pattern"
  done > "$outfile"
}
# Populate a directory with identical pattern files.
# File i1 is the source file concatenated <reps> times; files i2 .. i<count>
# are copies of i1.
# Arguments:
#   $1 - source pattern file
#   $2 - directory to create (with parents) and fill
#   $3 - optional: number of files to produce (default 1023)
#   $4 - optional: number of times the source is repeated in each file
#        (default 129)
# Outputs: one "creating pattern directory" line on stdout
# Returns: non-zero as soon as the directory cannot be created, the source
#          cannot be read, or a copy fails
crpat() {
  local src=$1 dest=$2
  local count=${3:-1023} reps=${4:-129}
  local i

  echo "creating pattern directory $dest from $src"
  mkdir -p -- "$dest" || return

  # Build the first file with a single redirection around the whole loop.
  for ((i = 0; i < reps; i++)); do
    cat -- "$src" || return
  done > "$dest/i1" || return

  for ((i = 2; i <= count; i++)); do
    cp -- "$dest/i1" "$dest/i$i" || return
  done
}
# Checksum every regular file in the test tree, print the `sum` lines that
# do not contain the expected checksum, then exit the script.
# Arguments:
#   $1 - directory to verify: either the parent of the "test" directory or
#        the "test" directory itself (a "test" subdirectory is used when
#        one exists, otherwise $1 is searched directly)
#   $2 - expected checksum, matched as an extended regex against each line
#        of sum output
# Outputs: one sum line per non-matching file on stdout
# Exits:   always, with the status of the final grep -v: 0 when at least
#          one non-matching line was printed, 1 when every line matched,
#          2 on a grep error or an empty checksum.
#          NOTE(review): 0 therefore means "mismatches found" - the reverse
#          of the usual convention; kept unchanged for existing users.
chkdest() {
  local root=$1 expected=$2

  # An empty pattern would match every line and hide all corruption.
  if [[ -z "$expected" ]]; then
    echo "chkdest: empty checksum" >&2
    exit 2
  fi

  # Accept both calling conventions found in this script: the parent of the
  # test tree, or the test tree itself.
  if [[ -d "$root/test" ]]; then
    root=$root/test
  fi

  # NUL-delimited names keep paths containing whitespace intact.
  find "$root" -type f -print0 | xargs -0 sum | grep -E -v -- "$expected"
  exit $?
}
# ---- driver ---------------------------------------------------------------
# Both invocation modes take at least three arguments.
if (( $# < 3 )); then
  usage
fi

# Verification-only mode. chkdest exits, so nothing below runs.
if [[ "$1" == "-check" ]]; then
  chkdest "$2" "$3"
fi

SRCDIR=$1
DEST=$2
PATTERN=$3
MNTPT=$4   # optional; empty means "do not remount, just print instructions"

# Build the pattern file, expand it into the source tree, and drop the
# scratch file, which is not needed afterwards.
patfile=pat.$$
mkpat "$PATTERN" "$patfile"
crpat "$patfile" "$SRCDIR/top/slvl/tlvl"
rm -f -- "$patfile"

# Reference checksum: the first field of sum's output for one pattern file.
chksum=$(sum "$SRCDIR/top/slvl/tlvl/i1" | cut -f1 -d' ')
if [[ -z "$chksum" ]]; then
  echo "cannot checksum $SRCDIR/top/slvl/tlvl/i1" >&2
  exit 10
fi

# Plain mkdir (no -p): a missing $DEST should be reported, not created.
if [[ ! -d "$DEST/test" ]]; then
  mkdir "$DEST/test" || exit 10
fi

# Start six background writers, each with its own directory-number range.
pids=()
for U in 100 200 300 400 500 600; do
  vfill "$SRCDIR/top" "$DEST/test" "$U" 100 &
  pids+=("$!")
done

# Report progress every 30 seconds until no writer is left or the
# filesystem reports 90-99% use.
while :; do
  alive=0
  for pid in "${pids[@]}"; do
    # kill -0 only probes for existence; no signal is delivered.
    if kill -0 "$pid" 2>/dev/null; then
      alive=1
      break
    fi
  done
  (( alive )) || break

  status=$(df -k "$DEST/test" | tail -n 1)
  printf '%s\n' "$status"
  if grep -Eq '9[0-9]%' <<<"$status"; then
    break
  fi
  sleep 30
done

# Writers stop on their own at the same threshold; collect all of them.
for pid in "${pids[@]}"; do
  wait "$pid"
done

# chkdest looks in the "test" subdirectory of the directory it is given, so
# it is handed $DEST here and in the rerun hints below.
if [[ -z "$MNTPT" ]]; then
  echo "writing files now complete"
  echo "unmount and remount /dev/md0, and rerun script with:"
  echo " $0 -check $DEST $chksum"
  exit 0
fi

if ! umount "$MNTPT"; then
  echo "cannot unmount $MNTPT (/dev/md0)"
  echo "unmount and remount /dev/md0, and rerun script with:"
  echo " $0 -check $DEST $chksum"
  exit 11
fi

# Without this check a failed mount would leave chkdest inspecting the
# empty mount-point directory.
if ! mount "$MNTPT"; then
  echo "cannot remount $MNTPT (/dev/md0)" >&2
  exit 12
fi

chkdest "$DEST" "$chksum"
|