I have been working on this problem for the last several weeks.
Below is a test case that will reproduce the problem every time.
I don't think this is an unmount problem, as I have this code in
xfs_flush_buftarg()
{
[ ... ]
printk(KERN_WARNING "xfs: xfs_flush_buftarg pincount %d\n", pincount);
return pincount;
}
I always get a pincount of zero, and I can get corruption to always occur.
My configuration and the failing test cases follow.
ctest will fill the disk and show the corruption.
ctest.size shows that if the writes are slowed down or limited, the
corruption can be avoided.
Some results of experiments on Friday:
10 images 1 sec no corruption
20 images 1 sec no corruption
50 images 1 sec no corruption
100 images 1 sec corruption
20 images .5 sec 1 corruption
20 images .75 sec no corruption
There seems to be a problem related to the amount of data or to how busy
the disks are. Nathan Scott attempted to reproduce this with IDE drives
but couldn't, either because the data partitions were too small or
because the drives were too slow.
####
mkfs.xfs -f -l logdev=/dev/sda5,sunit=8 /dev/md0
#### xfs_info ####
meta-data=/export isize=256 agcount=16, agsize=263104 blks
= sectsz=512
data = bsize=4096 blocks=4208896, imaxpct=25
= sunit=64 swidth=128 blks, unwritten=1
naming =version 2 bsize=4096
log =external bsize=4096 blocks=18065, version=2
= sectsz=512 sunit=1 blks
realtime =none extsz=524288 blocks=0, rtextents=0
#### /etc/fstab ####
/dev/md0 /bigdir xfs defaults,noatime,noalign,logbufs=8,logdev=/dev/sda5 1 2
#### /dev/sdb ####
Disk /dev/sdb: 73.4 GB, 73407865856 bytes
255 heads, 63 sectors/track, 8924 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Device Boot Start End Blocks Id System
/dev/sdb1 1 1048 8418028+ fd Linux raid autodetect
#### /dev/sdc ####
Disk /dev/sdc: 73.4 GB, 73407865856 bytes
255 heads, 63 sectors/track, 8924 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Device Boot Start End Blocks Id System
/dev/sdc1 1 1048 8418028+ fd Linux raid autodetect
#### raidtab ####
raiddev /dev/md0
raid-level 0
nr-raid-disks 2
nr-spare-disks 0
persistent-superblock 1
chunk-size 256
device /dev/sdb1
raid-disk 0
device /dev/sdc1
raid-disk 1
#### ctest #####
#!/bin/bash
#######################################
# Print the time between two timestamps as whole minutes plus seconds.
# Arguments:
#   $1 - start time, integer seconds (e.g. output of `date +%s`)
#   $2 - end time, integer seconds
# Outputs:
#   Writes "<M> minutes <S> secs" to stdout.
#######################################
function elapsed
{
  # Scratch values are local so they do not leak into the calling script;
  # shell arithmetic replaces four separate expr(1) invocations.
  local tot=$(( $2 - $1 ))
  local mins=$(( tot / 60 ))
  local rsec=$(( tot - mins * 60 ))
  echo "$mins minutes $rsec secs"
}
# Test parameters: the scratch directory every phase works in, and how many
# tiny files are written into it.
directory=/bigdir/home/pool/test
numberOfSmallFiles=1000000

# Make sure the scratch directory exists; mkdir's diagnostics are discarded.
# (Disabled full reset: rm -rf $directory)
mkdir -p "$directory" 2>/dev/null

# Remove every regular file found under the scratch directory.
# (Disabled variant that keeps the big files with a multi-digit index:
#  find $directory -type f | egrep -v 'orimularge_[0-9][0-9]+' | xargs rm)
find "$directory" -type f | xargs rm

# Start-of-run timestamp in epoch seconds.
start=$(date +%s)

# First column of the last line df prints for the scratch directory; this is
# what gets passed to umount/mount later on.
device=$(df -k "$directory" | tail -1 | awk '{ print $1 }')
# --- fragment the disk ---
# Write numberOfSmallFiles files named small_0, small_1, ... each holding a
# single "*" line.
echo "create lots of small files"
for (( i = 0; i < numberOfSmallFiles; i++ )); do
  echo "*" > "$directory/small_$i"
done
# Fill the remaining space with 256 MiB zero-filled files named
# orimularge_<n>. Indices whose file already exists are skipped; the loop
# ends at the first dd that fails.
echo "fill disk with big files"
i=0
while : ; do
  if [ ! -e "$directory/orimularge_$i" ]; then
    # dd and its stderr redirection must form ONE command. With the
    # redirection on a line of its own it becomes a separate no-op command
    # whose exit status is always 0, so a failing dd would never be noticed
    # and this loop would spin forever.
    if dd if=/dev/zero of="$directory/orimularge_$i" bs=1M count=256 \
        2>/dev/null; then
      echo -n "."
    else
      break
    fi
  fi
  i=$(( i + 1 ))
done
# Delete every second small file (small_0, small_2, ...). The path list is
# produced by the loop and streamed to a single xargs/rm pipeline.
echo "rm the small files"
i=0
while (( i < numberOfSmallFiles )); do
  printf '%s\n' "$directory/small_$i"
  (( i += 2 ))
done | xargs rm
# Create a 513 KiB reference file of random data, then keep copying it to
# data_0, data_1, ... until cp reports a failure.
echo " --- create several copies of the same file ---"
dd if=/dev/urandom of="$directory/data_reference" bs=1K count=513
i=0
while cp "$directory/data_reference" "$directory/data_$i"; do
  i=$(( i + 1 ))
done
# Cycle the filesystem through umount/mount before the comparison.
echo " --- mount/umount ---"
umount "$device"
mount "$device"

# Compare each data_<n> copy with the reference; cmp prints a line for every
# file that differs.
echo " --- check that the copies are equal --- "
find "$directory" -name "data_*[0-9]" \
  -exec cmp "$directory/data_reference" {} ';'

# Report the total run time.
end=$(date +%s)
elapsed "$start" "$end"
#### ctest.size ####
#!/bin/bash
# Both positional arguments are required; print the usage line and exit with
# status 1 when either one is missing or empty.
if [[ -z $1 || -z $2 ]]; then
  echo "ctest files interval"
  exit 1
fi

SZ=$1     # pause after every SZ-th copy
ITER=$2   # argument handed to usleep for each pause
echo "ctest: copying $SZ files at $ITER intervals"
#######################################
# Print the time between two timestamps as whole minutes plus seconds.
# Arguments:
#   $1 - start time, integer seconds (e.g. output of `date +%s`)
#   $2 - end time, integer seconds
# Outputs:
#   Writes "<M> minutes <S> secs" to stdout.
#######################################
function elapsed
{
  # Scratch values are local so they do not leak into the calling script;
  # shell arithmetic replaces four separate expr(1) invocations.
  local tot=$(( $2 - $1 ))
  local mins=$(( tot / 60 ))
  local rsec=$(( tot - mins * 60 ))
  echo "$mins minutes $rsec secs"
}
# Test parameters: the scratch directory every phase works in, and how many
# tiny files are written into it.
directory=/bigdir/home/pool/test
numberOfSmallFiles=1000000

# Make sure the scratch directory exists; mkdir's diagnostics are discarded.
# (Disabled full reset: rm -rf $directory)
mkdir -p "$directory" 2>/dev/null

# Remove the regular files under the scratch directory, except those whose
# path matches orimularge_ followed by at least two digits.
# (Disabled variant that removes everything:
#  find $directory -type f | xargs rm)
find "$directory" -type f \
  | egrep -v 'orimularge_[0-9][0-9]+' \
  | xargs rm

# Start-of-run timestamp in epoch seconds.
start=$(date +%s)

# First column of the last line df prints for the scratch directory; this is
# what gets passed to umount/mount later on.
device=$(df -k "$directory" | tail -1 | awk '{ print $1 }')
# --- fragment the disk ---
# Write numberOfSmallFiles files named small_0, small_1, ... each holding a
# single "*" line.
echo "create lots of small files"
for (( i = 0; i < numberOfSmallFiles; i++ )); do
  echo "*" > "$directory/small_$i"
done
# Fill the remaining space with 256 MiB zero-filled files named
# orimularge_<n>. Indices whose file already exists are skipped; the loop
# ends at the first dd that fails.
echo "fill disk with big files"
i=0
while : ; do
  if [ ! -e "$directory/orimularge_$i" ]; then
    # dd and its stderr redirection must form ONE command. With the
    # redirection on a line of its own it becomes a separate no-op command
    # whose exit status is always 0, so a failing dd would never be noticed
    # and this loop would spin forever.
    if dd if=/dev/zero of="$directory/orimularge_$i" bs=1M count=256 \
        2>/dev/null; then
      echo -n "."
    else
      break
    fi
  fi
  i=$(( i + 1 ))
done
# Delete every second small file (small_0, small_2, ...). The path list is
# produced by the loop and streamed to a single xargs/rm pipeline.
echo "rm the small files"
i=0
while (( i < numberOfSmallFiles )); do
  printf '%s\n' "$directory/small_$i"
  (( i += 2 ))
done | xargs rm
# Create a 513 KiB reference file of random data, then keep copying it to
# data_0, data_1, ... until cp reports a failure. After copy 0 and after
# every SZ-th copy that follows, pause via usleep.
# NOTE(review): confirm the unit expected for $ITER; usleep is normally given
# microseconds.
echo " --- create several copies of the same file ---"
dd if=/dev/urandom of="$directory/data_reference" bs=1K count=513
i=0
while cp "$directory/data_reference" "$directory/data_$i"; do
  if (( i % $SZ == 0 )); then
    usleep "$ITER"
  fi
  i=$(( i + 1 ))
done
# Cycle the filesystem through umount/mount before the comparison.
echo " --- mount/umount ---"
umount "$device"
mount "$device"

# Compare each data_<n> copy with the reference and collect cmp's output in
# ./log; wc then summarises how much was reported.
# The -exec clause must reach find as one command (hence the backslash
# continuation). If the line is split without one, find aborts with a missing
# -exec argument, the shell then tries to execute the reference file, and log
# stays empty, which would look like "no corruption".
echo " --- check that the copies are equal --- "
find "$directory" -name "data_*[0-9]" \
  -exec cmp "$directory/data_reference" {} \; > log
wc log

# Report the total run time.
end=$(date +%s)
elapsed "$start" "$end"
|