bad performance on touch/cp file on XFS system
Zhang Qiang
zhangqiang.buaa at gmail.com
Mon Aug 25 03:47:39 CDT 2014
I have checked icount and ifree, and found that about 11.8 percent of the
inodes are free, so free inodes should not be too scarce.
Here's the detailed log; any new clues?
# mount /dev/sda4 /data1/
# xfs_info /data1/
meta-data=/dev/sda4 isize=256 agcount=4, agsize=142272384 blks
= sectsz=512 attr=2, projid32bit=0
data = bsize=4096 blocks=569089536, imaxpct=5
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0
log =internal bsize=4096 blocks=277875, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
# umount /dev/sda4
# xfs_db /dev/sda4
xfs_db> sb 0
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = 129
rsumino = 130
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 0
imax_pct = 5
icount = 220619904
ifree = 26202919
fdblocks = 147805479
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 1
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 2
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = null
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 3
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
2014-08-25 13:18 GMT+08:00 Dave Chinner <david at fromorbit.com>:
> On Mon, Aug 25, 2014 at 11:34:34AM +0800, Zhang Qiang wrote:
> > Dear XFS community & developers,
> >
> > I am using CentOS 6.3 with XFS as the base file system and RAID5 as
> > hardware storage.
> >
> > Detail environment as follow:
> > OS: CentOS 6.3
> > Kernel: kernel-2.6.32-279.el6.x86_64
> > XFS option info(df output): /dev/sdb1 on /data type xfs
> > (rw,noatime,nodiratime,nobarrier)
> >
> > Detail phenomenon:
> >
> > # df
> > Filesystem Size Used Avail Use% Mounted on
> > /dev/sda1 29G 17G 11G 61% /
> > /dev/sdb1 893G 803G 91G 90% /data
> > /dev/sda4 2.2T 1.6T 564G 75% /data1
> >
> > # time touch /data1/1111
> > real 0m23.043s
> > user 0m0.001s
> > sys 0m0.349s
> >
> > # perf top
> > Events: 6K cycles
> > 16.96% [xfs] [k] xfs_inobt_get_rec
> > 11.95% [xfs] [k] xfs_btree_increment
> > 11.16% [xfs] [k] xfs_btree_get_rec
> > 7.39% [xfs] [k] xfs_btree_get_block
> > 5.02% [xfs] [k] xfs_dialloc
> > 4.87% [xfs] [k] xfs_btree_rec_offset
> > 4.33% [xfs] [k] xfs_btree_readahead
> > 4.13% [xfs] [k] _xfs_buf_find
> > 4.05% [kernel] [k] intel_idle
> > 2.89% [xfs] [k] xfs_btree_rec_addr
> > 1.04% [kernel] [k] kmem_cache_free
> >
> >
> > It seems that some XFS kernel functions spend a lot of time
> > (xfs_inobt_get_rec, xfs_btree_increment, etc.)
> >
> > I found a bug in bugzilla [1]; is that the same issue as this one?
>
> No.
>
> > It would be greatly appreciated if you could give constructive suggestions
> > about this issue, as it's really hard to reproduce on another system and
> > it's not possible to upgrade that online machine.
>
> You've got very few free inodes, widely distributed in the allocated
> inode btree. The CPU time above is the btree search for the next
> free inode.
>
> This is the issue solved by this series of recent commits to add a
> new on-disk free inode btree index:
>
> 53801fd xfs: enable the finobt feature on v5 superblocks
> 0c153c1 xfs: report finobt status in fs geometry
> a3fa516 xfs: add finobt support to growfs
> 3efa4ff xfs: update the finobt on inode free
> 2b64ee5 xfs: refactor xfs_difree() inobt bits into xfs_difree_inobt() helper
> 6dd8638 xfs: use and update the finobt on inode allocation
> 0aa0a75 xfs: insert newly allocated inode chunks into the finobt
> 9d43b18 xfs: update inode allocation/free transaction reservations for finobt
> aafc3c2 xfs: support the XFS_BTNUM_FINOBT free inode btree type
> 8e2c84d xfs: reserve v5 superblock read-only compat. feature bit for finobt
> 57bd3db xfs: refactor xfs_ialloc_btree.c to support multiple inobt numbers
>
> Which is of no help to you, however, because it's not available in
> any CentOS kernel.
>
> There's really not much you can do to avoid the problem once you've
> punched random freespace holes in the allocated inode btree. It
> generally doesn't affect many people; those that it does affect are
> normally using XFS as an object store indexed by a hard link farm
> (e.g. various backup programs do this).
>
> If you dump the superblock via xfs_db, the difference between icount
> and ifree will give you an idea of how much "needle in a haystack"
> searching is going on. You can probably narrow it down to a specific
> AG by dumping the AGI headers and checking the same thing. Filling
> in all the holes (by creating a bunch of zero length files in the
> appropriate AGs) might take some time, but it should make the
> problem go away until you remove more files and create random
> free inode holes again...
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david at fromorbit.com
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://oss.sgi.com/pipermail/xfs/attachments/20140825/b5e7d80a/attachment.html>
More information about the xfs
mailing list