Bad performance of touch/cp on an XFS system

Zhang Qiang zhangqiang.buaa at gmail.com
Mon Aug 25 03:47:39 CDT 2014


I have checked icount and ifree, and found that about 11.8 percent of the
allocated inodes are free, so free inodes should not be too scarce.
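
For reference, the same counters can be read non-interactively; a minimal
sketch against the same device, run with the filesystem unmounted as in the
session below:

# xfs_db -r -c 'sb 0' -c 'p icount ifree' /dev/sda4
icount = 220619904
ifree = 26202919

26202919 free inodes out of 220619904 allocated is about 11.88%, which is
where the percentage above comes from.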

Here's the detailed log; any new clues?

# mount /dev/sda4 /data1/
# xfs_info /data1/
meta-data=/dev/sda4              isize=256    agcount=4, agsize=142272384 blks
         =                       sectsz=512   attr=2, projid32bit=0
data     =                       bsize=4096   blocks=569089536, imaxpct=5
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0
log      =internal               bsize=4096   blocks=277875, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
# umount /dev/sda4
# xfs_db /dev/sda4
xfs_db> sb 0
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = 129
rsumino = 130
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 0
imax_pct = 5
icount = 220619904
ifree = 26202919
fdblocks = 147805479
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 1
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 2
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = null
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
xfs_db> sb 3
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 569089536
rblocks = 0
rextents = 0
uuid = 13ecf47b-52cf-4944-9a71-885bddc5e008
logstart = 536870916
rootino = 128
rbmino = null
rsumino = null
rextsize = 1
agblocks = 142272384
agcount = 4
rbmblocks = 0
logblocks = 277875
versionnum = 0xb4a4
sectsize = 512
inodesize = 256
inopblock = 16
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"
blocklog = 12
sectlog = 9
inodelog = 8
inopblog = 4
agblklog = 28
rextslog = 0
inprogress = 1
imax_pct = 5
icount = 0
ifree = 0
fdblocks = 568811645
frextents = 0
uquotino = 0
gquotino = 0
qflags = 0
flags = 0
shared_vn = 0
inoalignmt = 2
unit = 0
width = 0
dirblklog = 0
logsectlog = 0
logsectsize = 0
logsunit = 1
features2 = 0xa
bad_features2 = 0xa
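
To see how those free inodes are spread out, the per-AG inode counters can be
dumped the same way (this anticipates Dave's suggestion below about narrowing
the search down to a specific AG); a minimal sketch, with the loop bounds
taken from agcount=4 above:

# for ag in 0 1 2 3; do xfs_db -r -c "agi $ag" -c "p count freecount" /dev/sda4; done

Each iteration prints the allocated-inode count and free-inode count for that
AG, so an AG whose freecount is large relative to the others is the likely
hot spot.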



2014-08-25 13:18 GMT+08:00 Dave Chinner <david at fromorbit.com>:

> On Mon, Aug 25, 2014 at 11:34:34AM +0800, Zhang Qiang wrote:
> > Dear XFS community & developers,
> >
> > I am using CentOS 6.3 with XFS as the base file system and RAID5 as
> > the hardware storage.
> >
> > Detail environment as follow:
> >    OS: CentOS 6.3
> >    Kernel: kernel-2.6.32-279.el6.x86_64
> > XFS option info (mount output): /dev/sdb1 on /data type xfs
> > (rw,noatime,nodiratime,nobarrier)
> >
> > Detail phenomenon:
> >
> >     # df
> >     Filesystem            Size  Used Avail Use% Mounted on
> >     /dev/sda1              29G   17G   11G  61% /
> >     /dev/sdb1             893G  803G   91G  90% /data
> >     /dev/sda4             2.2T  1.6T  564G  75% /data1
> >
> >     # time touch /data1/1111
> >     real    0m23.043s
> >     user    0m0.001s
> >     sys     0m0.349s
> >
> >     # perf top
> >     Events: 6K cycles
> >      16.96%  [xfs]                     [k] xfs_inobt_get_rec
> >      11.95%  [xfs]                     [k] xfs_btree_increment
> >      11.16%  [xfs]                     [k] xfs_btree_get_rec
> >       7.39%  [xfs]                     [k] xfs_btree_get_block
> >       5.02%  [xfs]                     [k] xfs_dialloc
> >       4.87%  [xfs]                     [k] xfs_btree_rec_offset
> >       4.33%  [xfs]                     [k] xfs_btree_readahead
> >       4.13%  [xfs]                     [k] _xfs_buf_find
> >       4.05%  [kernel]                  [k] intel_idle
> >       2.89%  [xfs]                     [k] xfs_btree_rec_addr
> >       1.04%  [kernel]                  [k] kmem_cache_free
> >
> >
> > It seems that some xfs kernel functions (xfs_inobt_get_rec,
> > xfs_btree_increment, etc.) spend a lot of time.
> >
> > I found a bug in bugzilla [1]; is that the same issue as this one?
>
> No.
>
> > It would be greatly appreciated if you could give a constructive
> > suggestion about this issue, as it's really hard to reproduce on
> > another system and it's not possible to upgrade that online machine.
>
> You've got very few free inodes, widely distributed in the allocated
> inode btree. The CPU time above is the btree search for the next
> free inode.
>
> This is the issue solved by this series of recent commits to add a
> new on-disk free inode btree index:
>
> 53801fd xfs: enable the finobt feature on v5 superblocks
> 0c153c1 xfs: report finobt status in fs geometry
> a3fa516 xfs: add finobt support to growfs
> 3efa4ff xfs: update the finobt on inode free
> 2b64ee5 xfs: refactor xfs_difree() inobt bits into xfs_difree_inobt() helper
> 6dd8638 xfs: use and update the finobt on inode allocation
> 0aa0a75 xfs: insert newly allocated inode chunks into the finobt
> 9d43b18 xfs: update inode allocation/free transaction reservations for finobt
> aafc3c2 xfs: support the XFS_BTNUM_FINOBT free inode btree type
> 8e2c84d xfs: reserve v5 superblock read-only compat. feature bit for finobt
> 57bd3db xfs: refactor xfs_ialloc_btree.c to support multiple inobt numbers
>
> Which is of no help to you, however, because it's not available in
> any CentOS kernel.
>
> There's really not much you can do to avoid the problem once you've
> punched random freespace holes in the allocated inode btree. It
> generally doesn't affect many people; those that it does affect are
> normally using XFS as an object store indexed by a hard link farm
> (e.g. various backup programs do this).
>
> If you dump the superblock via xfs_db, the difference between icount
> and ifree will give you an idea of how much "needle in a haystack"
> searching is going on. You can probably narrow it down to a specific
> AG by dumping the AGI headers and checking the same thing. Filling
> in all the holes (by creating a bunch of zero length files in the
> appropriate AGs) might take some time, but it should make the
> problem go away until you remove more files and create random
> free inode holes again...
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david at fromorbit.com
>
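
For reference, the hole-filling workaround described in the last paragraph
above could look roughly like the sketch below. The directory name and file
count are illustrative assumptions, as is the premise that new file inodes
land in the same AG as their parent directory, so a filler directory would
first have to be created (or found) in the affected AG; this is a sketch, not
a tested recipe:

# mkdir /data1/inode-filler
# for i in $(seq 1 200000); do touch /data1/inode-filler/f$i; done
# ls /data1/inode-filler | wc -l

Once the scattered free inodes are consumed, new allocations come from
freshly allocated inode chunks instead of long btree searches, which is the
effect Dave describes.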
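
For completeness, on a newer kernel and xfsprogs the finobt feature from the
commit series Dave lists above can be enabled when a filesystem is created; a
hedged sketch (the device name is a placeholder, finobt needs a v5/CRC
filesystem and recent tools, and mkfs destroys existing data, so this is not
applicable to the existing CentOS 6.3 volume):

# mkfs.xfs -m crc=1,finobt=1 /dev/sdXN

The free inode btree then lets the allocator locate a free inode directly
instead of walking the allocated-inode btree.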