[PATCH 1/7] xfs: use generic percpu counters for inode counter
Brian Foster
bfoster at redhat.com
Thu Feb 5 08:09:33 CST 2015
On Thu, Feb 05, 2015 at 07:54:03AM +1100, Dave Chinner wrote:
> XFS has hand-rolled per-cpu counters for the superblock since before
> there was any generic implementation. There are some warts around
> the use of them for the inode counter as the hand rolled counter is
> designed to be accurate at zero, but has no specific accurracy at
> any other value. This design causes problems for the maximum inode
> count threshold enforcement, as there is no trigger that balances
> the counters as they get close tothe maximum threshold.
>
> Instead of designing new triggers for balancing, just replace the
> handrolled per-cpu counter with a generic counter. This enables us
> to update the counter through the normal superblock modification
> funtions, but rather than do that we add a xfs_mod_icount() helper
> function (from Christoph Hellwig) and keep the percpu counter
> outside the superblock in the struct xfs_mount.
>
> This means we still need to initialise the per-cpu counter
> specifically when we read the superblock, and vice versa when we
> log/write it, but it does mean that we don't need to change any
> other code.
>
> Signed-off-by: Dave Chinner <dchinner at redhat.com>
> ---
> fs/xfs/libxfs/xfs_ialloc.c | 6 ++--
> fs/xfs/libxfs/xfs_sb.c | 2 ++
> fs/xfs/xfs_fsops.c | 3 +-
> fs/xfs/xfs_mount.c | 76 +++++++++++++++++++++-------------------------
> fs/xfs/xfs_mount.h | 7 +++--
> fs/xfs/xfs_super.c | 7 +++--
> fs/xfs/xfs_trans.c | 5 ++-
> 7 files changed, 54 insertions(+), 52 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
> index 116ef1d..5b4ba9f 100644
> --- a/fs/xfs/libxfs/xfs_ialloc.c
> +++ b/fs/xfs/libxfs/xfs_ialloc.c
> @@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
> */
> newlen = args.mp->m_ialloc_inos;
> if (args.mp->m_maxicount &&
> - args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
> + percpu_counter_read(&args.mp->m_icount) + newlen >
> + args.mp->m_maxicount)
> return -ENOSPC;
> args.minlen = args.maxlen = args.mp->m_ialloc_blks;
> /*
> @@ -1340,7 +1341,8 @@ xfs_dialloc(
> * inode.
> */
> if (mp->m_maxicount &&
> - mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
> + percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
> + mp->m_maxicount) {
> noroom = 1;
> okalloc = 0;
> }
> diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
> index b0a5fe9..017cb2f 100644
> --- a/fs/xfs/libxfs/xfs_sb.c
> +++ b/fs/xfs/libxfs/xfs_sb.c
> @@ -771,6 +771,8 @@ xfs_log_sb(
> struct xfs_mount *mp = tp->t_mountp;
> struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
>
> + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
> +
> xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
> xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
> xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index f711452..e1470f2 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -631,11 +631,12 @@ xfs_fs_counts(
> xfs_fsop_counts_t *cnt)
> {
> xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
> + cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
> +
> spin_lock(&mp->m_sb_lock);
> cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
> cnt->freertx = mp->m_sb.sb_frextents;
> cnt->freeino = mp->m_sb.sb_ifree;
> - cnt->allocino = mp->m_sb.sb_icount;
> spin_unlock(&mp->m_sb_lock);
> return 0;
> }
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 4fa80e6..702ea6a 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1099,6 +1099,21 @@ xfs_log_sbcount(xfs_mount_t *mp)
> return xfs_sync_sb(mp, true);
> }
>
> +int
> +xfs_mod_icount(
> + struct xfs_mount *mp,
> + int64_t delta)
> +{
> + /* deltas are +/-64, hence the large batch size of 128. */
> + __percpu_counter_add(&mp->m_icount, delta, 128);
> + if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
> + ASSERT(0);
> + percpu_counter_add(&mp->m_icount, -delta);
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> /*
> * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
> * a delta to a specified field in the in-core superblock. Simply
> @@ -1127,14 +1142,8 @@ xfs_mod_incore_sb_unlocked(
> */
> switch (field) {
> case XFS_SBS_ICOUNT:
> - lcounter = (long long)mp->m_sb.sb_icount;
> - lcounter += delta;
> - if (lcounter < 0) {
> - ASSERT(0);
> - return -EINVAL;
> - }
> - mp->m_sb.sb_icount = lcounter;
> - return 0;
> + ASSERT(0);
> + return -ENOSPC;
> case XFS_SBS_IFREE:
> lcounter = (long long)mp->m_sb.sb_ifree;
> lcounter += delta;
> @@ -1288,8 +1297,9 @@ xfs_mod_incore_sb(
> int status;
>
> #ifdef HAVE_PERCPU_SB
> - ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
> + ASSERT(field < XFS_SBS_IFREE || field > XFS_SBS_FDBLOCKS);
> #endif
Not really sure why the above changes, since XFS_SBS_IFREE >
XFS_SBS_ICOUNT. This goes away, so it doesn't matter. :)
Reviewed-by: Brian Foster <bfoster at redhat.com>
> +
> spin_lock(&mp->m_sb_lock);
> status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
> spin_unlock(&mp->m_sb_lock);
> @@ -1492,7 +1502,6 @@ xfs_icsb_cpu_notify(
> case CPU_ONLINE:
> case CPU_ONLINE_FROZEN:
> xfs_icsb_lock(mp);
> - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
> xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
> xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
> xfs_icsb_unlock(mp);
> @@ -1504,17 +1513,14 @@ xfs_icsb_cpu_notify(
> * re-enable the counters. */
> xfs_icsb_lock(mp);
> spin_lock(&mp->m_sb_lock);
> - xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
> xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
> xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
>
> - mp->m_sb.sb_icount += cntp->icsb_icount;
> mp->m_sb.sb_ifree += cntp->icsb_ifree;
> mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
>
> memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
>
> - xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
> xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
> xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
> spin_unlock(&mp->m_sb_lock);
> @@ -1531,11 +1537,18 @@ xfs_icsb_init_counters(
> xfs_mount_t *mp)
> {
> xfs_icsb_cnts_t *cntp;
> + int error;
> int i;
>
> + error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
> + if (error)
> + return error;
> +
> mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
> - if (mp->m_sb_cnts == NULL)
> + if (!mp->m_sb_cnts) {
> + percpu_counter_destroy(&mp->m_icount);
> return -ENOMEM;
> + }
>
> for_each_online_cpu(i) {
> cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
> @@ -1563,13 +1576,14 @@ void
> xfs_icsb_reinit_counters(
> xfs_mount_t *mp)
> {
> + percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
> +
> xfs_icsb_lock(mp);
> /*
> * start with all counters disabled so that the
> * initial balance kicks us off correctly
> */
> mp->m_icsb_counters = -1;
> - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
> xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
> xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
> xfs_icsb_unlock(mp);
> @@ -1583,6 +1597,9 @@ xfs_icsb_destroy_counters(
> unregister_hotcpu_notifier(&mp->m_icsb_notifier);
> free_percpu(mp->m_sb_cnts);
> }
> +
> + percpu_counter_destroy(&mp->m_icount);
> +
> mutex_destroy(&mp->m_icsb_mutex);
> }
>
> @@ -1645,7 +1662,6 @@ xfs_icsb_count(
>
> for_each_online_cpu(i) {
> cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
> - cnt->icsb_icount += cntp->icsb_icount;
> cnt->icsb_ifree += cntp->icsb_ifree;
> cnt->icsb_fdblocks += cntp->icsb_fdblocks;
> }
> @@ -1659,7 +1675,7 @@ xfs_icsb_counter_disabled(
> xfs_mount_t *mp,
> xfs_sb_field_t field)
> {
> - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
> + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
> return test_bit(field, &mp->m_icsb_counters);
> }
>
> @@ -1670,7 +1686,7 @@ xfs_icsb_disable_counter(
> {
> xfs_icsb_cnts_t cnt;
>
> - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
> + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
>
> /*
> * If we are already disabled, then there is nothing to do
> @@ -1689,9 +1705,6 @@ xfs_icsb_disable_counter(
>
> xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
> switch(field) {
> - case XFS_SBS_ICOUNT:
> - mp->m_sb.sb_icount = cnt.icsb_icount;
> - break;
> case XFS_SBS_IFREE:
> mp->m_sb.sb_ifree = cnt.icsb_ifree;
> break;
> @@ -1716,15 +1729,12 @@ xfs_icsb_enable_counter(
> xfs_icsb_cnts_t *cntp;
> int i;
>
> - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
> + ASSERT((field >= XFS_SBS_IFREE) && (field <= XFS_SBS_FDBLOCKS));
>
> xfs_icsb_lock_all_counters(mp);
> for_each_online_cpu(i) {
> cntp = per_cpu_ptr(mp->m_sb_cnts, i);
> switch (field) {
> - case XFS_SBS_ICOUNT:
> - cntp->icsb_icount = count + resid;
> - break;
> case XFS_SBS_IFREE:
> cntp->icsb_ifree = count + resid;
> break;
> @@ -1750,8 +1760,6 @@ xfs_icsb_sync_counters_locked(
>
> xfs_icsb_count(mp, &cnt, flags);
>
> - if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
> - mp->m_sb.sb_icount = cnt.icsb_icount;
> if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
> mp->m_sb.sb_ifree = cnt.icsb_ifree;
> if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
> @@ -1805,12 +1813,6 @@ xfs_icsb_balance_counter_locked(
>
> /* update counters - first CPU gets residual*/
> switch (field) {
> - case XFS_SBS_ICOUNT:
> - count = mp->m_sb.sb_icount;
> - resid = do_div(count, weight);
> - if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
> - return;
> - break;
> case XFS_SBS_IFREE:
> count = mp->m_sb.sb_ifree;
> resid = do_div(count, weight);
> @@ -1871,14 +1873,6 @@ again:
> }
>
> switch (field) {
> - case XFS_SBS_ICOUNT:
> - lcounter = icsbp->icsb_icount;
> - lcounter += delta;
> - if (unlikely(lcounter < 0))
> - goto balance_counter;
> - icsbp->icsb_icount = lcounter;
> - break;
> -
> case XFS_SBS_IFREE:
> lcounter = icsbp->icsb_ifree;
> lcounter += delta;
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index a5b2ff8..457c6e3 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -41,7 +41,6 @@ struct xfs_da_geometry;
> typedef struct xfs_icsb_cnts {
> uint64_t icsb_fdblocks;
> uint64_t icsb_ifree;
> - uint64_t icsb_icount;
> unsigned long icsb_flags;
> } xfs_icsb_cnts_t;
>
> @@ -81,8 +80,11 @@ typedef struct xfs_mount {
> struct super_block *m_super;
> xfs_tid_t m_tid; /* next unused tid for fs */
> struct xfs_ail *m_ail; /* fs active log item list */
> - xfs_sb_t m_sb; /* copy of fs superblock */
> +
> + struct xfs_sb m_sb; /* copy of fs superblock */
> spinlock_t m_sb_lock; /* sb counter lock */
> + struct percpu_counter m_icount; /* allocated inodes counter */
> +
> struct xfs_buf *m_sb_bp; /* buffer for superblock */
> char *m_fsname; /* filesystem name */
> int m_fsname_len; /* strlen of fs name */
> @@ -377,6 +379,7 @@ extern void xfs_unmountfs(xfs_mount_t *);
> extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
> extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
> uint, int);
> +extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
> extern int xfs_mount_log_sb(xfs_mount_t *);
> extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
> extern int xfs_readsb(xfs_mount_t *, int);
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 40d2ac5..87e169f 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1087,6 +1087,7 @@ xfs_fs_statfs(
> xfs_sb_t *sbp = &mp->m_sb;
> struct xfs_inode *ip = XFS_I(dentry->d_inode);
> __uint64_t fakeinos, id;
> + __uint64_t icount;
> xfs_extlen_t lsize;
> __int64_t ffree;
>
> @@ -1098,6 +1099,7 @@ xfs_fs_statfs(
> statp->f_fsid.val[1] = (u32)(id >> 32);
>
> xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
> + icount = percpu_counter_sum(&mp->m_icount);
>
> spin_lock(&mp->m_sb_lock);
> statp->f_bsize = sbp->sb_blocksize;
> @@ -1106,15 +1108,14 @@ xfs_fs_statfs(
> statp->f_bfree = statp->f_bavail =
> sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
> fakeinos = statp->f_bfree << sbp->sb_inopblog;
> - statp->f_files =
> - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
> + statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
> if (mp->m_maxicount)
> statp->f_files = min_t(typeof(statp->f_files),
> statp->f_files,
> mp->m_maxicount);
>
> /* make sure statp->f_ffree does not underflow */
> - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
> + ffree = statp->f_files - (icount - sbp->sb_ifree);
> statp->f_ffree = max_t(__int64_t, ffree, 0);
>
> spin_unlock(&mp->m_sb_lock);
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index eb90cd5..9bc742b 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -554,8 +554,7 @@ xfs_trans_unreserve_and_mod_sb(
> }
>
> if (idelta) {
> - error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
> - idelta, rsvd);
> + error = xfs_mod_icount(mp, idelta);
> if (error)
> goto out_undo_fdblocks;
> }
> @@ -634,7 +633,7 @@ out_undo_ifreecount:
> xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
> out_undo_icount:
> if (idelta)
> - xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
> + xfs_mod_icount(mp, -idelta);
> out_undo_fdblocks:
> if (blkdelta)
> xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
> --
> 2.0.0
>
> _______________________________________________
> xfs mailing list
> xfs at oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
More information about the xfs
mailing list