
[RFC PATCH 4/4] xfs: implement parallel quota check

To: "xfs@xxxxxxxxxxx" <xfs@xxxxxxxxxxx>
Subject: [RFC PATCH 4/4] xfs: implement parallel quota check
From: Jeff Liu <jeff.liu@xxxxxxxxxx>
Date: Tue, 12 Nov 2013 17:30:15 +0800
Delivered-to: xfs@xxxxxxxxxxx
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:11.0) Gecko/20120410 Thunderbird/11.0.1
From: Jie Liu <jeff.liu@xxxxxxxxxx>

XFS performs the quota check at mount time with a single thread if
required, and this process must complete before the file system
mount can succeed.  That is fine if the desired quota options were
already enabled while users were creating/removing files; however,
if those options were not previously enabled, the whole file system
has to be traversed to figure out the quota usage.  Hence, the mount
procedure can stall for a long time, depending on how many inodes
reside on the storage as well as on the disk IO speed.

This patch implements a parallel quota check based on allocation
groups: the quota check is performed per AG via work queues combined
with completions.  With this change, I observed a significant
speedup on faster devices.

Signed-off-by: Jie Liu <jeff.liu@xxxxxxxxxx>

---
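Note (not part of the patch): below is a minimal, self-contained sketch
of the fan-out pattern this series relies on -- one work item queued per
allocation group, each signalling a completion that the caller waits on.
All names (pcheck_*) and the WQ_UNBOUND choice are illustrative only and
are not taken from the patch.

/*
 * Illustrative sketch only: fan out one work item per AG and wait
 * for all of them with per-item completions.
 */
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/list.h>

struct pcheck_work {
	struct list_head	node;
	unsigned int		agno;	/* AG this worker scans */
	struct work_struct	work;
	struct completion	done;
};

static void pcheck_worker(struct work_struct *work)
{
	struct pcheck_work *pw = container_of(work, struct pcheck_work, work);

	/* ... walk the inodes of pw->agno and adjust the dquots ... */

	complete(&pw->done);
}

static int pcheck_run(unsigned int agcount)
{
	struct workqueue_struct	*wq;
	struct pcheck_work	*pw, *tmp;
	LIST_HEAD(works);
	unsigned int		agno;
	int			error = 0;

	wq = alloc_workqueue("pcheck", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;

	/* Queue one work item per allocation group. */
	for (agno = 0; agno < agcount; agno++) {
		pw = kzalloc(sizeof(*pw), GFP_NOFS);
		if (!pw) {
			error = -ENOMEM;
			break;
		}
		pw->agno = agno;
		INIT_WORK(&pw->work, pcheck_worker);
		init_completion(&pw->done);
		list_add_tail(&pw->node, &works);
		queue_work(wq, &pw->work);
	}

	/* Wait for every item we managed to queue, then clean up. */
	list_for_each_entry(pw, &works, node)
		wait_for_completion(&pw->done);
	list_for_each_entry_safe(pw, tmp, &works, node) {
		list_del(&pw->node);
		kfree(pw);
	}
	destroy_workqueue(wq);

	return error;
}

The patch below follows the same shape, but tracks its adjusters on a
struct xfs_quotacheck, allocates the workqueue per mount, and uses
XFS's positive error code convention.
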
 fs/xfs/xfs_qm.c |  357 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_qm.h |   18 +++
 2 files changed, 359 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996..110df7b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -35,8 +35,11 @@
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
 #include "xfs_qm.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_inum.h"
 #include "xfs_cksum.h"
 #include "xfs_dinode.h"
 
@@ -51,6 +54,9 @@ STATIC int    xfs_qm_init_quotainfo(xfs_mount_t *);
 
 
 STATIC void    xfs_qm_dqfree_one(struct xfs_dquot *dqp);
+STATIC int     xfs_qm_dqusage_adjust(struct xfs_mount *mp, xfs_ino_t ino,
+                                     int *res);
+
 /*
  * We use the batch lookup interface to iterate over the dquots as it
  * currently is the only interface into the radix tree code that allows
@@ -1349,9 +1355,6 @@ STATIC int
 xfs_qm_dqusage_adjust(
        xfs_mount_t     *mp,            /* mount point for filesystem */
        xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
        int             *res)           /* result code value */
 {
        xfs_inode_t     *ip;
@@ -1439,6 +1442,337 @@ error0:
        return error;
 }
 
+static int
+xfs_qm_dqusage_adjust_ichunk(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       struct xfs_inobt_rec_incore     *irbp,
+       xfs_ino_t                       *lastinop)
+{
+       xfs_ino_t                       lastino = *lastinop;
+       int                             chunkidx, clustidx;
+       int                             error = 0;
+       xfs_agino_t                     agino;
+
+       for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
+            irbp->ir_freecount < XFS_INODES_PER_CHUNK;
+            chunkidx++, clustidx++, agino++) {
+               xfs_ino_t       ino = XFS_AGINO_TO_INO(mp, agno, agino);
+               int             stat;
+
+               ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+
+               /* Skip if this inode is free */
+               if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
+                       lastino = ino;
+                       continue;
+               }
+
+               /*
+                * Count used inodes as free so we can tell when the
+                * chunk is used up.
+                */
+               irbp->ir_freecount++;
+
+               error = xfs_qm_dqusage_adjust(mp, ino, &stat);
+               if (stat == BULKSTAT_RV_NOTHING) {
+                       if (error && error != ENOENT && error != EINVAL)
+                               break;
+
+                       lastino = ino;
+                       continue;
+               }
+               if (stat == BULKSTAT_RV_GIVEUP) {
+                       ASSERT(error);
+                       break;
+               }
+               lastino = ino;
+       }
+
+       *lastinop = lastino;
+       return error;
+}
+
+static int
+xfs_qm_dqusage_adjust_perag(
+       struct xfs_dq_adjuster  *qa)
+{
+       struct xfs_mount        *mp = qa->qa_mp;
+       xfs_agnumber_t          agno = qa->qa_agno;
+       xfs_inobt_rec_incore_t  *irbp;  /* current irec buffer pointer */
+       xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
+       xfs_inobt_rec_incore_t  *irbufend; /* end of good irec buffer entries */
+       xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
+       xfs_ino_t               lastino;/* last inode # in question */
+       xfs_agino_t             agino;  /* inode # in allocation group */
+       size_t                  irbsize; /* size of irec buffer in bytes */
+       int                     nirbuf; /* size of irbuf */
+       int                     rval;   /* return value error code */
+       int                     error;  /* error code */
+
+       irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+       if (!irbuf)
+               return ENOMEM;
+       nirbuf = irbsize / sizeof(*irbuf);
+
+       rval = 0;
+       agino = 0;
+       lastino = 0;
+
+       /*
+        * Loop over the inode btree records in this AG, starting from
+        * the last inode returned; 0 means the start of the AG.
+        */
+       do {
+               xfs_buf_t       *agbp;  /* agi header buffer */
+               xfs_agi_t       *agi;   /* agi header data */
+               int             stat;   /* result value from btree calls */
+               bool            end_of_ag = false;
+
+               cond_resched();
+
+               irbp = irbuf;
+               irbufend = irbuf + nirbuf;
+
+               error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+               if (error) {
+                       rval = error;
+                       break;
+               }
+               agi = XFS_BUF_TO_AGI(agbp);
+
+               /* Allocate and initialize a btree cursor for ialloc btree */
+               cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+               error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, &stat);
+
+               /*
+                * Loop through inode btree records in this ag until we run out
+                * of inodes or space in the buffer.
+                */
+               while (irbp < irbufend) {
+                       xfs_inobt_rec_incore_t r;
+
+                       /* Loop as long as we're unable to read the inode btree */
+                       while (error) {
+                               agino += XFS_INODES_PER_CHUNK;
+                               if (XFS_AGINO_TO_AGBNO(mp, agino) >=
+                                   be32_to_cpu(agi->agi_length))
+                                       break;
+
+                               error = xfs_inobt_lookup(cur, agino,
+                                                        XFS_LOOKUP_GE, &stat);
+                               cond_resched();
+                       }
+
+                       /*
+                        * If ran off the end of the ag either with an error,
+                        * or the normal way, set end and stop collecting.
+                        */
+                       if (error) {
+                               end_of_ag = true;
+                               break;
+                       }
+
+                       error = xfs_inobt_get_rec(cur, &r, &stat);
+                       if (error || stat == 0) {
+                               end_of_ag = true;
+                               break;
+                       }
+
+                       /*
+                        * If this chunk has any allocated inodes, save it.
+                        * Also start read-ahead now for this chunk.
+                        */
+                       if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+                               struct blk_plug plug;
+
+                               blk_start_plug(&plug);
+                               xfs_inobt_reada_chunk(mp, agno, &r);
+                               blk_finish_plug(&plug);
+
+                               irbp->ir_startino = r.ir_startino;
+                               irbp->ir_freecount = r.ir_freecount;
+                               irbp->ir_free = r.ir_free;
+                               irbp++;
+                       }
+
+                       /* Set agino to after this chunk and bump the cursor */
+                       agino = r.ir_startino + XFS_INODES_PER_CHUNK;
+                       error = xfs_btree_increment(cur, 0, &stat);
+                       cond_resched();
+               }
+
+               /*
+                * Drop the btree buffers and the agi buffer.  We can't hold
+                * any of the locks these represent when calling iget.
+                */
+               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+               xfs_buf_relse(agbp);
+
+               irbufend = irbp;
+               for (irbp = irbuf; irbp < irbufend; irbp++) {
+                       error = xfs_qm_dqusage_adjust_ichunk(mp, agno, irbp, &lastino);
+                       if (error)
+                               rval = error;
+                       cond_resched();
+               }
+
+               if (end_of_ag)
+                       break;
+
+               /* Set up for the next loop iteration */
+               agino = XFS_INO_TO_AGINO(mp, lastino);
+       } while (1);
+
+       /* Done, we have run off the end of this AG's inode btree */
+       kmem_free(irbuf);
+
+       return rval;
+}
+
+/*
+ * Worker to iterate through all the inodes in a single allocation
+ * group, adjusting the corresponding dquot counters in core.
+ */
+STATIC void
+xfs_qm_dq_adjust_worker(
+       struct work_struct      *work)
+{
+       struct xfs_dq_adjuster  *qa = container_of(work,
+                                     struct xfs_dq_adjuster, qa_work);
+       int                     error;
+
+       error = xfs_qm_dqusage_adjust_perag(qa);
+       complete(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_init_quotacheck(
+       struct xfs_mount        *mp,
+       struct xfs_quotacheck   *qc)
+{
+       memset(qc, 0, sizeof(*qc));
+
+       INIT_LIST_HEAD(&qc->qc_adjusters);
+       spin_lock_init(&qc->qc_lock);
+       qc->qc_mp = mp;
+       qc->qc_wq = alloc_workqueue("xfs-dqcheck/%s", WQ_NON_REENTRANT,
+                                   0, mp->m_fsname);
+       if (!qc->qc_wq) {
+               list_del(&qc->qc_adjusters);
+               return ENOMEM;
+       }
+
+       return 0;
+}
+
+STATIC void
+xfs_qm_destroy_quotacheck(
+       struct xfs_quotacheck   *qc)
+{
+       destroy_workqueue(qc->qc_wq);
+       spinlock_destroy(&qc->qc_lock);
+       list_del(&qc->qc_adjusters);
+}
+
+STATIC void
+xfs_qm_destroy_adjusters(
+       struct xfs_quotacheck   *qc)
+{
+       struct xfs_dq_adjuster  *qa, *tmp;
+
+       list_for_each_entry_safe(qa, tmp, &qc->qc_adjusters, qa_node) {
+               list_del(&qa->qa_node);
+               kfree(qa);
+       }
+}
+
+STATIC struct xfs_dq_adjuster *
+xfs_qm_alloc_adjuster(
+       struct xfs_quotacheck   *qc,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_dq_adjuster  *qa;
+
+       qa = kzalloc(sizeof(*qa), GFP_NOFS);
+       if (!qa)
+               return NULL;
+
+       qa->qa_qc = qc;
+       qa->qa_mp = qc->qc_mp;
+       qa->qa_agno = agno;
+       INIT_LIST_HEAD(&qa->qa_node);
+       INIT_WORK(&qa->qa_work, xfs_qm_dq_adjust_worker);
+       init_completion(&qa->qa_complete);
+       list_add_tail(&qa->qa_node, &qc->qc_adjusters);
+
+       return qa;
+}
+
+STATIC int
+xfs_qm_alloc_queue_adjusters(
+       struct xfs_quotacheck   *qc)
+{
+       xfs_agnumber_t          agcount = qc->qc_mp->m_sb.sb_agcount;
+       int                     i, error = 0;
+
+       for (i = 0; i < agcount; i++) {
+               struct xfs_dq_adjuster  *qa;
+
+               spin_lock(&qc->qc_lock);
+               qa = xfs_qm_alloc_adjuster(qc, i);
+               if (!qa) {
+                       error = ENOMEM;
+                       spin_unlock(&qc->qc_lock);
+                       goto out_destroy_adjusters;
+               }
+               queue_work(qc->qc_wq, &qa->qa_work);
+               spin_unlock(&qc->qc_lock);
+       }
+
+       return error;
+
+out_destroy_adjusters:
+       xfs_qm_destroy_adjusters(qc);
+       return error;
+}
+
+STATIC void
+xfs_qm_wait_for_adjusters(
+       struct xfs_quotacheck   *qc)
+{
+       struct xfs_dq_adjuster  *qa;
+
+       list_for_each_entry(qa, &qc->qc_adjusters, qa_node)
+               wait_for_completion(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_do_quotacheck(
+       struct xfs_mount        *mp)
+{
+       struct xfs_quotacheck   qc;
+       int                     error;
+
+       error = xfs_qm_init_quotacheck(mp, &qc);
+       if (error)
+               return error;
+
+       /* Allocate and queue adjusters */
+       error = xfs_qm_alloc_queue_adjusters(&qc);
+       if (error)
+               goto out_destroy_quotacheck;
+
+       xfs_qm_wait_for_adjusters(&qc);
+
+       xfs_qm_destroy_adjusters(&qc);
+
+out_destroy_quotacheck:
+       xfs_qm_destroy_quotacheck(&qc);
+
+       return error;
+}
+
 STATIC int
 xfs_qm_flush_one(
        struct xfs_dquot        *dqp,
@@ -1474,7 +1808,7 @@ int
 xfs_qm_quotacheck(
        xfs_mount_t     *mp)
 {
-       int                     done, count, error, error2;
+       int                     count, error, error2;
        xfs_ino_t               lastino;
        size_t                  structsz;
        uint                    flags;
@@ -1522,18 +1856,9 @@ xfs_qm_quotacheck(
                flags |= XFS_PQUOTA_CHKD;
        }
 
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
+       error = xfs_qm_do_quotacheck(mp);
+       if (error)
+               goto error_return;
 
        /*
         * We've made all the changes that we need to make incore.  Flush them
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index a788b66..c7e2e6d 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,6 +26,24 @@ struct xfs_inode;
 
 extern struct kmem_zone        *xfs_qm_dqtrxzone;
 
+struct xfs_dq_adjuster {
+       struct list_head        qa_node;
+       struct xfs_mount        *qa_mp;
+       struct xfs_quotacheck   *qa_qc;
+       xfs_agnumber_t          qa_agno;
+       int                     qa_error;
+       struct work_struct      qa_work;
+       struct completion       qa_complete;
+};
+
+struct xfs_quotacheck {
+       struct list_head        qc_adjusters;
+       spinlock_t              qc_lock;
+       struct xfs_mount        *qc_mp;
+       int                     qc_done;
+       struct workqueue_struct *qc_wq;
+};
+
 /*
  * This defines the unit of allocation of dquots.
  * Currently, it is just one file system block, and a 4K blk contains 30
-- 
1.7.9.5
