xfs
[Top] [All Lists]

[patch 12/19] xfs: flatten the dquot lock ordering

To: xfs@xxxxxxxxxxx
Subject: [patch 12/19] xfs: flatten the dquot lock ordering
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Tue, 06 Dec 2011 16:58:18 -0500
References: <20111206215806.844405397@xxxxxxxxxxxxxxxxxxxxxx>
User-agent: quilt/0.48-1
Introduce a new XFS_DQ_FREEING flag that tells lookup and mplist walks
to skip a dquot that is beeing freed, and use this avoid the trylock
on the hash and mplist locks in xfs_qm_dqreclaim_one.  Also simplify
xfs_dqpurge by moving the inodes to a dispose list after marking them
XFS_DQ_FREEING and avoid the locker ordering constraints.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

---
 fs/xfs/xfs_dquot.c |  113 +++++++++++++++++++-------------------------
 fs/xfs/xfs_dquot.h |    2 
 fs/xfs/xfs_qm.c    |  134 +++++++++++++++++++----------------------------------
 fs/xfs/xfs_quota.h |    4 +
 4 files changed, 103 insertions(+), 150 deletions(-)

Index: xfs/fs/xfs/xfs_dquot.c
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.c 2011-12-06 15:41:03.000000000 +0100
+++ xfs/fs/xfs/xfs_dquot.c      2011-12-06 15:46:19.730356139 +0100
@@ -728,6 +728,12 @@ xfs_qm_dqlookup(
                trace_xfs_dqlookup_found(dqp);
 
                xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_FREEING) {
+                       *O_dqpp = NULL;
+                       xfs_dqunlock(dqp);
+                       return -1;
+               }
+
                XFS_DQHOLD(dqp);
 
                /*
@@ -781,11 +787,7 @@ xfs_qm_dqget(
                        return (EIO);
                }
        }
-#endif
 
- again:
-
-#ifdef DEBUG
        ASSERT(type == XFS_DQ_USER ||
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
@@ -797,13 +799,21 @@ xfs_qm_dqget(
                        ASSERT(ip->i_gdquot == NULL);
        }
 #endif
+
+restart:
        mutex_lock(&h->qh_lock);
 
        /*
         * Look in the cache (hashtable).
         * The chain is kept locked during lookup.
         */
-       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+       switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+       case -1:
+               XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+               mutex_unlock(&h->qh_lock);
+               delay(1);
+               goto restart;
+       case 0:
                XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
                /*
                 * The dquot was found, moved to the front of the chain,
@@ -814,9 +824,11 @@ xfs_qm_dqget(
                ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
                mutex_unlock(&h->qh_lock);
                trace_xfs_dqget_hit(*O_dqpp);
-               return (0);     /* success */
+               return 0;       /* success */
+       default:
+               XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+               break;
        }
-       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
 
        /*
         * Dquot cache miss. We don't want to keep the inode lock across
@@ -913,16 +925,21 @@ xfs_qm_dqget(
                 * lock order between the two dquots here since dqp isn't
                 * on any findable lists yet.
                 */
-               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+               switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+               case 0:
+               case -1:
                        /*
-                        * Duplicate found. Just throw away the new dquot
-                        * and start over.
+                        * Duplicate found, either in cache or on its way out.
+                        * Just throw away the new dquot and start over.
                         */
-                       xfs_qm_dqput(tmpdqp);
+                       if (tmpdqp)
+                               xfs_qm_dqput(tmpdqp);
                        mutex_unlock(&h->qh_lock);
                        xfs_qm_dqdestroy(dqp);
                        XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto again;
+                       goto restart;
+               default:
+                       break;
                }
        }
 
@@ -1250,51 +1267,18 @@ xfs_dqlock2(
        }
 }
 
-
 /*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
+ * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
+ * called via unmount as well as quotaoff, and the purge will always succeed.
  */
-/* ARGSUSED */
-int
+void
 xfs_qm_dqpurge(
-       xfs_dquot_t     *dqp)
+       struct xfs_dquot        *dqp)
 {
-       xfs_dqhash_t    *qh = dqp->q_hash;
-       xfs_mount_t     *mp = dqp->q_mount;
-
-       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
-       /*
-        * XXX(hch): horrible locking order, will get cleaned up ASAP.
-        */
-       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return 1;
-       }
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_dqhash       *qh = dqp->q_hash;
 
        xfs_dqlock(dqp);
-       /*
-        * We really can't afford to purge a dquot that is
-        * referenced, because these are hard refs.
-        * It shouldn't happen in general because we went thru _all_ inodes in
-        * dqrele_all_inodes before calling this and didn't let the mountlock 
go.
-        * However it is possible that we have dquots with temporary
-        * references that are not attached to an inode. e.g. see xfs_setattr().
-        */
-       if (dqp->q_nrefs != 0) {
-               xfs_dqunlock(dqp);
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return (1);
-       }
-
-       ASSERT(!list_empty(&dqp->q_freelist));
 
        /*
         * If we're turning off quotas, we have to make sure that, for
@@ -1313,19 +1297,14 @@ xfs_qm_dqpurge(
        }
 
        /*
-        * XXXIf we're turning this type of quotas off, we don't care
+        * If we are turning this type of quotas off, we don't care
         * about the dirty metadata sitting in this dquot. OTOH, if
         * we're unmounting, we do care, so we flush it and wait.
         */
        if (XFS_DQ_IS_DIRTY(dqp)) {
                int     error;
 
-               /* dqflush unlocks dqflock */
                /*
-                * Given that dqpurge is a very rare occurrence, it is OK
-                * that we're holding the hashlist and mplist locks
-                * across the disk write. But, ... XXXsup
-                *
                 * We don't care about getting disk errors here. We need
                 * to purge this dquot anyway, so we go ahead regardless.
                 */
@@ -1335,28 +1314,36 @@ xfs_qm_dqpurge(
                                __func__, dqp);
                xfs_dqflock(dqp);
        }
+
        ASSERT(atomic_read(&dqp->q_pincount) == 0);
        ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
               !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
 
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+
+       mutex_lock(&qh->qh_lock);
        list_del_init(&dqp->q_hashlist);
        qh->qh_version++;
+       mutex_unlock(&qh->qh_lock);
 
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
        list_del_init(&dqp->q_mplist);
        mp->m_quotainfo->qi_dqreclaims++;
        mp->m_quotainfo->qi_dquots--;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
 
+       /*
+        * We move dquots to the freelist as soon as their reference count
+        * hits zero, so it really should be on the freelist here.
+        */
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+       ASSERT(!list_empty(&dqp->q_freelist));
        list_del_init(&dqp->q_freelist);
        xfs_Gqm->qm_dqfrlist_cnt--;
-
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-
        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-       mutex_unlock(&qh->qh_lock);
 
        xfs_qm_dqdestroy(dqp);
-       return 0;
 }
 
 /*
Index: xfs/fs/xfs/xfs_qm.c
===================================================================
--- xfs.orig/fs/xfs/xfs_qm.c    2011-12-06 15:41:03.870350281 +0100
+++ xfs/fs/xfs/xfs_qm.c 2011-12-06 15:48:38.753692050 +0100
@@ -398,7 +398,8 @@ again:
        mutex_lock(&q->qi_dqlist_lock);
        list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
+               if ((dqp->dq_flags & XFS_DQ_FREEING) ||
+                   !XFS_DQ_IS_DIRTY(dqp)) {
                        xfs_dqunlock(dqp);
                        continue;
                }
@@ -437,6 +438,7 @@ again:
        /* return ! busy */
        return 0;
 }
+
 /*
  * Release the group dquot pointers the user dquots may be
  * carrying around as a hint. mplist is locked on entry and exit.
@@ -453,6 +455,13 @@ xfs_qm_detach_gdquots(
        ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
        list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_FREEING) {
+                       xfs_dqunlock(dqp);
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       delay(1);
+                       mutex_lock(&q->qi_dqlist_lock);
+                       goto again;
+               }
                if ((gdqp = dqp->q_gdquot)) {
                        xfs_dqlock(gdqp);
                        dqp->q_gdquot = NULL;
@@ -489,8 +498,8 @@ xfs_qm_dqpurge_int(
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_dquot        *dqp, *n;
        uint                    dqtype;
-       int                     nrecl;
-       int                     nmisses;
+       int                     nmisses = 0;
+       LIST_HEAD               (dispose_list);
 
        if (!q)
                return 0;
@@ -509,46 +518,26 @@ xfs_qm_dqpurge_int(
         */
        xfs_qm_detach_gdquots(mp);
 
-      again:
-       nmisses = 0;
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
        /*
-        * Try to get rid of all of the unwanted dquots. The idea is to
-        * get them off mplist and hashlist, but leave them on freelist.
+        * Try to get rid of all of the unwanted dquots.
         */
        list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
-               if ((dqp->dq_flags & dqtype) == 0) {
-                       xfs_dqunlock(dqp);
-                       continue;
+               if ((dqp->dq_flags & dqtype) != 0 &&
+                   !(dqp->dq_flags & XFS_DQ_FREEING)) {
+                       if (dqp->q_nrefs == 0) {
+                               dqp->dq_flags |= XFS_DQ_FREEING;
+                               list_move_tail(&dqp->q_mplist, &dispose_list);
+                       } else
+                               nmisses++;
                }
                xfs_dqunlock(dqp);
-
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       mutex_lock(&dqp->q_hash->qh_lock);
-                       mutex_lock(&q->qi_dqlist_lock);
-
-                       /*
-                        * XXXTheoretically, we can get into a very long
-                        * ping pong game here.
-                        * No one can be adding dquots to the mplist at
-                        * this point, but somebody might be taking things off.
-                        */
-                       if (nrecl != q->qi_dqreclaims) {
-                               mutex_unlock(&dqp->q_hash->qh_lock);
-                               goto again;
-                       }
-               }
-
-               /*
-                * Take the dquot off the mplist and hashlist. It may remain on
-                * freelist in INACTIVE state.
-                */
-               nmisses += xfs_qm_dqpurge(dqp);
        }
        mutex_unlock(&q->qi_dqlist_lock);
+
+       list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
+               xfs_qm_dqpurge(dqp);
+
        return nmisses;
 }
 
@@ -1667,25 +1656,16 @@ xfs_qm_init_quotainos(
 
 
 /*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
+ * Pop the least recently used dquot off the freelist and recycle it.
  */
-STATIC xfs_dquot_t *
+STATIC struct xfs_dquot *
 xfs_qm_dqreclaim_one(void)
 {
-       xfs_dquot_t     *dqpout;
-       xfs_dquot_t     *dqp;
-       int             restarts;
-       int             startagain;
-
-       restarts = 0;
-       dqpout = NULL;
+       struct xfs_dquot        *dqp;
+       int                     restarts = 0;
 
-       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock 
*/
-again:
-       startagain = 0;
        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
+restart:
        list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
                struct xfs_mount *mp = dqp->q_mount;
                xfs_dqlock(dqp);
@@ -1701,7 +1681,6 @@ again:
                        list_del_init(&dqp->q_freelist);
                        xfs_Gqm->qm_dqfrlist_cnt--;
                        restarts++;
-                       startagain = 1;
                        goto dqunlock;
                }
 
@@ -1737,57 +1716,42 @@ again:
                        }
                        goto dqunlock;
                }
+               xfs_dqfunlock(dqp);
 
                /*
-                * We're trying to get the hashlock out of order. This races
-                * with dqlookup; so, we giveup and goto the next dquot if
-                * we couldn't get the hashlock. This way, we won't starve
-                * a dqlookup process that holds the hashlock that is
-                * waiting for the freelist lock.
+                * Prevent lookup now that we are going to reclaim the dquot.
+                * Once XFS_DQ_FREEING is set lookup won't touch the inode,
+                * thus we can drop the lock now.
                 */
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       restarts++;
-                       goto dqfunlock;
-               }
+               dqp->dq_flags |= XFS_DQ_FREEING;
+               xfs_dqunlock(dqp);
 
-               /*
-                * This races with dquot allocation code as well as dqflush_all
-                * and reclaim code. So, if we failed to grab the mplist lock,
-                * giveup everything and start over.
-                */
-               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-                       restarts++;
-                       startagain = 1;
-                       goto qhunlock;
-               }
+               mutex_lock(&dqp->q_hash->qh_lock);
+               list_del_init(&dqp->q_hashlist);
+               dqp->q_hash->qh_version++;
+               mutex_unlock(&dqp->q_hash->qh_lock);
 
-               ASSERT(dqp->q_nrefs == 0);
+               mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
                list_del_init(&dqp->q_mplist);
                mp->m_quotainfo->qi_dquots--;
                mp->m_quotainfo->qi_dqreclaims++;
-               list_del_init(&dqp->q_hashlist);
-               dqp->q_hash->qh_version++;
+               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
+               ASSERT(dqp->q_nrefs == 0);
                list_del_init(&dqp->q_freelist);
                xfs_Gqm->qm_dqfrlist_cnt--;
-               dqpout = dqp;
-               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
-               mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-               xfs_dqfunlock(dqp);
+
+               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+               return dqp;
 dqunlock:
                xfs_dqunlock(dqp);
-               if (dqpout)
-                       break;
                if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
                        break;
-               if (startagain) {
-                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       goto again;
-               }
+               goto restart;
        }
+
        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-       return dqpout;
+       return NULL;
 }
 
 /*
Index: xfs/fs/xfs/xfs_quota.h
===================================================================
--- xfs.orig/fs/xfs/xfs_quota.h 2011-12-06 15:41:03.000000000 +0100
+++ xfs/fs/xfs/xfs_quota.h      2011-12-06 15:41:27.023684043 +0100
@@ -87,6 +87,7 @@ typedef struct xfs_dqblk {
 #define XFS_DQ_PROJ            0x0002          /* project quota */
 #define XFS_DQ_GROUP           0x0004          /* a group quota */
 #define XFS_DQ_DIRTY           0x0008          /* dquot is dirty */
+#define XFS_DQ_FREEING         0x0010          /* dquot is beeing torn down */
 
 #define XFS_DQ_ALLTYPES                (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
 
@@ -94,7 +95,8 @@ typedef struct xfs_dqblk {
        { XFS_DQ_USER,          "USER" }, \
        { XFS_DQ_PROJ,          "PROJ" }, \
        { XFS_DQ_GROUP,         "GROUP" }, \
-       { XFS_DQ_DIRTY,         "DIRTY" }
+       { XFS_DQ_DIRTY,         "DIRTY" }, \
+       { XFS_DQ_FREEING,       "FREEING" }
 
 /*
  * In the worst case, when both user and group quotas are on,
Index: xfs/fs/xfs/xfs_dquot.h
===================================================================
--- xfs.orig/fs/xfs/xfs_dquot.h 2011-12-06 15:38:33.000000000 +0100
+++ xfs/fs/xfs/xfs_dquot.h      2011-12-06 15:41:27.023684043 +0100
@@ -133,7 +133,7 @@ static inline void xfs_dqunlock_nonotify
 
 extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int             xfs_qm_dqpurge(xfs_dquot_t *);
+extern void            xfs_qm_dqpurge(xfs_dquot_t *);
 extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
                                        xfs_disk_dquot_t *);

<Prev in Thread] Current Thread [Next in Thread>