| To: | xfs@xxxxxxxxxxx, xfs-dev@xxxxxxx |
|---|---|
| Subject: | [PATCH] Use atomic_t and wait_event to track dquot pincount |
| From: | Peter Leckie <pleckie@xxxxxxx> |
| Date: | Wed, 24 Sep 2008 14:28:13 +1000 |
| Sender: | xfs-bounce@xxxxxxxxxxx |
| User-agent: | Thunderbird 1.5.0.10 (X11/20070305) |
|
During a heavy workload with quota's enabled the system can stall with
every process requesting log space, however the log is full of dquots. The aild is trying to push the tail of the log however every item in the log had previously been pushed by the aild and was marked as clean so the aild aborts pushing the items in the log. The reason the log items are still in the log is because the iodone routine xfs_qm_dqflush_done detected that the log sequence number(lsn) had changed and assumed the log item had been re-dirtied so it should be left in the log. However the log item had not been re-dirtied so it was still marked as clean preventing it from being pushed again causing the log item to be stuck in the log. After a while the log eventually filled with these dquot log items. The reason the lsn had changed was due to it not being initialized by the time a copy of the lsn was taken in xfs_qm_dqflush(). The lsn was then latter updated causing the test in xfs_qm_dqflush_done() to fail. Synchronizations between the 2 functions is done by the pincount in struct xfs_dquot_t and xfs_qm_dqflush() calls xfs_qm_dqunpin_wait() to wait for the pincount == 0. However after been woken up it does not validate the pincount is actually 0, allowing a false wake up by the scheduler to cause xfs_qm_dqflush() to prematurely start processing the dquot. So this patch uses an atomic_t to track the pincount which allows us to easily use the wait_event macro to wait, this will guarantee that when we return from xfs_qm_dqunpin_wait() that the pincount == 0. We also remove the global qi_pinlock from xfs_quotainfo which may also reduce contention when pinning dquot's. Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot_item.c =================================================================== --- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot_item.c 2008-09-24 12:02:41.987960702 +1000 +++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot_item.c 2008-09-24 14:22:01.643627312 +1000 @@ -98,9 +98,7 @@ xfs_qm_dquot_logitem_pin( dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount++;
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ atomic_inc(&dqp->q_pincount);
}/*
@@ -117,13 +115,9 @@ xfs_qm_dquot_logitem_unpin(
xfs_dquot_t *dqp; dqp = logitem->qli_dquot;
- ASSERT(dqp->q_pincount > 0);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount--;
- if (dqp->q_pincount == 0) {
- sv_broadcast(&dqp->q_pinwait);
- }
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ ASSERT(atomic_read(&dqp->q_pincount) > 0);
+ if (atomic_dec_and_test(&dqp->q_pincount))
+ wake_up(&dqp->q_pinwait);
}/* ARGSUSED */
@@ -193,21 +187,14 @@ xfs_qm_dqunpin_wait(
xfs_dquot_t *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (dqp->q_pincount == 0) {
+ if (atomic_read(&dqp->q_pincount) == 0)
return;
- } /*
* Give the log a push so we don't wait here too long.
*/
xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- if (dqp->q_pincount == 0) {
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- return;
- }
- sv_wait(&(dqp->q_pinwait), PINOD,
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+ wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}/*
@@ -310,7 +297,7 @@ xfs_qm_dquot_logitem_trylock(
uint retval; dqp = qip->qli_dquot;
- if (dqp->q_pincount > 0)
+ if (atomic_read(&dqp->q_pincount) > 0)
return (XFS_ITEM_PINNED);if (! xfs_qm_dqlock_nowait(dqp)) Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot.h =================================================================== --- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot.h 2008-09-24 12:02:41.991960179 +1000 +++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot.h 2008-09-24 14:20:28.979820915 +1000 @@ -83,8 +83,8 @@ typedef struct xfs_dquot { xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ mutex_t q_qlock; /* quota lock */ struct completion q_flush; /* flush completion queue */ - uint q_pincount; /* pin count for this dquot */ - sv_t q_pinwait; /* sync var for pinning */ + atomic_t q_pincount; /* dquot pin count */ + wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ #ifdef XFS_DQUOT_TRACE struct ktrace *q_trace; /* trace header structure */ #endif Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot.c =================================================================== --- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot.c 2008-09-24 12:09:14.025200071 +1000 +++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot.c 2008-09-24 14:20:56.948140364 +1000 @@ -101,7 +101,7 @@ xfs_qm_dqinit( if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; mutex_init(&dqp->q_qlock); - sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + init_waitqueue_head(&dqp->q_pinwait); /*
* Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
dqp->q_res_bcount = 0;
dqp->q_res_icount = 0;
dqp->q_res_rtbcount = 0;
- dqp->q_pincount = 0;
+ atomic_set(&dqp->q_pincount, 0);
dqp->q_hash = NULL;
ASSERT(dqp->dq_flnext == dqp->dq_flprev);@@ -1490,7 +1490,7 @@ xfs_qm_dqpurge(
"xfs_qm_dqpurge: dquot %p flush failed", dqp);
xfs_dqflock(dqp);
}
- ASSERT(dqp->q_pincount == 0);
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));Index: 2.6.x-xfs/fs/xfs/xfsidbg.c
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/xfsidbg.c 2008-09-24 12:02:42.115943985 +1000
+++ 2.6.x-xfs/fs/xfs/xfsidbg.c 2008-09-24 14:21:17.413447303 +1000
@@ -6577,7 +6577,7 @@ xfsidbg_xqm_dquot(xfs_dquot_t *dqp)
(unsigned long long)dqp->q_res_rtbcount);
kdb_printf("qlock 0x%p &q_flush 0x%p (%d) pincount 0x%x\n",
&dqp->q_qlock, &dqp->q_flush,
- dqp->q_flush.done, dqp->q_pincount);
+ dqp->q_flush.done, atomic_read(&dqp->q_pincount));
#ifdef XFS_DQUOT_TRACE
qprintf("dqtrace 0x%p\n", dqp->q_trace);
#endif
@@ -6761,10 +6761,9 @@ xfsidbg_xqm_qinfo(xfs_mount_t *mp)
return;
}- kdb_printf("uqip 0x%p, gqip 0x%p, &pinlock 0x%p &dqlist 0x%p\n",
+ kdb_printf("uqip 0x%p, gqip 0x%p, &dqlist 0x%p\n",
mp->m_quotainfo->qi_uquotaip,
mp->m_quotainfo->qi_gquotaip,
- &mp->m_quotainfo->qi_pinlock,
&mp->m_quotainfo->qi_dqlist);kdb_printf("btmlimit 0x%x, itmlimit 0x%x, RTbtmlim 0x%x\n", Index: 2.6.x-xfs/fs/xfs/quota/xfs_qm.h =================================================================== --- 2.6.x-xfs.orig/fs/xfs/quota/xfs_qm.h 2008-09-24 12:02:41.999959135 +1000 +++ 2.6.x-xfs/fs/xfs/quota/xfs_qm.h 2008-09-24 14:00:36.168448681 +1000 @@ -106,7 +106,6 @@ typedef struct xfs_qm { typedef struct xfs_quotainfo { xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - spinlock_t qi_pinlock; /* dquot pinning lock */ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ int qi_dqreclaims; /* a change here indicates a removal in the dqlist */ Index: 2.6.x-xfs/fs/xfs/quota/xfs_qm.c =================================================================== --- 2.6.x-xfs.orig/fs/xfs/quota/xfs_qm.c 2008-09-24 12:02:42.000000000 +1000 +++ 2.6.x-xfs/fs/xfs/quota/xfs_qm.c 2008-09-24 14:03:44.795703839 +1000 @@ -1137,7 +1137,6 @@ xfs_qm_init_quotainfo( return error; } - spin_lock_init(&qinf->qi_pinlock);
xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
qinf->qi_dqreclaims = 0;@@ -1234,7 +1233,6 @@ xfs_qm_destroy_quotainfo(
*/
xfs_qm_rele_quotafs_ref(mp);- spinlock_destroy(&qi->qi_pinlock);
xfs_qm_list_destroy(&qi->qi_dqlist); if (qi->qi_uquotaip) { |
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| ||
| Previous by Date: | Re: [PATCH] XFS: Check for valid transaction headers in recovery, Dave Chinner |
|---|---|
| Next by Date: | [PATCH] don't use signed int to store xfs_dqid_t, Peter Leckie |
| Previous by Thread: | [PATCH] XFS: Check for valid transaction headers in recovery, Dave Chinner |
| Next by Thread: | Re: [PATCH] Use atomic_t and wait_event to track dquot pincount, Dave Chinner |
| Indexes: | [Date] [Thread] [Top] [All Lists] |