During a heavy workload with quota's enabled the system can stall with
every process
requesting log space, however the log is full of dquots. The aild is
trying to push the tail
of the log however every item in the log had previously been pushed by
the aild and was
marked as clean so the aild aborts pushing the items in the log.
The reason the log items are still in the log is because the iodone
routine xfs_qm_dqflush_done
detected that the log sequence number(lsn) had changed and assumed the
log item had been re-dirtied
so it should be left in the log. However the log item had not been
re-dirtied so it was still marked as
clean preventing it from being pushed again causing the log item to be
stuck in the log. After a while
the log eventually filled with these dquot log items.
The reason the lsn had changed was due to it not being initialized by
the time a copy of the lsn was taken
in xfs_qm_dqflush(). The lsn was then latter updated causing the test
in xfs_qm_dqflush_done() to fail.
Synchronizations between the 2 functions is done by the pincount in
struct xfs_dquot_t and xfs_qm_dqflush()
calls xfs_qm_dqunpin_wait() to wait for the pincount == 0. However after
been woken up it does not validate
the pincount is actually 0, allowing a false wake up by the scheduler to
cause xfs_qm_dqflush() to prematurely
start processing the dquot.
So
this patch uses an atomic_t to track the pincount which allows us to easily
use the wait_event macro to wait, this will guarantee
that when we return from
xfs_qm_dqunpin_wait() that the pincount == 0. We also remove the global
qi_pinlock
from xfs_quotainfo which may also reduce contention when pinning dquot's.
Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot_item.c
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot_item.c 2008-09-24
12:02:41.987960702 +1000
+++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot_item.c 2008-09-24
14:22:01.643627312 +1000
@@ -98,9 +98,7 @@ xfs_qm_dquot_logitem_pin(
dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount++;
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ atomic_inc(&dqp->q_pincount);
}
/*
@@ -117,13 +115,9 @@ xfs_qm_dquot_logitem_unpin(
xfs_dquot_t *dqp;
dqp = logitem->qli_dquot;
- ASSERT(dqp->q_pincount > 0);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- dqp->q_pincount--;
- if (dqp->q_pincount == 0) {
- sv_broadcast(&dqp->q_pinwait);
- }
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
+ ASSERT(atomic_read(&dqp->q_pincount) > 0);
+ if (atomic_dec_and_test(&dqp->q_pincount))
+ wake_up(&dqp->q_pinwait);
}
/* ARGSUSED */
@@ -193,21 +187,14 @@ xfs_qm_dqunpin_wait(
xfs_dquot_t *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (dqp->q_pincount == 0) {
+ if (atomic_read(&dqp->q_pincount) == 0)
return;
- }
/*
* Give the log a push so we don't wait here too long.
*/
xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
- spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- if (dqp->q_pincount == 0) {
- spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
- return;
- }
- sv_wait(&(dqp->q_pinwait), PINOD,
- &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+ wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
/*
@@ -310,7 +297,7 @@ xfs_qm_dquot_logitem_trylock(
uint retval;
dqp = qip->qli_dquot;
- if (dqp->q_pincount > 0)
+ if (atomic_read(&dqp->q_pincount) > 0)
return (XFS_ITEM_PINNED);
if (! xfs_qm_dqlock_nowait(dqp))
Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot.h
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot.h 2008-09-24
12:02:41.991960179 +1000
+++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot.h 2008-09-24 14:20:28.979820915
+1000
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
mutex_t q_qlock; /* quota lock */
struct completion q_flush; /* flush completion queue */
- uint q_pincount; /* pin count for this dquot */
- sv_t q_pinwait; /* sync var for pinning */
+ atomic_t q_pincount; /* dquot pin count */
+ wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
#ifdef XFS_DQUOT_TRACE
struct ktrace *q_trace; /* trace header structure */
#endif
Index: 2.6.x-xfs/fs/xfs/quota/xfs_dquot.c
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/quota/xfs_dquot.c 2008-09-24
12:09:14.025200071 +1000
+++ 2.6.x-xfs/fs/xfs/quota/xfs_dquot.c 2008-09-24 14:20:56.948140364
+1000
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
if (brandnewdquot) {
dqp->dq_flnext = dqp->dq_flprev = dqp;
mutex_init(&dqp->q_qlock);
- sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+ init_waitqueue_head(&dqp->q_pinwait);
/*
* Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
dqp->q_res_bcount = 0;
dqp->q_res_icount = 0;
dqp->q_res_rtbcount = 0;
- dqp->q_pincount = 0;
+ atomic_set(&dqp->q_pincount, 0);
dqp->q_hash = NULL;
ASSERT(dqp->dq_flnext == dqp->dq_flprev);
@@ -1490,7 +1490,7 @@ xfs_qm_dqpurge(
"xfs_qm_dqpurge: dquot %p flush failed", dqp);
xfs_dqflock(dqp);
}
- ASSERT(dqp->q_pincount == 0);
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
Index: 2.6.x-xfs/fs/xfs/xfsidbg.c
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/xfsidbg.c 2008-09-24 12:02:42.115943985 +1000
+++ 2.6.x-xfs/fs/xfs/xfsidbg.c 2008-09-24 14:21:17.413447303 +1000
@@ -6577,7 +6577,7 @@ xfsidbg_xqm_dquot(xfs_dquot_t *dqp)
(unsigned long long)dqp->q_res_rtbcount);
kdb_printf("qlock 0x%p &q_flush 0x%p (%d) pincount 0x%x\n",
&dqp->q_qlock, &dqp->q_flush,
- dqp->q_flush.done, dqp->q_pincount);
+ dqp->q_flush.done, atomic_read(&dqp->q_pincount));
#ifdef XFS_DQUOT_TRACE
qprintf("dqtrace 0x%p\n", dqp->q_trace);
#endif
@@ -6761,10 +6761,9 @@ xfsidbg_xqm_qinfo(xfs_mount_t *mp)
return;
}
- kdb_printf("uqip 0x%p, gqip 0x%p, &pinlock 0x%p &dqlist 0x%p\n",
+ kdb_printf("uqip 0x%p, gqip 0x%p, &dqlist 0x%p\n",
mp->m_quotainfo->qi_uquotaip,
mp->m_quotainfo->qi_gquotaip,
- &mp->m_quotainfo->qi_pinlock,
&mp->m_quotainfo->qi_dqlist);
kdb_printf("btmlimit 0x%x, itmlimit 0x%x, RTbtmlim 0x%x\n",
Index: 2.6.x-xfs/fs/xfs/quota/xfs_qm.h
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/quota/xfs_qm.h 2008-09-24
12:02:41.999959135 +1000
+++ 2.6.x-xfs/fs/xfs/quota/xfs_qm.h 2008-09-24 14:00:36.168448681 +1000
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
typedef struct xfs_quotainfo {
xfs_inode_t *qi_uquotaip; /* user quota inode */
xfs_inode_t *qi_gquotaip; /* group quota inode */
- spinlock_t qi_pinlock; /* dquot pinning lock */
xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
int qi_dqreclaims; /* a change here indicates
a removal in the dqlist */
Index: 2.6.x-xfs/fs/xfs/quota/xfs_qm.c
===================================================================
--- 2.6.x-xfs.orig/fs/xfs/quota/xfs_qm.c 2008-09-24
12:02:42.000000000 +1000
+++ 2.6.x-xfs/fs/xfs/quota/xfs_qm.c 2008-09-24 14:03:44.795703839 +1000
@@ -1137,7 +1137,6 @@ xfs_qm_init_quotainfo(
return error;
}
- spin_lock_init(&qinf->qi_pinlock);
xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
qinf->qi_dqreclaims = 0;
@@ -1234,7 +1233,6 @@ xfs_qm_destroy_quotainfo(
*/
xfs_qm_rele_quotafs_ref(mp);
- spinlock_destroy(&qi->qi_pinlock);
xfs_qm_list_destroy(&qi->qi_dqlist);
if (qi->qi_uquotaip) {
|