xfs
[Top] [All Lists]

[Patch] Per iclog callback chain lock

To: xfs-dev <xfs-dev@xxxxxxx>
Subject: [Patch] Per iclog callback chain lock
From: David Chinner <dgc@xxxxxxx>
Date: Wed, 2 Apr 2008 09:13:48 +1000
Cc: xfs-oss <xfs@xxxxxxxxxxx>
Sender: xfs-bounce@xxxxxxxxxxx
User-agent: Mutt/1.4.2.1i
Introduce an iclog callback chain lock.

Rather than use the icloglock for protecting the iclog completion
callback chain, use a new per-iclog lock so that walking the
callback chain doesn't require holding a global lock.

This reduces contention on the icloglock during log buffer I/O
completion, as the callback chain lock is taken for every callback
that is issued. On large log buffers, the callbacks can number in the
hundreds to thousands per iclog, so isolating the lock to the
iclog makes a lot of sense.

Signed-off-by: Dave Chinner <dgc@xxxxxxx>
---
 fs/xfs/xfs_log.c      |   35 +++++++++++++++++++----------------
 fs/xfs/xfs_log_priv.h |   33 ++++++++++++++++++++++++++-------
 2 files changed, 45 insertions(+), 23 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c 2008-03-13 13:10:23.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log.c      2008-03-13 19:35:51.251913648 +1100
@@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t          *mp,          /* mo
               void               *iclog_hndl,  /* iclog to hang callback off */
               xfs_log_callback_t *cb)
 {
-       xlog_t *log = mp->m_log;
        xlog_in_core_t    *iclog = (xlog_in_core_t *)iclog_hndl;
        int     abortflg;
 
-       cb->cb_next = NULL;
-       spin_lock(&log->l_icloglock);
+       spin_lock(&iclog->ic_callback_lock);
        abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
        if (!abortflg) {
                ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t    *mp,          /* mo
                *(iclog->ic_callback_tail) = cb;
                iclog->ic_callback_tail = &(cb->cb_next);
        }
-       spin_unlock(&log->l_icloglock);
+       spin_unlock(&iclog->ic_callback_lock);
        return abortflg;
 }      /* xfs_log_notify */
 
@@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t        *mp,
                iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
                iclog->ic_state = XLOG_STATE_ACTIVE;
                iclog->ic_log = log;
+               atomic_set(&iclog->ic_refcnt, 0);
+               spin_lock_init(&iclog->ic_callback_lock);
                iclog->ic_callback_tail = &(iclog->ic_callback);
                iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
 
@@ -1990,7 +1990,7 @@ xlog_state_clean_log(xlog_t *log)
                if (iclog->ic_state == XLOG_STATE_DIRTY) {
                        iclog->ic_state = XLOG_STATE_ACTIVE;
                        iclog->ic_offset       = 0;
-                       iclog->ic_callback      = NULL;   /* don't need to free */
+                       ASSERT(iclog->ic_callback == NULL);
                        /*
                         * If the number of ops in this iclog indicate it just
                         * contains the dummy transaction, we can
@@ -2193,37 +2193,40 @@ xlog_state_do_callback(
                                        be64_to_cpu(iclog->ic_header.h_lsn);
                                spin_unlock(&log->l_grant_lock);
 
-                               /*
-                                * Keep processing entries in the callback list
-                                * until we come around and it is empty.  We
-                                * need to atomically see that the list is
-                                * empty and change the state to DIRTY so that
-                                * we don't miss any more callbacks being added.
-                                */
-                               spin_lock(&log->l_icloglock);
                        } else {
+                               spin_unlock(&log->l_icloglock);
                                ioerrors++;
                        }
-                       cb = iclog->ic_callback;
 
+                       /*
+                        * Keep processing entries in the callback list until
+                        * we come around and it is empty.  We need to
+                        * atomically see that the list is empty and change the
+                        * state to DIRTY so that we don't miss any more
+                        * callbacks being added.
+                        */
+                       spin_lock(&iclog->ic_callback_lock);
+                       cb = iclog->ic_callback;
                        while (cb) {
                                iclog->ic_callback_tail = &(iclog->ic_callback);
                                iclog->ic_callback = NULL;
-                               spin_unlock(&log->l_icloglock);
+                               spin_unlock(&iclog->ic_callback_lock);
 
                                /* perform callbacks in the order given */
                                for (; cb; cb = cb_next) {
                                        cb_next = cb->cb_next;
                                        cb->cb_func(cb->cb_arg, aborted);
                                }
-                               spin_lock(&log->l_icloglock);
+                               spin_lock(&iclog->ic_callback_lock);
                                cb = iclog->ic_callback;
                        }
 
                        loopdidcallbacks++;
                        funcdidcallbacks++;
 
+                       spin_lock(&log->l_icloglock);
                        ASSERT(iclog->ic_callback == NULL);
+                       spin_unlock(&iclog->ic_callback_lock);
                        if (!(iclog->ic_state & XLOG_STATE_IOERROR))
                                iclog->ic_state = XLOG_STATE_DIRTY;
 
Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h    2008-02-22 13:48:25.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h 2008-03-13 19:34:57.430809151 +1100
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
  * - ic_offset is the current number of bytes written to in this iclog.
  * - ic_refcnt is bumped when someone is writing to the log.
  * - ic_state is the state of the iclog.
+ *
+ * Because of cacheline contention on large machines, we need to separate
+ * various resources onto different cachelines. To start with, make the
+ * structure cacheline aligned. The following fields can be contended on
+ * by independent processes:
+ *
+ *     - ic_callback_*
+ *     - ic_refcnt
+ *     - fields protected by the global l_icloglock
+ *
+ * so we need to ensure that these fields are located in separate cachelines.
+ * We'll put all the read-only and l_icloglock fields in the first cacheline,
+ * and move everything else out to subsequent cachelines.
  */
 typedef struct xlog_iclog_fields {
        sv_t                    ic_forcesema;
@@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
        struct xlog_in_core     *ic_prev;
        struct xfs_buf          *ic_bp;
        struct log              *ic_log;
-       xfs_log_callback_t      *ic_callback;
-       xfs_log_callback_t      **ic_callback_tail;
-#ifdef XFS_LOG_TRACE
-       struct ktrace           *ic_trace;
-#endif
        int                     ic_size;
        int                     ic_offset;
-       atomic_t                ic_refcnt;
        int                     ic_bwritecnt;
        ushort_t                ic_state;
        char                    *ic_datap;      /* pointer to iclog data */
-} xlog_iclog_fields_t;
+#ifdef XFS_LOG_TRACE
+       struct ktrace           *ic_trace;
+#endif
+
+       /* Callback structures need their own cacheline */
+       spinlock_t              ic_callback_lock ____cacheline_aligned_in_smp;
+       xfs_log_callback_t      *ic_callback;
+       xfs_log_callback_t      **ic_callback_tail;
+
+       /* reference counts need their own cacheline */
+       atomic_t                ic_refcnt ____cacheline_aligned_in_smp;
+} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
 
 typedef union xlog_in_core2 {
        xlog_rec_header_t       hic_header;
@@ -366,6 +384,7 @@ typedef struct xlog_in_core {
 #define        ic_bp           hic_fields.ic_bp
 #define        ic_log          hic_fields.ic_log
 #define        ic_callback     hic_fields.ic_callback
+#define        ic_callback_lock hic_fields.ic_callback_lock
 #define        ic_callback_tail hic_fields.ic_callback_tail
 #define        ic_trace        hic_fields.ic_trace
 #define        ic_size         hic_fields.ic_size


[Prev in Thread] Current Thread [Next in Thread]