[PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit
Christoph Hellwig
hch at infradead.org
Tue Sep 14 09:48:10 CDT 2010
On Tue, Sep 14, 2010 at 08:56:01PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner at redhat.com>
>
> When committing a transaction, we do a CIL state lock round trip
> on every single log vector we insert into the CIL. This results
> in the lock being as hot as the inode and dcache locks on 8-way
> create workloads. Rework the insertion loops to bring the number
> of lock round trips down to one per transaction for the log
> vectors, and one more for the busy extents.
>
> Also change the log vector buffer allocation so that it is not
> zeroed, as we copy over the entire allocated buffer anyway.
The change looks good, but I think the function names and splits in
the CIL insert code are now a bit confusing. What about merging
something like the patch below into yours?
Index: xfs/fs/xfs/xfs_log_cil.c
===================================================================
--- xfs.orig/fs/xfs/xfs_log_cil.c 2010-09-14 11:32:36.365935029 -0300
+++ xfs/fs/xfs/xfs_log_cil.c 2010-09-14 11:43:58.046935056 -0300
@@ -145,6 +145,47 @@ xlog_cil_init_post_recovery(
log->l_curr_block);
}
+static void
+xlog_cil_prepare_item(
+ struct log *log,
+ struct xfs_log_vec *lv,
+ int *len,
+ int *diff_iovecs)
+{
+ struct xfs_log_vec *old = lv->lv_item->li_lv;
+
+ if (old) {
+ /* existing lv on log item, space used is a delta */
+ ASSERT(!list_empty(&lv->lv_item->li_cil));
+ ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+
+ *len += lv->lv_buf_len - old->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+ kmem_free(old->lv_buf);
+ kmem_free(old);
+ } else {
+ /* new lv, must pin the log item */
+ ASSERT(!lv->lv_item->li_lv);
+ ASSERT(list_empty(&lv->lv_item->li_cil));
+
+ *len += lv->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs;
+ IOP_PIN(lv->lv_item);
+ }
+
+ /* attach new log vector to log item */
+ lv->lv_item->li_lv = lv;
+
+ /*
+ * If this is the first time the item is being committed to the
+ * CIL, store the sequence number on the log item so we can
+ * tell in future commits whether this is the first checkpoint
+ * the item is being committed into.
+ */
+ if (!lv->lv_item->li_seq)
+ lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
/*
* Insert the log items into the CIL and calculate the difference in space
* consumed by the item. Add the space to the checkpoint ticket and calculate
@@ -153,27 +194,46 @@ xlog_cil_init_post_recovery(
* the current transaction ticket so that the accounting works out correctly.
*/
static void
-xlog_cil_insert(
+xlog_cil_insert_items(
struct log *log,
- struct xlog_ticket *ticket,
struct xfs_log_vec *log_vector,
- int diff_length,
- int diff_iovecs)
+ struct xlog_ticket *ticket)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx = cil->xc_ctx;
- int iclog_space;
- int len = diff_length;
struct xfs_log_vec *lv;
+ int len = 0;
+ int diff_iovecs = 0;
+ int iclog_space;
- spin_lock(&cil->xc_cil_lock);
+ ASSERT(log_vector);
- /* move the items to the tail of the CIL */
+ /*
+ * Do all the accounting aggregation and switching of log vectors
+ * around in a separate loop to the insertion of items into the CIL.
+ * Then we can do a separate loop to update the CIL within a single
+ * lock/unlock pair. This reduces the number of round trips on the CIL
+ * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+ * hold time for the transaction commit.
+ *
+ * If this is the first time the item is being placed into the CIL in
+ * this context, pin it so it can't be written to disk until the CIL is
+ * flushed to the iclog and the iclog written to disk.
+ *
+ * We can do this safely because the context can't checkpoint until we
+ * are done so it doesn't matter exactly how we update the CIL.
+ */
for (lv = log_vector; lv; lv = lv->lv_next)
- list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+ xlog_cil_prepare_item(log, lv, &len, &diff_iovecs);
/* account for space used by new iovec headers */
len += diff_iovecs * sizeof(xlog_op_header_t);
+
+ spin_lock(&cil->xc_cil_lock);
+ /* move the items to the tail of the CIL */
+ for (lv = log_vector; lv; lv = lv->lv_next)
+ list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+
ctx->nvecs += diff_iovecs;
/*
@@ -270,76 +330,6 @@ xlog_cil_format_items(
}
static void
-xlog_cil_insert_items(
- struct log *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket,
- xfs_lsn_t *start_lsn)
-{
- struct xfs_log_vec *lv;
- int len = 0;
- int diff_iovecs = 0;
-
- ASSERT(log_vector);
-
- if (start_lsn)
- *start_lsn = log->l_cilp->xc_ctx->sequence;
-
- /*
- * Do all the accounting aggregation and switching of log vectors
- * around in a separate loop to the insertion of items into the CIL.
- * Then we can do a separate loop to update the CIL within a single
- * lock/unlock pair. This reduces the number of round trips on the CIL
- * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
- * hold time for the transaction commit.
- *
- * If this is the first time the item is being placed into the CIL in
- * this context, pin it so it can't be written to disk until the CIL is
- * flushed to the iclog and the iclog written to disk.
- *
- * We can do this safely because the context can't checkpoint until we
- * are done so it doesn't matter exactly how we update the CIL.
- */
- for (lv = log_vector; lv; lv = lv->lv_next) {
- struct xfs_log_vec *old = lv->lv_item->li_lv;
-
- if (old) {
- /* existing lv on log item, space used is a delta */
- ASSERT(!list_empty(&lv->lv_item->li_cil));
- ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
- len += lv->lv_buf_len - old->lv_buf_len;
- diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
- kmem_free(old->lv_buf);
- kmem_free(old);
- } else {
- /* new lv, must pin the log item */
- ASSERT(!lv->lv_item->li_lv);
- ASSERT(list_empty(&lv->lv_item->li_cil));
-
- len += lv->lv_buf_len;
- diff_iovecs += lv->lv_niovecs;
- IOP_PIN(lv->lv_item);
-
- }
-
- /* attach new log vector to log item */
- lv->lv_item->li_lv = lv;
-
- /*
- * If this is the first time the item is being committed to the
- * CIL, store the sequence number on the log item so we can
- * tell in future commits whether this is the first checkpoint
- * the item is being committed into.
- */
- if (!lv->lv_item->li_seq)
- lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
- }
-
- xlog_cil_insert(log, ticket, log_vector, len, diff_iovecs);
-}
-
-static void
xlog_cil_free_logvec(
struct xfs_log_vec *log_vector)
{
@@ -654,7 +644,11 @@ xfs_log_commit_cil(
/* lock out background commit */
down_read(&log->l_cilp->xc_ctx_lock);
- xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+ if (commit_lsn)
+ *commit_lsn = log->l_cilp->xc_ctx->sequence;
+
+ xlog_cil_insert_items(log, log_vector, tp->t_ticket);
/* check we didn't blow the reservation */
if (tp->t_ticket->t_curr_res < 0)
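
As a side note for the combined patch description: the new
xlog_cil_prepare_item() only charges the difference between the new and
the previously attached log vector when an item is relogged, so repeated
relogging does not inflate the checkpoint accounting. A tiny stand-alone
sketch of that accounting rule (made-up names - struct fake_lv,
account() - not the XFS code):

#include <stdio.h>

/* a brand new item adds its full size, a relogged item only the delta */
struct fake_lv { int buf_len; int niovecs; };

static void account(struct fake_lv *new, struct fake_lv *old,
                    int *len, int *diff_iovecs)
{
        if (old) {
                *len += new->buf_len - old->buf_len;
                *diff_iovecs += new->niovecs - old->niovecs;
        } else {
                *len += new->buf_len;
                *diff_iovecs += new->niovecs;
        }
}

int main(void)
{
        struct fake_lv first = { .buf_len = 128, .niovecs = 2 };
        struct fake_lv relog = { .buf_len = 160, .niovecs = 3 };
        int len = 0, diff_iovecs = 0;

        account(&first, NULL, &len, &diff_iovecs);      /* +128, +2 */
        account(&relog, &first, &len, &diff_iovecs);    /* +32,  +1 */
        printf("len=%d diff_iovecs=%d\n", len, diff_iovecs);
        return 0;
}

Running it prints len=160 diff_iovecs=3, i.e. the totals track the
latest formatted size of each item rather than the sum of every relog.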