xfs
[Top] [All Lists]

[PATCH 6/6] xfs: make discard operations asynchronous

To: xfs@xxxxxxxxxxx
Subject: [PATCH 6/6] xfs: make discard operations asynchronous
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Tue, 22 Mar 2011 15:55:56 -0400
References: <20110322195550.260682574@xxxxxxxxxxxxxxxxxxxxxx>
User-agent: quilt/0.48-1
Instead of waiting for each discard request keep the CIL context alive
until all of them are done, at which point we can tear it down completely
and remove the busy extents from the rbtree.

At this point I'm doing the I/O completion from IRQ context for simplicity,
but I'll benchmark it against a version that uses a workqueue.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: xfs/fs/xfs/linux-2.6/xfs_discard.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_discard.c     2011-03-22 15:58:10.301855813 +0100
+++ xfs/fs/xfs/linux-2.6/xfs_discard.c  2011-03-22 18:39:09.000000000 +0100
@@ -30,6 +30,7 @@
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_discard.h"
 #include "xfs_trace.h"
 
@@ -192,37 +193,119 @@ xfs_ioc_trim(
        return 0;
 }
 
+void
+xfs_cil_discard_done(
+       struct xfs_cil_ctx      *ctx)
+{
+       if (atomic_dec_and_test(&ctx->discards)) {
+               struct xfs_busy_extent  *busyp, *n;
+
+               list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+                       xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+               kmem_free(ctx);
+       }
+}
+
+STATIC void
+xfs_discard_end_io(
+       struct bio              *bio,
+       int                     err)
+{
+       struct xfs_cil_ctx      *ctx = bio->bi_private;
+
+       if (err && err != -EOPNOTSUPP) {
+               xfs_info(ctx->cil->xc_log->l_mp,
+                        "I/O error during discard");
+       }
+
+       bio_put(bio);
+       xfs_cil_discard_done(ctx);
+}
+
+static int
+xfs_issue_discard(
+       struct block_device     *bdev,
+       sector_t                sector,
+       sector_t                nr_sects,
+       gfp_t                   gfp_mask,
+       struct xfs_cil_ctx      *ctx)
+{
+       struct request_queue    *q = bdev_get_queue(bdev);
+       unsigned int            max_discard_sectors;
+       struct bio              *bio;
+       int                     ret = 0;
+
+       if (!q)
+               return -ENXIO;
+
+       if (!blk_queue_discard(q))
+               return -EOPNOTSUPP;
+
+       /*
+        * Ensure that max_discard_sectors is of the proper
+        * granularity
+        */
+       max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+       if (q->limits.discard_granularity) {
+               unsigned int disc_sects = q->limits.discard_granularity >> 9;
+
+               max_discard_sectors &= ~(disc_sects - 1);
+       }
+
+
+       while (nr_sects && !ret) {
+               bio = bio_alloc(gfp_mask, 1);
+               if (!bio) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               bio->bi_sector = sector;
+               bio->bi_end_io = xfs_discard_end_io;
+               bio->bi_bdev = bdev;
+               bio->bi_private = ctx;
+
+               if (nr_sects > max_discard_sectors) {
+                       bio->bi_size = max_discard_sectors << 9;
+                       nr_sects -= max_discard_sectors;
+                       sector += max_discard_sectors;
+               } else {
+                       bio->bi_size = nr_sects << 9;
+                       nr_sects = 0;
+               }
+
+               atomic_inc(&ctx->discards);
+               submit_bio(REQ_WRITE | REQ_DISCARD, bio);
+       }
+
+       return ret;
+}
+
 int
 xfs_discard_extent(
        struct xfs_mount        *mp,
-       struct xfs_busy_extent  *busyp)
+       struct xfs_busy_extent  *busyp,
+       struct xfs_cil_ctx      *ctx)
 {
        struct xfs_perag        *pag;
-       int                     error = 0;
        xfs_daddr_t             bno;
        int64_t                 len;
        bool                    done  = false;
 
-       if ((mp->m_flags & XFS_MOUNT_DISCARD) == 0)
-               return 0;
-
        bno = XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno);
        len = XFS_FSB_TO_BB(mp, busyp->length);
 
        pag = xfs_perag_get(mp, busyp->agno);
-       spin_lock(&pag->pagb_lock);
+       spin_lock_irq(&pag->pagb_lock);
        if (!busyp->length)
                done = true;
        busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
-       spin_unlock(&pag->pagb_lock);
+       spin_unlock_irq(&pag->pagb_lock);
        xfs_perag_put(pag);
 
        if (done)
                return 0;
 
-       error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, bno, len,
-                                     GFP_NOFS, 0);
-       if (error && error != EOPNOTSUPP)
-               xfs_info(mp, "discard failed, error %d", error);
-       return error;
+       return -xfs_issue_discard(mp->m_ddev_targp->bt_bdev,
+                                 bno, len, GFP_NOFS, ctx);
 }
Index: xfs/fs/xfs/linux-2.6/xfs_discard.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_discard.h     2011-03-22 15:58:10.313857879 +0100
+++ xfs/fs/xfs/linux-2.6/xfs_discard.h  2011-03-22 18:39:09.000000000 +0100
@@ -3,10 +3,13 @@
 
 struct fstrim_range;
 struct xfs_busy_extent;
+struct xfs_cil_ctx;
 
 extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
 
 extern int     xfs_discard_extent(struct xfs_mount *,
-                                  struct xfs_busy_extent *);
+                                  struct xfs_busy_extent *,
+                                  struct xfs_cil_ctx *);
+extern void    xfs_cil_discard_done(struct xfs_cil_ctx *ctx);
 
 #endif /* XFS_DISCARD_H */
Index: xfs/fs/xfs/xfs_log_cil.c
===================================================================
--- xfs.orig/fs/xfs/xfs_log_cil.c       2011-03-22 15:58:10.329855977 +0100
+++ xfs/fs/xfs/xfs_log_cil.c    2011-03-22 18:39:09.000000000 +0100
@@ -68,6 +68,7 @@ xlog_cil_init(
        INIT_LIST_HEAD(&ctx->busy_extents);
        ctx->sequence = 1;
        ctx->cil = cil;
+       atomic_set(&ctx->discards, 1);
        cil->xc_ctx = ctx;
        cil->xc_current_sequence = ctx->sequence;
 
@@ -364,14 +365,18 @@ xlog_cil_committed(
        struct xfs_cil_ctx      *ctx = args;
        struct xfs_mount        *mp = ctx->cil->xc_log->l_mp;
        struct xfs_busy_extent  *busyp, *n;
+       bool                    keep_alive = false;
 
        xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
                                        ctx->start_lsn, abort);
 
-       list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) {
-               if (!abort)
-                       xfs_discard_extent(mp, busyp);
-               xfs_alloc_busy_clear(mp, busyp);
+       if (!(mp->m_flags & XFS_MOUNT_DISCARD) || abort) {
+               list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+                       xfs_alloc_busy_clear(mp, busyp);
+       } else if (!list_empty(&ctx->busy_extents)) {
+               list_for_each_entry(busyp, &ctx->busy_extents, list)
+                       xfs_discard_extent(mp, busyp, ctx);
+               keep_alive = true;
        }
 
        spin_lock(&ctx->cil->xc_cil_lock);
@@ -379,7 +384,10 @@ xlog_cil_committed(
        spin_unlock(&ctx->cil->xc_cil_lock);
 
        xlog_cil_free_logvec(ctx->lv_chain);
-       kmem_free(ctx);
+       if (keep_alive)
+               xfs_cil_discard_done(ctx);
+       else
+               kmem_free(ctx);
 }
 
 /*
@@ -490,6 +498,7 @@ xlog_cil_push(
        INIT_LIST_HEAD(&new_ctx->busy_extents);
        new_ctx->sequence = ctx->sequence + 1;
        new_ctx->cil = cil;
+       atomic_set(&new_ctx->discards, 1);
        cil->xc_ctx = new_ctx;
 
        /*
Index: xfs/fs/xfs/xfs_alloc.c
===================================================================
--- xfs.orig/fs/xfs/xfs_alloc.c 2011-03-22 18:39:05.173855849 +0100
+++ xfs/fs/xfs/xfs_alloc.c      2011-03-22 18:39:09.000000000 +0100
@@ -2498,7 +2498,7 @@ xfs_alloc_busy_insert(
        trace_xfs_alloc_busy(tp, agno, bno, len, 0);
 
        pag = xfs_perag_get(tp->t_mountp, new->agno);
-       spin_lock(&pag->pagb_lock);
+       spin_lock_irq(&pag->pagb_lock);
        rbp = &pag->pagb_tree.rb_node;
        while (*rbp) {
                parent = *rbp;
@@ -2521,7 +2521,7 @@ xfs_alloc_busy_insert(
        rb_insert_color(&new->rb_node, &pag->pagb_tree);
 
        list_add(&new->list, &tp->t_busy);
-       spin_unlock(&pag->pagb_lock);
+       spin_unlock_irq(&pag->pagb_lock);
        xfs_perag_put(pag);
 }
 
@@ -2547,7 +2547,7 @@ xfs_alloc_busy_search(
        int                     match = 0;
 
        pag = xfs_perag_get(mp, agno);
-       spin_lock(&pag->pagb_lock);
+       spin_lock_irq(&pag->pagb_lock);
 
        rbp = pag->pagb_tree.rb_node;
 
@@ -2570,7 +2570,7 @@ xfs_alloc_busy_search(
                        break;
                }
        }
-       spin_unlock(&pag->pagb_lock);
+       spin_unlock_irq(&pag->pagb_lock);
        trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
        xfs_perag_put(pag);
        return match;
@@ -2706,7 +2706,7 @@ xfs_alloc_busy_reuse(
 
        pag = xfs_perag_get(tp->t_mountp, agno);
 restart:
-       spin_lock(&pag->pagb_lock);
+       spin_lock_irq(&pag->pagb_lock);
        rbp = pag->pagb_tree.rb_node;
        while (rbp) {
                struct xfs_busy_extent *busyp =
@@ -2727,7 +2727,7 @@ restart:
                overlap = xfs_alloc_busy_try_reuse(pag, busyp,
                                                   fbno, fbno + flen);
                if (overlap == -1 || (overlap && userdata)) {
-                       spin_unlock(&pag->pagb_lock);
+                       spin_unlock_irq(&pag->pagb_lock);
                        xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
                        goto restart;
                }
@@ -2743,7 +2743,7 @@ restart:
                else
                        rbp = rbp->rb_right;
        }
-       spin_unlock(&pag->pagb_lock);
+       spin_unlock_irq(&pag->pagb_lock);
        xfs_perag_put(pag);
 }
 
@@ -2764,7 +2764,7 @@ xfs_alloc_busy_trim(
        ASSERT(flen > 0);
 
 restart:
-       spin_lock(&args->pag->pagb_lock);
+       spin_lock_irq(&args->pag->pagb_lock);
        rbp = args->pag->pagb_tree.rb_node;
        while (rbp && flen >= args->minlen) {
                struct xfs_busy_extent *busyp =
@@ -2789,7 +2789,7 @@ restart:
                        overlap = xfs_alloc_busy_try_reuse(args->pag, busyp,
                                                           fbno, fbno + flen);
                        if (unlikely(overlap == -1)) {
-                               spin_unlock(&args->pag->pagb_lock);
+                               spin_unlock_irq(&args->pag->pagb_lock);
                                xfs_log_force(args->mp, XFS_LOG_SYNC);
                                goto restart;
                        }
@@ -2935,7 +2935,7 @@ restart:
                flen = fend - fbno;
        }
 out:
-       spin_unlock(&args->pag->pagb_lock);
+       spin_unlock_irq(&args->pag->pagb_lock);
        *rbno = fbno;
        *rlen = flen;
        return;
@@ -2944,7 +2944,7 @@ fail:
         * Return a zero extent length as failure indications.  All callers
         * re-check if the trimmed extent satisfies the minlen requirement.
         */
-       spin_unlock(&args->pag->pagb_lock);
+       spin_unlock_irq(&args->pag->pagb_lock);
        *rbno = fbno;
        *rlen = 0;
 }
@@ -2955,6 +2955,7 @@ xfs_alloc_busy_clear(
        struct xfs_busy_extent  *busyp)
 {
        struct xfs_perag        *pag;
+       unsigned long           flags;
 
        trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
                                                busyp->length);
@@ -2962,10 +2963,10 @@ xfs_alloc_busy_clear(
        list_del_init(&busyp->list);
 
        pag = xfs_perag_get(mp, busyp->agno);
-       spin_lock(&pag->pagb_lock);
+       spin_lock_irqsave(&pag->pagb_lock, flags);
        if (busyp->length)
                rb_erase(&busyp->rb_node, &pag->pagb_tree);
-       spin_unlock(&pag->pagb_lock);
+       spin_unlock_irqrestore(&pag->pagb_lock, flags);
        xfs_perag_put(pag);
 
        kmem_free(busyp);
Index: xfs/fs/xfs/xfs_log_priv.h
===================================================================
--- xfs.orig/fs/xfs/xfs_log_priv.h      2011-03-22 18:39:05.229883275 +0100
+++ xfs/fs/xfs/xfs_log_priv.h   2011-03-22 18:39:09.000000000 +0100
@@ -389,6 +389,7 @@ struct xfs_cil_ctx {
        struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
        xfs_log_callback_t      log_cb;         /* completion callback hook. */
        struct list_head        committing;     /* ctx committing list */
+       atomic_t                discards;       /* no. of pending discards */
 };
 
 /*

<Prev in Thread] Current Thread [Next in Thread>