xfs
[Top] [All Lists]

[PATCH 74/76] xfs: set up per-AG preallocated block pools

To: david@xxxxxxxxxxxxx, darrick.wong@xxxxxxxxxx
Subject: [PATCH 74/76] xfs: set up per-AG preallocated block pools
From: "Darrick J. Wong" <darrick.wong@xxxxxxxxxx>
Date: Sat, 19 Dec 2015 01:04:36 -0800
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <20151219085622.12713.88678.stgit@xxxxxxxxxxxxxxxx>
References: <20151219085622.12713.88678.stgit@xxxxxxxxxxxxxxxx>
User-agent: StGit/0.17.1-dirty
One unfortunate quirk of the reference count btree -- it can expand in
size when blocks are written to *other* allocation groups if, say, one
large extent becomes a lot of tiny extents.  Since we don't want to
start throwing errors in the middle of CoWing, establish a pool of
reserved blocks in each AG to feed such an expansion.  Reserved pools
can be large enough to obviate the need for external allocations and
use EFI/EFDs so that the reserved blocks will be freed if the
system crashes.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/Makefile                |    1 
 fs/xfs/libxfs/xfs_perag_pool.c |  379 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_perag_pool.h |   47 +++++
 fs/xfs/xfs_trace.h             |   15 ++
 4 files changed, 442 insertions(+)
 create mode 100644 fs/xfs/libxfs/xfs_perag_pool.c
 create mode 100644 fs/xfs/libxfs/xfs_perag_pool.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 798e2b0..d2ab008 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -51,6 +51,7 @@ xfs-y                         += $(addprefix libxfs/, \
                                   xfs_inode_fork.o \
                                   xfs_inode_buf.o \
                                   xfs_log_rlimit.o \
+                                  xfs_perag_pool.o \
                                   xfs_rmap.o \
                                   xfs_rmap_btree.o \
                                   xfs_refcount.o \
diff --git a/fs/xfs/libxfs/xfs_perag_pool.c b/fs/xfs/libxfs/xfs_perag_pool.c
new file mode 100644
index 0000000..b49ffd2
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_perag_pool.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_perag_pool.h"
+#include "xfs_trans_space.h"
+
+/**
+ * xfs_perag_pool_free() -- Free a per-AG reserved block pool.
+ *
+ * @p: Pool to tear down; NULL is accepted and returns 0.
+ *
+ * Every extent left on the pool list is freed in its own transaction,
+ * then the pool is released.  Returns the first error seen, or 0.
+ */
+int
+xfs_perag_pool_free(
+       struct xfs_perag_pool           *p)
+{
+       struct xfs_mount                *mp;
+       struct xfs_perag_pool_entry     *ppe, *n;
+       struct xfs_trans                *tp;
+       xfs_fsblock_t                   fsb;
+       struct xfs_bmap_free            freelist;
+       int                             committed;
+       int                             error = 0, err;
+
+       if (!p)
+               return 0;
+
+       mp = p->pp_mount;
+       list_for_each_entry_safe(ppe, n, &p->pp_entries, ppe_list) {
+               list_del(&ppe->ppe_list);
+               /* After a forced shutdown, only drop the in-core entry. */
+               if (XFS_FORCED_SHUTDOWN(mp)) {
+                       kmem_free(ppe);
+                       continue;
+               }
+               /* Set up transaction. */
+               tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+               tp->t_flags |= XFS_TRANS_RESERVE;
+               err = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 0, 0);
+               if (err)
+                       goto loop_cancel;
+               xfs_bmap_init(&freelist, &fsb);
+               fsb = XFS_AGB_TO_FSB(p->pp_mount, p->pp_agno, ppe->ppe_bno);
+               trace_xfs_perag_pool_free_extent(mp, p->pp_agno, ppe->ppe_bno,
+                               ppe->ppe_len, &p->pp_oinfo);
+               /* Free the extent and commit. */
+               xfs_bmap_add_free(mp, &freelist, fsb, ppe->ppe_len,
+                               &p->pp_oinfo);
+               err = xfs_bmap_finish(&tp, &freelist, &committed, NULL);
+               if (err)
+                       goto loop_cancel;
+               err = xfs_trans_commit(tp);
+               goto loop_next;
+loop_cancel:
+               xfs_trans_cancel(tp);
+loop_next:
+               if (!error)
+                       error = err;
+               kmem_free(ppe);
+       }
+
+       /* Trace before freeing the pool: the tracepoint reads p->pp_agno. */
+       if (error)
+               trace_xfs_perag_pool_free_error(mp, p->pp_agno, error,
+                               _RET_IP_);
+       kmem_free(p);
+       return error;
+}
+
+/*
+ * Allocate one extent of up to *len blocks near the pool's suggested
+ * AG block and append it to the pool's entry list.  On success *len is
+ * updated to the length actually allocated.  Returns 0, the error from
+ * xfs_alloc_vextent(), or -ENOSPC if no block number came back.
+ */
+static int
+xfs_perag_pool_grab_block(
+       struct xfs_perag_pool           *p,
+       struct xfs_trans                *tp,
+       xfs_extlen_t                    *len)
+{
+       struct xfs_mount                *mp = p->pp_mount;
+       struct xfs_perag_pool_entry     *entry;
+       struct xfs_alloc_arg            args;
+       int                             error;
+
+       /* Describe the request: between 1 and *len blocks near pp_agbno. */
+       memset(&args, 0, sizeof(args));
+       args.mp = mp;
+       args.tp = tp;
+       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       args.fsbno = XFS_AGB_TO_FSB(mp, p->pp_agno, p->pp_agbno);
+       args.firstblock = args.fsbno;
+       args.oinfo = p->pp_oinfo;
+       args.minlen = 1;
+       args.maxlen = args.prod = *len;
+
+       /* pp_allocating is raised only for the duration of the call. */
+       p->pp_allocating = true;
+       error = xfs_alloc_vextent(&args);
+       p->pp_allocating = false;
+       /* No error but no block either: report it as out of space. */
+       if (!error && args.fsbno == NULLFSBLOCK)
+               error = -ENOSPC;
+       if (error) {
+               trace_xfs_perag_pool_grab_block_error(mp, p->pp_agno, error,
+                               _RET_IP_);
+               return error;
+       }
+
+       *len = args.len;
+       trace_xfs_perag_pool_grab_block(mp, p->pp_agno, args.agbno, args.len,
+                       &p->pp_oinfo);
+
+       /* Track the new extent at the tail of the pool list. */
+       ASSERT(args.agno == p->pp_agno);
+       entry = kmem_alloc(sizeof(*entry), KM_SLEEP);
+       entry->ppe_bno = args.agbno;
+       entry->ppe_len = args.len;
+       list_add_tail(&entry->ppe_list, &p->pp_entries);
+       return 0;
+}
+
+/*
+ * Grow the pool until @sz blocks are spare (pp_len minus pp_inuse), one
+ * transaction per extent.  With @force, a request above the AG's free
+ * block count is clamped to it rather than failing with -ENOSPC.
+ */
+static int
+__xfs_perag_pool_ensure_capacity(
+       struct xfs_perag_pool           *p,
+       xfs_extlen_t                    sz,
+       bool                            force)
+{
+       struct xfs_mount                *mp = p->pp_mount;
+       struct xfs_trans                *tp;
+       struct xfs_perag                *pag;
+       xfs_extlen_t                    spare = p->pp_len - p->pp_inuse;
+       xfs_extlen_t                    got;
+       uint                            resblks;
+       int                             error;
+
+       /* Enough spare blocks already?  Otherwise only ask for the gap. */
+       if (spare >= sz)
+               return 0;
+       sz -= spare;
+       trace_xfs_perag_pool_ensure_capacity(mp, p->pp_agno, spare, sz,
+                       &p->pp_oinfo);
+
+       /* Compare the gap against the AG's free block counter. */
+       pag = xfs_perag_get(mp, p->pp_agno);
+       resblks = pag->pagf_freeblks;
+       xfs_perag_put(pag);
+       if (resblks < sz) {
+               if (!force) {
+                       error = -ENOSPC;
+                       goto out_error;
+               }
+               sz = resblks;
+       }
+
+       /* One transaction per extent until the gap is closed. */
+       while (sz) {
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, sz);
+               tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
+               if (error)
+                       goto out_cancel;
+               got = sz;
+               error = xfs_perag_pool_grab_block(p, tp, &got);
+               if (error)
+                       goto out_cancel;
+               error = xfs_trans_commit(tp);
+               if (error)
+                       goto out_error;
+               p->pp_len += got;
+               sz -= got;
+       }
+       return 0;
+out_cancel:
+       xfs_trans_cancel(tp);
+out_error:
+       trace_xfs_perag_pool_ensure_capacity_error(mp, p->pp_agno, error,
+                       _RET_IP_);
+       return error;
+}
+
+/**
+ * xfs_perag_pool_ensure_capacity() -- Top up a pool's spare blocks.
+ *
+ * @p: per-AG reserved blocks pool; a NULL pool is a no-op returning 0.
+ * @sz: Ensure that there are at least this many free blocks.
+ *
+ * Runs the internal helper with force=false.
+ */
+int
+xfs_perag_pool_ensure_capacity(
+       struct xfs_perag_pool           *p,
+       xfs_extlen_t                    sz)
+{
+       return p ? __xfs_perag_pool_ensure_capacity(p, sz, false) : 0;
+}
+
+/**
+ * xfs_perag_pool_init() -- Initialize a per-AG reserved block pool.
+ *
+ * The new pool starts out holding @inuse blocks, all of them in use,
+ * and then tries to reserve @len - @inuse more; running out of space
+ * while doing so is not an error.  *@pp is set before that attempt.
+ */
+int
+xfs_perag_pool_init(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       xfs_agblock_t                   agbno,
+       xfs_extlen_t                    len,
+       xfs_extlen_t                    inuse,
+       uint64_t                        owner,
+       struct xfs_perag_pool           **pp)
+{
+       struct xfs_perag_pool           *pool;
+       struct xfs_owner_info           oinfo;
+       int                             error;
+
+       XFS_RMAP_AG_OWNER(&oinfo, owner);
+       trace_xfs_perag_pool_init(mp, agno, agbno, len, &oinfo);
+       trace_xfs_perag_pool_init(mp, agno, agbno, inuse, &oinfo);
+       pool = kmem_alloc(sizeof(*pool), KM_SLEEP);
+       INIT_LIST_HEAD(&pool->pp_entries);
+       pool->pp_mount = mp;
+       pool->pp_agno = agno;
+       pool->pp_agbno = agbno;
+       pool->pp_len = pool->pp_inuse = inuse;
+       pool->pp_oinfo = oinfo;
+       pool->pp_allocating = false;
+       *pp = pool;
+
+       error = __xfs_perag_pool_ensure_capacity(pool, len - inuse, true);
+       if (!error || error == -ENOSPC)
+               return 0;
+       trace_xfs_perag_pool_init_error(mp, agno, error, _RET_IP_);
+       return error;
+}
+
+/**
+ * xfs_perag_pool_alloc_block() -- Allocate a block from the pool.
+ *
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the allocation.
+ * @bno: (out) The allocated block number.
+ *
+ * Returns 0, -EINVAL for a NULL or mid-allocation pool, or a refill error.
+ */
+int
+xfs_perag_pool_alloc_block(
+       struct xfs_perag_pool           *p,
+       struct xfs_trans                *tp,
+       xfs_agblock_t                   *bno)
+{
+       struct xfs_mount                *mp;
+       struct xfs_perag_pool_entry     *ppe;
+       xfs_extlen_t                    len;
+       int                             error;
+
+       if (p == NULL || p->pp_allocating)
+               return -EINVAL;
+
+       mp = p->pp_mount;
+       mp = mp;
+       /* Empty pool?  Grab another block and count it in pp_len. */
+       if (list_empty(&p->pp_entries)) {
+               len = 1;
+               error = xfs_perag_pool_grab_block(p, tp, &len);
+               if (error)
+                       goto err;
+               ASSERT(len == 1);
+               p->pp_len += len;
+       }
+
+       /* Hand out the first block of the first extent on the list. */
+       ppe = list_first_entry(&p->pp_entries, struct xfs_perag_pool_entry,
+                       ppe_list);
+       *bno = ppe->ppe_bno;
+
+       trace_xfs_perag_pool_alloc_block(mp, p->pp_agno, *bno, 1, &p->pp_oinfo);
+
+       /* Shrink the extent from the front; free it once it is used up. */
+       ppe->ppe_len--;
+       ppe->ppe_bno++;
+       if (ppe->ppe_len == 0) {
+               list_del(&ppe->ppe_list);
+               kmem_free(ppe);
+       }
+       p->pp_inuse++;
+       return 0;
+err:
+       trace_xfs_perag_pool_alloc_block_error(mp, p->pp_agno, error, _RET_IP_);
+       return error;
+}
+
+/**
+ * xfs_perag_pool_free_block() -- Put a block back in the pool.
+ *
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the free operation (not used here).
+ * @bno: Block to put back.
+ *
+ * Returns -EINVAL for a NULL pool, otherwise 0.
+ */
+int
+xfs_perag_pool_free_block(
+       struct xfs_perag_pool           *p,
+       struct xfs_trans                *tp,
+       xfs_agblock_t                   bno)
+{
+       struct xfs_mount                *mp;
+       struct xfs_perag_pool_entry     *cur;
+
+       if (p == NULL)
+               return -EINVAL;
+
+       mp = p->pp_mount;
+       mp = mp;
+       trace_xfs_perag_pool_free_block(mp, p->pp_agno, bno, 1, &p->pp_oinfo);
+
+       /* One block fewer in use; try to extend an extent bordering @bno. */
+       p->pp_inuse--;
+       list_for_each_entry(cur, &p->pp_entries, ppe_list) {
+               if (cur->ppe_bno == bno + 1) {
+                       cur->ppe_bno = bno;
+                       cur->ppe_len++;
+                       return 0;
+               }
+               if (cur->ppe_bno + cur->ppe_len == bno) {
+                       cur->ppe_len++;
+                       return 0;
+               }
+       }
+
+       /* No neighbour found: queue a new one-block extent at the tail. */
+       cur = kmem_alloc(sizeof(*cur), KM_SLEEP);
+       cur->ppe_bno = bno;
+       cur->ppe_len = 1;
+       list_add_tail(&cur->ppe_list, &p->pp_entries);
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_perag_pool.h b/fs/xfs/libxfs/xfs_perag_pool.h
new file mode 100644
index 0000000..ecdcd2a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_perag_pool.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+struct xfs_perag_pool_entry {
+       struct list_head        ppe_list;       /* link in pp_entries */
+       xfs_agblock_t           ppe_bno;        /* first AG block of extent */
+       xfs_extlen_t            ppe_len;        /* extent length, in blocks */
+};
+
+struct xfs_perag_pool {
+       struct xfs_mount        *pp_mount;      /* XFS mount */
+       xfs_agnumber_t          pp_agno;        /* AG number */
+       xfs_agblock_t           pp_agbno;       /* suggested AG block number */
+       xfs_extlen_t            pp_len;         /* blocks in pool, incl. in use */
+       xfs_extlen_t            pp_inuse;       /* blocks handed out */
+       struct xfs_owner_info   pp_oinfo;       /* owner info for pool blocks */
+       struct list_head        pp_entries;     /* extents not yet handed out */
+       bool                    pp_allocating;  /* set while grabbing blocks */
+};
+
+int xfs_perag_pool_free(struct xfs_perag_pool *p);
+int xfs_perag_pool_init(struct xfs_mount *mp, xfs_agnumber_t agno,
+               xfs_agblock_t agbno, xfs_extlen_t len, xfs_extlen_t inuse,
+               uint64_t owner, struct xfs_perag_pool **pp);
+/* Make sure at least sz unused blocks are in the pool. */
+int xfs_perag_pool_ensure_capacity(struct xfs_perag_pool *p, xfs_extlen_t sz);
+/* Take one block out of the pool / put one block back. */
+int xfs_perag_pool_alloc_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+               xfs_agblock_t *bno);
+int xfs_perag_pool_free_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+               xfs_agblock_t bno);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0773938..dad57dc 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3160,6 +3160,21 @@ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
 
+/* perag pool tracepoints; pool events are defined via DEFINE_RMAP_EVENT */
+#define DEFINE_PERAG_POOL_EVENT        DEFINE_RMAP_EVENT
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_free_extent);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_grab_block);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_init);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_ensure_capacity);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_alloc_block);
+DEFINE_PERAG_POOL_EVENT(xfs_perag_pool_free_block);
+/* error-path tracepoints */
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_free_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_grab_block_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_init_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_ensure_capacity_error);
+DEFINE_AG_ERROR_EVENT(xfs_perag_pool_alloc_block_error);
+
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH

<Prev in Thread] Current Thread [Next in Thread>