xfs
[Top] [All Lists]

[PATCH 7/7] xfs: add "fail at unmount" error handling configuration

To: xfs@xxxxxxxxxxx
Subject: [PATCH 7/7] xfs: add "fail at unmount" error handling configuration
From: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
Date: Wed, 4 May 2016 17:43:20 +0200
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1462376600-8617-1-git-send-email-cmaiolino@xxxxxxxxxx>
References: <1462376600-8617-1-git-send-email-cmaiolino@xxxxxxxxxx>
If we take "retry forever" literally on metadata IO errors, we can
hang at unmount, because those writes are retried forever. This is the
default behavior, unfortunately.

Add an error configuration option for this behavior and default it to "fail" so
that an unmount will trigger actual errors and a shutdown, and allow the unmount
to succeed. It will be noisy, though, as it will log the errors and the shutdown
that occur.

To fix this, we need to mark the filesystem as being in the process of
unmounting. Do this with a mount flag that is added at the appropriate time
(i.e. before the blocking AIL sync). We also need to add this flag if mount
fails after the initial phase of log recovery has been run.

Changelog:

V3:
        - No major changes have been made to this patch, only the ones needed to
          accommodate it on top of the remaining patches

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Signed-off-by: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
---
 fs/xfs/xfs_buf_item.c |  4 ++++
 fs/xfs/xfs_mount.c    |  9 +++++++++
 fs/xfs/xfs_mount.h    |  2 ++
 fs/xfs/xfs_sysfs.c    | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 53 insertions(+)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6fe6852..c37f118 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1105,6 +1105,10 @@ xfs_buf_iodone_callback_error(
            time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
                        goto permanent_error;
 
+       /* At unmount we may treat errors differently */
+       if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && cfg->fail_at_unmount)
+               goto permanent_error;
+
        /* still a transient error, higher layers will retry */
        xfs_buf_ioerror(bp, 0);
        xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index eda3906..d89fba6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -961,6 +961,7 @@ xfs_mountfs(
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp, SYNC_WAIT);
  out_log_dealloc:
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
        xfs_log_mount_cancel(mp);
  out_fail_wait:
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1012,6 +1013,14 @@ xfs_unmountfs(
        xfs_log_force(mp, XFS_LOG_SYNC);
 
        /*
+        * We now need to tell the world we are unmounting. This will allow
+        * us to detect that the filesystem is going away and we should error
+        * out anything that we have been retrying in the background. This will
+        * prevent neverending retries in AIL pushing from hanging the unmount.
+        */
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+       /*
         * Flush all pending changes from the AIL.
         */
        xfs_ail_push_all_sync(mp->m_ail);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e3b3267..86f4344 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -59,6 +59,7 @@ struct xfs_error_cfg {
        struct xfs_kobj kobj;
        int             max_retries;    /* -1 = retry forever */
        unsigned long   retry_timeout;  /* in jiffies, 0 = no timeout */
+       bool            fail_at_unmount;
 };
 
 typedef struct xfs_mount {
@@ -193,6 +194,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_WSYNC                (1ULL << 0)     /* for nfs - all metadata ops
                                                   must be synchronous except
                                                   for space allocations */
+#define XFS_MOUNT_UNMOUNTING   (1ULL << 1)     /* filesystem is unmounting */
 #define XFS_MOUNT_WAS_CLEAN    (1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN  (1ULL << 4)     /* atomic stop of all filesystem
                                                   operations, typically for
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 1ed9033..ab27445 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -447,9 +447,42 @@ retry_timeout_seconds_store(
 }
 XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
 
+static ssize_t
+fail_at_unmount_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+               struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+               return snprintf(buf, PAGE_SIZE, "%d\n", cfg->fail_at_unmount);
+}
+
+static ssize_t
+fail_at_unmount_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val < 0 || val > 1)
+               return -EINVAL;
+
+       cfg->fail_at_unmount = val;
+       return count;
+}
+XFS_SYSFS_ATTR_RW(fail_at_unmount);
+
 static struct attribute *xfs_error_attrs[] = {
        ATTR_LIST(max_retries),
        ATTR_LIST(retry_timeout_seconds),
+       ATTR_LIST(fail_at_unmount),
        NULL,
 };
 
@@ -474,20 +507,24 @@ struct xfs_error_init {
        char            *name;
        int             max_retries;
        int             retry_timeout;  /* in seconds */
+       bool            fail_at_unmount;
 };
 
 static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
        { .name = "default",
          .max_retries = -1,
          .retry_timeout = 0,
+         .fail_at_unmount = true,
        },
        { .name = "EIO",
          .max_retries = -1,
          .retry_timeout = 0,
+         .fail_at_unmount = true,
        },
        { .name = "ENOSPC",
          .max_retries = -1,
          .retry_timeout = 0,
+         .fail_at_unmount = true,
        },
        { .name = "ENODEV",
          .max_retries = -1,
@@ -523,6 +560,7 @@ xfs_error_sysfs_init_class(
                cfg->max_retries = init[i].max_retries;
                cfg->retry_timeout = msecs_to_jiffies(
                                        init[i].retry_timeout * MSEC_PER_SEC);
+               cfg->fail_at_unmount = init[i].fail_at_unmount;
        }
        return 0;
 
-- 
2.4.11

<Prev in Thread] Current Thread [Next in Thread>