xfs
[Top] [All Lists]

Re: [PATCH 7/7] xfs: add "fail at unmount" error handling configuration

To: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
Subject: Re: [PATCH 7/7] xfs: add "fail at unmount" error handling configuration
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Wed, 11 May 2016 10:15:51 -0400
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1462882581-30859-8-git-send-email-cmaiolino@xxxxxxxxxx>
References: <1462882581-30859-1-git-send-email-cmaiolino@xxxxxxxxxx> <1462882581-30859-8-git-send-email-cmaiolino@xxxxxxxxxx>
User-agent: Mutt/1.6.0 (2016-04-01)
On Tue, May 10, 2016 at 02:16:21PM +0200, Carlos Maiolino wrote:
> If we take "retry forever" literally on metadata IO errors, we can
> hang at unmount, once it retries those writes forever. This is the
> default behavior, unfortunately.
> 
> Add an error configuration option for this behavior and default it to "fail" 
> so
> that an unmount will trigger actuall errors, a shutdown and allow the unmount 
> to
> succeed. It will be noisy, though, as it will log the errors and shutdown that
> occurs.
> 
> To fix this, we need to mark the filesystem as being in the process of
> unmounting. Do this with a mount flag that is added at the appropriate time
> (i.e. before the blocking AIL sync). We also need to add this flag if mount
> fails after the initial phase of log recovery has been run.
> 
> Changelog:
> 
> V3:
>       - No major changes have been done to this patch, only the ones needed to
>         accomodate it on top of the remaining patches
> V4:
>       - Make fail_at_unmount a global configuration option into
>         ../xfs/<dev>/error directory
>       - Add m_fail_unmount boolean to xfs_mount, to keep track of unmount
>         error behavior
>       - Create err_to_mp() helper, to translate a kobject, from m_error_kobj
>         into its xfs_mount
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Signed-off-by: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_buf_item.c |  4 ++++
>  fs/xfs/xfs_mount.c    | 12 ++++++++++++
>  fs/xfs/xfs_mount.h    |  2 ++
>  fs/xfs/xfs_sysfs.c    | 46 ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 64 insertions(+)
> 
...
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index eda3906..1bcee7e 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
...
> @@ -1012,6 +1016,14 @@ xfs_unmountfs(
>       xfs_log_force(mp, XFS_LOG_SYNC);
>  
>       /*
> +      * We now need to tell the world we are unmounting. This will allow
> +      * us to detect that the filesystem is going away and we should error
> +      * out anything that we have been retrying in the background. This will
> +      * prevent neverending retries iin AIL pushing from hanging the unmount.

Spelling:                              in

Otherwise looks good:

Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>

> +      */
> +     mp->m_flags |= XFS_MOUNT_UNMOUNTING;
> +
> +     /*
>        * Flush all pending changes from the AIL.
>        */
>       xfs_ail_push_all_sync(mp->m_ail);
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 72ec3e3..9063a9c 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -177,6 +177,7 @@ typedef struct xfs_mount {
>        */
>       __uint32_t              m_generation;
>  
> +     bool                    m_fail_unmount;
>  #ifdef DEBUG
>       /*
>        * DEBUG mode instrumentation to test and/or trigger delayed allocation
> @@ -195,6 +196,7 @@ typedef struct xfs_mount {
>  #define XFS_MOUNT_WSYNC              (1ULL << 0)     /* for nfs - all 
> metadata ops
>                                                  must be synchronous except
>                                                  for space allocations */
> +#define XFS_MOUNT_UNMOUNTING (1ULL << 1)     /* filesystem is unmounting */
>  #define XFS_MOUNT_WAS_CLEAN  (1ULL << 3)
>  #define XFS_MOUNT_FS_SHUTDOWN        (1ULL << 4)     /* atomic stop of all 
> filesystem
>                                                  operations, typically for
> diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
> index 084a606..4c2c550 100644
> --- a/fs/xfs/xfs_sysfs.c
> +++ b/fs/xfs/xfs_sysfs.c
> @@ -381,6 +381,13 @@ to_error_cfg(struct kobject *kobject)
>       return container_of(kobj, struct xfs_error_cfg, kobj);
>  }
>  
> +static inline struct xfs_mount *
> +err_to_mp(struct kobject *kobject)
> +{
> +     struct xfs_kobj *kobj = to_kobj(kobject);
> +     return container_of(kobj, struct xfs_mount, m_error_kobj);
> +}
> +
>  static ssize_t
>  max_retries_show(
>       struct kobject  *kobject,
> @@ -447,6 +454,38 @@ retry_timeout_seconds_store(
>  }
>  XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
>  
> +static ssize_t
> +fail_at_unmount_show(
> +     struct kobject  *kobject,
> +     char            *buf)
> +{
> +     struct xfs_mount        *mp = err_to_mp(kobject);
> +
> +     return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
> +}
> +
> +static ssize_t
> +fail_at_unmount_store(
> +     struct kobject  *kobject,
> +     const char      *buf,
> +     size_t          count)
> +{
> +     struct xfs_mount        *mp = err_to_mp(kobject);
> +     int             ret;
> +     int             val;
> +
> +     ret = kstrtoint(buf, 0, &val);
> +     if (ret)
> +             return ret;
> +
> +     if (val < 0 || val > 1)
> +             return -EINVAL;
> +
> +     mp->m_fail_unmount = val;
> +     return count;
> +}
> +XFS_SYSFS_ATTR_RW(fail_at_unmount);
> +
>  static struct attribute *xfs_error_attrs[] = {
>       ATTR_LIST(max_retries),
>       ATTR_LIST(retry_timeout_seconds),
> @@ -462,6 +501,7 @@ struct kobj_type xfs_error_cfg_ktype = {
>  
>  struct kobj_type xfs_error_ktype = {
>       .release = xfs_sysfs_release,
> +     .sysfs_ops = &xfs_sysfs_ops,
>  };
>  
>  /*
> @@ -548,6 +588,12 @@ xfs_error_sysfs_init(
>       if (error)
>               return error;
>  
> +     error = sysfs_create_file(&mp->m_error_kobj.kobject,
> +                               ATTR_LIST(fail_at_unmount));
> +
> +     if (error)
> +             goto out_error;
> +
>       /* .../xfs/<dev>/error/metadata/ */
>       error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
>                               "metadata", &mp->m_error_meta_kobj,
> -- 
> 2.4.11
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

<Prev in Thread] Current Thread [Next in Thread>