xfs
[Top] [All Lists]

Re: [PATCH] xfs: inodes are new until the dentry cache is set up

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: [PATCH] xfs: inodes are new until the dentry cache is set up
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Thu, 8 Jan 2015 10:32:05 -0500
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1420667576-7592-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1420667576-7592-1-git-send-email-david@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.23 (2014-03-12)
On Thu, Jan 08, 2015 at 08:52:56AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Al Viro noticed a generic set of issues to do with filehandle lookup
> racing with dentry cache setup. They involve a filehandle lookup
> occurring while an inode is being created and the filehandle lookup
> racing with the dentry creation for the real file. This can lead to
> multiple dentries for the one path being instantiated. There are a
> host of other issues around this same set of paths.
> 
> The underlying cause is that file handle lookup only waits on inode
> cache instantiation rather than full dentry cache instantiation. XFS
> is mostly immune to the problems discovered due to it's own internal
> inode cache, but there are a couple of corner cases where races can
> happen.
> 
> We currently clear the XFS_INEW flag when the inode is fully set up
> after insertion into the cache. Newly allocated inodes are inserted
> locked and so aren't usable until the allocation transaction
> commits. This, however, occurs before the dentry and security
> information is fully initialised and hence the inode is unlocked and
> available for lookups to find too early.
> 
> To solve the problem, only clear the XFS_INEW flag for newly created
> inodes once the dentry is fully instantiated. This means lookups
> will retry until the XFS_INEW flag is removed from the inode and
> hence avoids the race conditions in questions.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_icache.c |  4 ++--
>  fs/xfs/xfs_inode.c  |  2 +-
>  fs/xfs/xfs_inode.h  | 22 ++++++++++++++++++++++
>  fs/xfs/xfs_iops.c   | 24 ++++++++++--------------
>  fs/xfs/xfs_iops.h   |  2 --
>  fs/xfs/xfs_qm.c     | 13 +++++++++----
>  6 files changed, 44 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 9771b7e..76a9f27 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -439,11 +439,11 @@ again:
>       *ipp = ip;
>  
>       /*
> -      * If we have a real type for an on-disk inode, we can set ops(&unlock)
> +      * If we have a real type for an on-disk inode, we can setup the inode
>        * now.  If it's a new inode being created, xfs_ialloc will handle it.
>        */
>       if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
> -             xfs_setup_inode(ip);
> +             xfs_setup_existing_inode(ip);
>       return 0;
>  
>  out_error_or_again:
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 9916aef..400791a 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -818,7 +818,7 @@ xfs_ialloc(
>       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
>       xfs_trans_log_inode(tp, ip, flags);
>  
> -     /* now that we have an i_mode we can setup inode ops and unlock */
> +     /* now that we have an i_mode we can setup the inode structure */
>       xfs_setup_inode(ip);
>  
>       *ipp = ip;
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index f772296..de97ccc 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -381,6 +381,28 @@ int              xfs_zero_eof(struct xfs_inode *, 
> xfs_off_t, xfs_fsize_t);
>  int          xfs_iozero(struct xfs_inode *, loff_t, size_t);
>  
>  
> +/* from xfs_iops.c */
> +/*
> + * When setting up a newly allocated inode, we need to call
> + * xfs_finish_inode_setup() once the inode is fully instantiated at
> + * the VFS level to prevent the rest of the world seeing the inode
> + * before we've completed instantiation. Otherwise we can do it
> + * the moment the inode lookup is complete.
> + */
> +extern void xfs_setup_inode(struct xfs_inode *ip);
> +static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
> +{
> +     xfs_iflags_clear(ip, XFS_INEW);
> +     barrier();
> +     unlock_new_inode(VFS_I(ip));
> +}
> +
> +static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
> +{
> +     xfs_setup_inode(ip);
> +     xfs_finish_inode_setup(ip);
> +}
> +
>  #define IHOLD(ip) \
>  do { \
>       ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index ce80eeb..8be5bb5 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -186,6 +186,8 @@ xfs_generic_create(
>       else
>               d_instantiate(dentry, inode);
>  
> +     xfs_finish_inode_setup(ip);
> +
>   out_free_acl:
>       if (default_acl)
>               posix_acl_release(default_acl);
> @@ -194,6 +196,7 @@ xfs_generic_create(
>       return error;
>  
>   out_cleanup_inode:
> +     xfs_finish_inode_setup(ip);
>       if (!tmpfile)
>               xfs_cleanup_inode(dir, inode, dentry);
>       iput(inode);
> @@ -366,9 +369,11 @@ xfs_vn_symlink(
>               goto out_cleanup_inode;
>  
>       d_instantiate(dentry, inode);
> +     xfs_finish_inode_setup(cip);
>       return 0;
>  
>   out_cleanup_inode:
> +     xfs_finish_inode_setup(cip);
>       xfs_cleanup_inode(dir, inode, dentry);
>       iput(inode);
>   out:

Ok, but what about post-inode-allocation failure conditions down in
xfs_create()? I don't know if there's any real harm in releasing an
I_NEW inode, but iput_final() does throw a warning. Same general
question applies to xfs_create_tmpfile(), etc..

Brian

> @@ -1231,16 +1236,12 @@ xfs_diflags_to_iflags(
>  }
>  
>  /*
> - * Initialize the Linux inode, set up the operation vectors and
> - * unlock the inode.
> - *
> - * When reading existing inodes from disk this is called directly
> - * from xfs_iget, when creating a new inode it is called from
> - * xfs_ialloc after setting up the inode.
> + * Initialize the Linux inode and set up the operation vectors.
>   *
> - * We are always called with an uninitialised linux inode here.
> - * We need to initialise the necessary fields and take a reference
> - * on it.
> + * When reading existing inodes from disk this is called directly from 
> xfs_iget,
> + * when creating a new inode it is called from xfs_ialloc after setting up 
> the
> + * inode. These callers have different criteria for clearing XFS_INEW, so 
> leave
> + * it up to the caller to deal with unlocking the inode appropriately.
>   */
>  void
>  xfs_setup_inode(
> @@ -1327,9 +1328,4 @@ xfs_setup_inode(
>               inode_has_no_xattr(inode);
>               cache_no_acl(inode);
>       }
> -
> -     xfs_iflags_clear(ip, XFS_INEW);
> -     barrier();
> -
> -     unlock_new_inode(inode);
>  }
> diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
> index 1c34e43..d4bcc29 100644
> --- a/fs/xfs/xfs_iops.h
> +++ b/fs/xfs/xfs_iops.h
> @@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
>  
>  extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
>  
> -extern void xfs_setup_inode(struct xfs_inode *);
> -
>  /*
>   * Internal setattr interfaces.
>   */
> diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
> index 3e81862..1f9e23c 100644
> --- a/fs/xfs/xfs_qm.c
> +++ b/fs/xfs/xfs_qm.c
> @@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
>       xfs_trans_t     *tp;
>       int             error;
>       int             committed;
> +     bool            need_alloc = true;
>  
>       *ip = NULL;
>       /*
> @@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
>                               return error;
>                       mp->m_sb.sb_gquotino = NULLFSINO;
>                       mp->m_sb.sb_pquotino = NULLFSINO;
> +                     need_alloc = false;
>               }
>       }
>  
> @@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
>               return error;
>       }
>  
> -     if (!*ip) {
> +     if (need_alloc) {
>               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
>                                                               &committed);
>               if (error) {
> @@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
>       spin_unlock(&mp->m_sb_lock);
>       xfs_log_sb(tp);
>  
> -     if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
> +     error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
> +     if (error) {
> +             ASSERT(XFS_FORCED_SHUTDOWN(mp));
>               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
> -             return error;
>       }
> -     return 0;
> +     if (need_alloc)
> +             xfs_finish_inode_setup(*ip);
> +     return error;
>  }
>  
>  
> -- 
> 2.0.0
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

<Prev in Thread] Current Thread [Next in Thread>