xfs
[Top] [All Lists]

Re: generic/04[89] fail on XFS due to change in writeback code [4.2-rc1

To: Tejun Heo <tj@xxxxxxxxxx>
Subject: Re: generic/04[89] fail on XFS due to change in writeback code [4.2-rc1 regression]
From: Eryu Guan <eguan@xxxxxxxxxx>
Date: Fri, 14 Aug 2015 14:19:39 +0800
Cc: Dave Chinner <david@xxxxxxxxxxxxx>, xfs@xxxxxxxxxxx, axboe@xxxxxx, jack@xxxxxxx, linux-fsdevel@xxxxxxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <20150813232400.GH4496@xxxxxxxxxxxxxxx>
References: <20150812101204.GE17933@xxxxxxxxxxxxxxxxxxxxxxxxxx> <20150813004435.GN3902@dastard> <20150813232400.GH4496@xxxxxxxxxxxxxxx>
User-agent: Mutt/1.5.23 (2014-03-12)
On Thu, Aug 13, 2015 at 07:24:00PM -0400, Tejun Heo wrote:
> Hello, Eryu.
> 
> Can you please do the followings?
> 
> 1. See if the "writeback: fix syncing of I_DIRTY_TIME inodes" patch
>    changes anything.

After applying this patch, I can no longer reproduce the failure.

> 
> 2. If not, apply this patch.  This patch *should* make the failures go
>    away and might print out some error messages along with stack
>    trace.  Can you please verify that the failures go away with this
>    patch and report the kernel messages if any trigger?

And this patch fixed the failure too, and no WARNING no kernel message
triggered.

All my testings are based on 4.2-rc6 kernel (I confirmed rc6 kernel
failed the test first), then applied the first patch, and reverted the
first patch, and applied the second patch.

> 
> Thanks a lot.

Thanks for looking into this!

Eryu
> 
> Index: work/fs/fs-writeback.c
> ===================================================================
> --- work.orig/fs/fs-writeback.c
> +++ work/fs/fs-writeback.c
> @@ -103,7 +103,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepa
>  
>  static bool wb_io_lists_populated(struct bdi_writeback *wb)
>  {
> -     if (wb_has_dirty_io(wb)) {
> +     if (test_bit(WB_has_dirty_io, &wb->state)) {
>               return false;
>       } else {
>               set_bit(WB_has_dirty_io, &wb->state);
> @@ -844,14 +844,13 @@ static void bdi_split_work_to_wbs(struct
>       struct wb_iter iter;
>  
>       might_sleep();
> -
> -     if (!bdi_has_dirty_io(bdi))
> -             return;
>  restart:
>       rcu_read_lock();
>       bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) {
> -             if (!wb_has_dirty_io(wb) ||
> -                 (skip_if_busy && writeback_in_progress(wb)))
> +             /* SYNC_ALL writes out I_DIRTY_TIME too */
> +             if (!wb_has_dirty_io(wb) && base_work->sync_mode == 
> WB_SYNC_NONE)
> +                     continue;
> +             if (skip_if_busy && writeback_in_progress(wb))
>                       continue;
>  
>               base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages);
> @@ -899,8 +898,7 @@ static void bdi_split_work_to_wbs(struct
>  {
>       might_sleep();
>  
> -     if (bdi_has_dirty_io(bdi) &&
> -         (!skip_if_busy || !writeback_in_progress(&bdi->wb))) {
> +     if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
>               base_work->auto_free = 0;
>               base_work->single_wait = 0;
>               base_work->single_done = 0;
> @@ -2275,8 +2273,8 @@ void sync_inodes_sb(struct super_block *
>       };
>       struct backing_dev_info *bdi = sb->s_bdi;
>  
> -     /* Nothing to do? */
> -     if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
> +     /* bdi_has_dirty() ignores I_DIRTY_TIME but we can't, always kick wbs */
> +     if (bdi == &noop_backing_dev_info)
>               return;
>       WARN_ON(!rwsem_is_locked(&sb->s_umount));
>  
> Index: work/include/linux/backing-dev.h
> ===================================================================
> --- work.orig/include/linux/backing-dev.h
> +++ work/include/linux/backing-dev.h
> @@ -38,7 +38,17 @@ extern struct workqueue_struct *bdi_wq;
>  
>  static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
>  {
> -     return test_bit(WB_has_dirty_io, &wb->state);
> +     bool ret = test_bit(WB_has_dirty_io, &wb->state);
> +
> +     if (!ret && (!list_empty(&wb->b_dirty) || !list_empty(&wb->b_io) ||
> +                  !list_empty(&wb->b_more_io))) {
> +             const char *name = wb->bdi->dev ? dev_name(wb->bdi->dev) : 
> "UNK";
> +
> +             pr_err("wb_has_dirty_io: ERR %s has_dirty=%d b_dirty=%d b_io=%d 
> b_more_io=%d\n",
> +                    name, ret, !list_empty(&wb->b_dirty), 
> !list_empty(&wb->b_io), !list_empty(&wb->b_more_io));
> +             WARN_ON(1);
> +     }
> +     return ret;
>  }
>  
>  static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
> @@ -47,7 +57,18 @@ static inline bool bdi_has_dirty_io(stru
>        * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
>        * any dirty wbs.  See wb_update_write_bandwidth().
>        */
> -     return atomic_long_read(&bdi->tot_write_bandwidth);
> +     bool ret = atomic_long_read(&bdi->tot_write_bandwidth);
> +
> +     if (ret != wb_has_dirty_io(&bdi->wb)) {
> +             const char *name = bdi->dev ? dev_name(bdi->dev) : "UNK";
> +
> +             pr_err("bdi_has_dirty_io: ERR %s tot_write_bw=%ld b_dirty=%d 
> b_io=%d b_more_io=%d\n",
> +                    name, atomic_long_read(&bdi->tot_write_bandwidth),
> +                    !list_empty(&bdi->wb.b_dirty), 
> !list_empty(&bdi->wb.b_io), !list_empty(&bdi->wb.b_more_io));
> +             WARN_ON(1);
> +     }
> +
> +     return ret;
>  }
>  
>  static inline void __add_wb_stat(struct bdi_writeback *wb,

<Prev in Thread] Current Thread [Next in Thread>