From: Goldwyn Rodrigues <rgoldwyn@suse.de>
Subject: XFS - Make xfsbufd less aggressive
References: bnc#649473
Patch-mainline: no

xfsbufd and flush threads are in contention of the xfs_buf_t because
flush thread needs to read a buffer (usually for btree metadata)
which is locked for I/O by xfsbufd for writes. This could block
writes for a long time.

After a run of heavy writes (approx 10 minutes), the vmscan code
shrinks slabs, which instructs xfsbufd to flush the slab cache
as well. xfsbufd receives flush requests. However, this is too 
aggressive for xfsbufd, and causes more contention with flush
threads. To make things worse, the xfsbufd_wakeup returns
zero which does not account to the objects it will free.

This patch makes the flushes less aggressive by returning the count
of objects in the list, and force flushes only when priority > 0.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.de>
Signed-off-by: NeilBrown <neilb@suse.de>

---
 fs/xfs/linux-2.6/xfs_buf.c |   17 +++++++++++++----
 fs/xfs/linux-2.6/xfs_buf.h |    3 +++
 2 files changed, 16 insertions(+), 4 deletions(-)

--- linux-2.6.32-SLE11-SP1.orig/fs/xfs/linux-2.6/xfs_buf.c
+++ linux-2.6.32-SLE11-SP1/fs/xfs/linux-2.6/xfs_buf.c
@@ -411,7 +411,7 @@ _xfs_buf_lookup_pages(
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(0, gfp_mask);
+			xfsbufd_wakeup(1, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
@@ -1565,6 +1565,7 @@ xfs_alloc_delwrite_queue(
 	INIT_LIST_HEAD(&btp->bt_list);
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
 	spin_lock_init(&btp->bt_delwrite_lock);
+	atomic_set(&btp->bt_qcount, 0);
 	btp->bt_flags = 0;
 	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
 	if (IS_ERR(btp->bt_task)) {
@@ -1627,6 +1628,7 @@ xfs_buf_delwri_queue(
 
 	bp->b_flags |= _XBF_DELWRI_Q;
 	list_add_tail(&bp->b_list, dwq);
+	atomic_inc(&bp->b_target->bt_qcount);
 	bp->b_queuetime = jiffies;
 	spin_unlock(dwlk);
 
@@ -1669,16 +1671,22 @@ xfsbufd_wakeup(
 	gfp_t			mask)
 {
 	xfs_buftarg_t		*btp;
+	int count = 0;
 
 	spin_lock(&xfs_buftarg_lock);
 	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
 		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
 			continue;
-		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
-		wake_up_process(btp->bt_task);
+		if (list_empty(&btp->bt_delwrite_queue))
+			continue;
+		count += atomic_read(&btp->bt_qcount);
+		if (priority) {
+			set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
+			wake_up_process(btp->bt_task);
+		}
 	}
 	spin_unlock(&xfs_buftarg_lock);
-	return 0;
+	return count;
 }
 
 /*
@@ -1715,6 +1723,7 @@ xfs_buf_delwri_split(
 					 _XBF_RUN_QUEUES);
 			bp->b_flags |= XBF_WRITE;
 			list_move_tail(&bp->b_list, list);
+			atomic_dec(&bp->b_target->bt_qcount);
 		} else
 			skipped++;
 	}
--- linux-2.6.32-SLE11-SP1.orig/fs/xfs/linux-2.6/xfs_buf.h
+++ linux-2.6.32-SLE11-SP1/fs/xfs/linux-2.6/xfs_buf.h
@@ -125,6 +125,9 @@ typedef struct xfs_buftarg {
 	struct list_head	bt_delwrite_queue;
 	spinlock_t		bt_delwrite_lock;
 	unsigned long		bt_flags;
+#ifndef __GENKSYMS__
+	atomic_t		bt_qcount;
+#endif
 } xfs_buftarg_t;
 
 /*
