"Davida, Joe" wrote:
>
> You say you have seen performance problems with non-sequential
> localized writes using the kiocluster option on mounts.
> Is the performance problem bad enough so that it is worse
> than ext2fs?
>
> Joe
Performance difference really depends on what is being run.
We have noticed that for dbench with small number of clients,
xfs+kiocluster does worse than ext2 ... but for dbench with
larger number of clients xfs+kiocluster does better than ext2.
I've been working on a patch which in my tests with dbench
has improved xfs+kiocluster significantly. If you are working
with a test system, can you please try the following patch
(in conjunction with the kiocluster option)? The patch is against
the latest development xfs tree ...
----------------- patch begins -------------------------------
diff -Naur ../../xfs-tot/linux/fs/pagebuf/page_buf.c ./fs/pagebuf/page_buf.c
--- ../../xfs-tot/linux/fs/pagebuf/page_buf.c Tue Nov 28 16:34:17 2000
+++ ./fs/pagebuf/page_buf.c Thu Nov 30 10:29:39 2000
@@ -186,7 +186,7 @@
unsigned long pagebuf_min[P_PARAM] = { HZ/2, 1*HZ, HZ/2, 1, 0, 0 };
unsigned long pagebuf_max[P_PARAM] = { HZ*30, HZ*300, HZ*30, 1024, 4096, 1 };
-pagebuf_param_t pb_params = {{ HZ, 15 * HZ, HZ, 512, 1024, 0 }};
+pagebuf_param_t pb_params = {{ HZ, 15 * HZ, 2*HZ, 512, 1024, 0 }};
/*
* Pagebuf statistics variables
@@ -595,7 +595,11 @@
kp->maplist[pi] = cp;
} else {
cp = kp->maplist[pi];
- set_bit(PG_referenced, &cp->flags);
+ /*
+ * Don't set referenced bit;
+ * we may consider age_page_up.
+ //set_bit(PG_referenced, &cp->flags);
+ */
while (TryLockPage(cp)) {
wait_on_page(cp);
}
@@ -1345,7 +1349,7 @@
struct buffer_head *bh;
off_t blk_offset;
size_t blk_length;
- int err=0;
+ int err=0, need_balance_dirty = 0;
int force_io = (rw != READ) || (pb->pb_flags & PBF_FORCEIO);
int concat_ok = ((MAJOR(dev) != LVM_BLK_MAJOR) || (MAJOR(dev) !=
MD_MAJOR));
@@ -1425,7 +1429,9 @@
if (rw == WRITE ) {
set_bit(BH_Uptodate, &bh->b_state);
- set_bit(BH_Dirty, &bh->b_state);
+ if (!buffer_dirty(bh))
+ need_balance_dirty = 1;
+ __mark_buffer_dirty(bh);
}
psync->bh[cnt++] = bh;
atomic_inc(&psync->remain);
@@ -1444,7 +1450,8 @@
if (locking)
UnlockPage(page);
}
-
+ if (need_balance_dirty)
+ balance_dirty(dev);
return err;
error:
/* If we ever do get here then clean up what we already did */
diff -Naur ../../xfs-tot/linux/fs/pagebuf/page_buf_io.c
./fs/pagebuf/page_buf_io.c
--- ../../xfs-tot/linux/fs/pagebuf/page_buf_io.c Tue Nov 28 16:34:17 2000
+++ ./fs/pagebuf/page_buf_io.c Fri Dec 1 10:31:28 2000
@@ -93,11 +93,11 @@
/*
* Forward declarations.
*/
-STATIC void __pb_block_commit_write_async(struct inode *,
+STATIC int __pb_block_commit_write_async(struct inode *,
struct page *, page_buf_bmap_t *);
STATIC int __pb_block_prepare_write_async(struct inode *, struct page *,
unsigned, unsigned, int, page_buf_bmap_t *, int);
-STATIC int pagebuf_delalloc_convert(struct page *, u_long, struct page **);
+STATIC int pagebuf_delalloc_convert(struct page *, u_long, struct page **,
int, int);
STATIC void hook_buffers_to_page(struct inode *, struct page *,
page_buf_bmap_t *, ulong);
@@ -114,6 +114,20 @@
int MAX_CLUSTER = 512;
int CLUSTER_PAGE_LIST_SIZE = ((2*512)+1);
+/*
+ * stats
+ */
+struct pb_io_stat_s {
+ int pcd_normal_scan;
+ int pcd_normal_converted;
+
+ int pcd_skip_locked;
+ int pcd_skip_referenced;
+
+ int pcd_ilock_failed;
+} pb_io_stat = {0, 0, 0, 0, 0};
+
+EXPORT_SYMBOL(pb_io_stat);
/*
* The minimum size where we will start using pagebuf structures instead
@@ -210,10 +224,12 @@
if (IS_KIOCLUSTER(ip)){
/*
- * If kmalloc, no big deal; the lower layers won't cluster.
+ * If kmalloc fails, no big deal; the lower layers won't
+ * cluster. Also, this allocation has to be non-sleeping
+ * since this can be in kswapd's path ...
*/
cpages = kmalloc(CLUSTER_PAGE_LIST_SIZE * sizeof(struct page *),
- GFP_KERNEL);
+ GFP_ATOMIC);
} else {
cpages = (struct page **)1; /* a boolean */
}
@@ -235,7 +251,7 @@
lock_page(page);
if (test_and_clear_bit(PG_delalloc, &page->flags)) {
- pagebuf_delalloc_convert(page, 0, cpages);
+ pagebuf_delalloc_convert(page, 0, cpages, 0, 0);
} else {
UnlockPage(page);
}
@@ -511,7 +527,8 @@
int pagebuf_iozero( /* zero contents of buffer */
page_buf_t * pb, /* buffer to zero */
off_t boff, /* offset in buffer */
- size_t bsize) /* size of data to zero */
+ size_t bsize, /* size of data to zero */
+ int *dirty)
{
loff_t cboff;
size_t cpoff;
@@ -541,6 +558,7 @@
if (pb->pb_bn == PAGE_BUF_DADDR_NULL) {
if (test_and_set_bit(PG_delalloc, &pm->flags) == 0) {
atomic_inc(&pb_delalloc_pages);
+ (*dirty)++;
}
}
}
@@ -548,16 +566,6 @@
pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
pb->pb_flags &= ~(_PBF_SOME_INVALID_PAGES | PBF_PARTIAL | PBF_NONE);
- if (!pcd_active && (pb->pb_bn == PAGE_BUF_DADDR_NULL)) {
- unsigned int np = atomic_read(&pb_delalloc_pages);
-
- if (np > 2 * pb_params.p_un.max_dirty_pages)
- wake_up_interruptible_sync(&pcd_waitq);
- else if (np > pb_params.p_un.max_dirty_pages)
- wake_up_interruptible(&pcd_waitq);
- }
-
-
return (0);
}
@@ -1174,62 +1182,6 @@
page, page->index, bh->b_blocknr));
}
-
-void
-set_buffer_dirty_uptodate(struct buffer_head *bh)
-{
- int need_balance_dirty = 0;
-
- if (bh->b_blocknr <= 0) {
- printk("Warning: buffer 0x%p with weird blockno (%ld)\n",
- bh, bh->b_blocknr);
- }
- set_bit(BH_Uptodate, &bh->b_state);
- if (!buffer_dirty(bh)) {
- bh->b_end_io = end_pb_buffer_io_async;
- need_balance_dirty = 1;
- }
- __mark_buffer_dirty(bh);
-
- if (need_balance_dirty)
- balance_dirty(bh->b_dev);
-}
-
-int pbwcm_debug = 0;
-
-int
-__pb_write_or_convert_bmap(
- struct inode *inode,
- struct page *page)
-{
- loff_t offset = page->index << PAGE_CACHE_SHIFT;
- int error, nmaps;
- page_buf_bmap_t map;
-
- error = inode->i_op->pagebuf_bmap(inode, offset, PAGE_CACHE_SIZE,
- &map, 1, &nmaps, PBF_WRITE);
- if (error == 0 && (map.pbm_flags & PBMF_DELAY)) {
- error = inode->i_op->pagebuf_bmap(inode, offset,
- map.pbm_bsize, &map, 1,
- &nmaps, PBF_WRITE|PBF_FILE_ALLOCATE);
- if (error) {
- printk("pbwcm: bmap error %d ro 0x%Lx size 0x%x\n",
- error, offset, map.pbm_bsize);
- } else {
- dprintk(pbwcm_debug,
- ("converted bn:%Ld off:%Ld size:%d flags:%d\n",
- map.pbm_bn, map.pbm_offset,
- map.pbm_bsize, map.pbm_flags));
- }
- }
- if (!error) {
- hook_buffers_to_page(inode, page, &map, PAGE_CACHE_SHIFT);
- set_buffer_dirty_uptodate(page->buffers);
- }
- return error;
-}
-
-
STATIC int
__pb_block_prepare_write_async(struct inode *inode, struct page *page,
unsigned from, unsigned to, int at_eof,
@@ -1390,15 +1342,34 @@
}
int pbcw_debug = 0;
+
+int
+set_buffer_dirty_uptodate(struct buffer_head *bh)
+{
+ int need_balance_dirty = 0;
+
+ if (bh->b_blocknr <= 0) {
+ printk("Warning: buffer 0x%p with weird blockno (%ld)\n",
+ bh, bh->b_blocknr);
+ }
+ set_bit(BH_Uptodate, &bh->b_state);
+ if (!buffer_dirty(bh)) {
+ bh->b_end_io = end_pb_buffer_io_async;
+ need_balance_dirty = 1;
+ }
+ __mark_buffer_dirty(bh);
+ return (need_balance_dirty);
+}
+
int pbcw_debug2 = 0;
-STATIC void
+STATIC int
__pb_block_commit_write_async(struct inode *inode,
struct page *page,
page_buf_bmap_t *mp)
{
struct buffer_head *bh;
- unsigned int np;
+ int dirty = 0;
/*
* Prepare write took care of reading/zero-out
@@ -1412,32 +1383,20 @@
if (test_bit(PG_delalloc, &page->flags)) {
dprintk(pbcw_debug2, ("mapped buffer 0x%p page 0x%p is
delalloc\n", bh,
page));
}
- set_buffer_dirty_uptodate(page->buffers);
+ dirty = set_buffer_dirty_uptodate(page->buffers);
dprintk(pbcw_debug, ("pbcw: refiled valid buffer 0x%p\n",
page->buffers));
} else if (test_and_set_bit(PG_delalloc, &page->flags) == 0) {
dprintk(pbcw_debug, ("Marking page 0x%p delalloc\n", page));
- np = atomic_read(&pb_delalloc_pages);
- if (np > PB_MAX_DIRTY_FACTOR * pb_params.p_un.max_dirty_pages) {
- clear_bit(PG_delalloc, &page->flags);
- if (__pb_write_or_convert_bmap(inode, page)) {
- BUG();
- }
- } else {
- atomic_inc(&pb_delalloc_pages);
- if (!pcd_active) {
- if (np > 2 * pb_params.p_un.max_dirty_pages)
- wake_up_interruptible_sync(&pcd_waitq);
- else if (np > pb_params.p_un.max_dirty_pages)
- wake_up_interruptible(&pcd_waitq);
- }
- balance_dirty(inode->i_rdev);
- }
+
+ atomic_inc(&pb_delalloc_pages);
+ dirty = 1;
}
/* Advance though extent no matter what */
if (mp)
mp->pbm_delta += PAGE_CACHE_SIZE;
+ return dirty;
}
int
@@ -1448,7 +1407,8 @@
char *user_addr,
size_t len,
loff_t *lp,
- page_buf_bmap_t *mp) /* bmap for page */
+ page_buf_bmap_t *mp, /* bmap for page */
+ int *dirty)
{
struct page *page;
unsigned long done;
@@ -1507,7 +1467,7 @@
goto unlock;
}
- __pb_block_commit_write_async(inode, page, mp);
+ *dirty += __pb_block_commit_write_async(inode, page, mp);
foff += bytes_in_page;
len -= bytes_in_page;
@@ -1533,7 +1493,8 @@
char *buf, /* buffer address */
size_t len, /* size of buffer */
loff_t * lp, /* file offset to use and update */
- int pb_flags) /* flags to pass to bmap calls */
+ int pb_flags, /* flags to pass to bmap calls */
+ int *dirty)
{
struct inode *inode = filp->f_dentry->d_inode;
page_buf_bmap_t map;
@@ -1628,7 +1589,7 @@
*/
status = __pagebuf_do_delwri(inode,
rounded_offset, size, buf,
- len, &foff, &map);
+ len, &foff, &map, dirty);
if (status <= 0)
break;
written += status;
@@ -1646,7 +1607,8 @@
struct file * filp, /* file to write */
char *buf, /* buffer address */
size_t len, /* size of buffer */
- loff_t * lp) /* file offset to use and update */
+ loff_t * lp, /* file offset to use and update */
+ int *dirty)
{
struct inode *inode = filp->f_dentry->d_inode;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
@@ -1711,7 +1673,7 @@
if (!page) {
status = _pagebuf_file_write(filp,
- buf, len, &foff, pb_flags);
+ buf, len, &foff, pb_flags, dirty);
if (status > 0)
written += status;
@@ -1748,7 +1710,7 @@
goto unlock;
}
- __pb_block_commit_write_async(inode, page, &map);
+ *dirty += __pb_block_commit_write_async(inode, page, &map);
len -= bytes;
buf += bytes;
@@ -1773,8 +1735,6 @@
}
int pcd_debug = 0;
-int pcd_skip_locked = 0;
-int pcd_ilock_failed = 0;
static int page_cleaner_daemon_started = 0;
static int daemon_terminate = 0;
@@ -1783,12 +1743,12 @@
* Returns page locked and with an extra reference count.
*/
STATIC struct page *
-probe_page(struct inode *inode, unsigned long index)
+probe_page(struct inode *inode, unsigned long index, int check)
{
struct page *page;
page = __find_lock_page_nowait(inode->i_mapping, index,
- page_hash(inode->i_mapping, index));
+ page_hash(inode->i_mapping, index), check);
if (!page)
return NULL;
if (!test_and_clear_bit(PG_delalloc, &(page)->flags)) {
@@ -1820,26 +1780,33 @@
kio_cluster_write(struct inode *inode,
struct page *startpage,
page_buf_bmap_t *mp,
- struct page **cpages)
+ struct page **cpages,
+ int np,
+ int check)
{
unsigned long tindex, tlast;
struct page **pcp, **pcstart;
loff_t cstart_offset;
page_buf_t *pb;
size_t csize;
- int count = pb_params.p_un.max_cluster;
+ int m, count = pb_params.p_un.max_cluster;
- pcp = &cpages[MAX_CLUSTER]; /* start from the middle */
dprintk(cluster_debug,
("cluster_write: inode 0x%p page 0x%p index 0x%lx\n",
inode, startpage, startpage->index));
+
+ if (np && count > np) /* obey limit if supplied */
+ count = np;
+ m = count >> 1; /* start from middle */
+ pcp = &cpages[m];
*pcp-- = startpage;
+ count--;
if (startpage->index != 0) {
tlast = mp->pbm_offset >> PAGE_CACHE_SHIFT;
for (tindex = startpage->index-1; tindex >= tlast &&
pcp >= &cpages[0] && count; tindex--, pcp--, count--)
{
- if (!(*pcp = probe_page(inode, tindex)))
+ if (!(*pcp = probe_page(inode, tindex, check)))
break;
dprintk(cluster_debug,
("cluster_write(L): inode 0x%p page 0x%p idx
0x%lx\n",
@@ -1849,11 +1816,11 @@
pcstart = pcp+1;
tlast = PAGE_CACHE_ALIGN_LL(mp->pbm_offset + mp->pbm_bsize) >>
PAGE_CACHE_SHIFT;
- for (tindex = startpage->index + 1, pcp = &cpages[MAX_CLUSTER+1];
- tindex < tlast && pcp < &cpages[CLUSTER_PAGE_LIST_SIZE] &&
count;
+ for (tindex = startpage->index + 1, pcp = &cpages[m+1];
+ tindex < tlast && pcp < &cpages[2*m] && count;
tindex++, pcp++, count--)
{
- if (!(*pcp = probe_page(inode, tindex)))
+ if (!(*pcp = probe_page(inode, tindex, check)))
break;
dprintk(cluster_debug,
("cluster_write(R): inode 0x%p page 0x%p index 0x%lx\n",
@@ -1920,7 +1887,8 @@
STATIC void
cluster_write(struct inode *inode,
unsigned long index,
- page_buf_bmap_t *mp)
+ page_buf_bmap_t *mp,
+ int check)
{
unsigned long tindex;
unsigned long tlast;
@@ -1930,7 +1898,7 @@
if (index != 0) {
tlast = mp->pbm_offset >> PAGE_CACHE_SHIFT;
for (tindex = index-1; tindex >= tlast; tindex--) {
- if (!(page = probe_page(inode, tindex)))
+ if (!(page = probe_page(inode, tindex, check)))
break;
convert_page(inode, page, mp);
}
@@ -1938,13 +1906,12 @@
tlast = PAGE_CACHE_ALIGN_LL(mp->pbm_offset + mp->pbm_bsize) >>
PAGE_CACHE_SHIFT;
for (tindex = index + 1; tindex < tlast; tindex++) {
- if (!(page = probe_page(inode, tindex)))
+ if (!(page = probe_page(inode, tindex, check)))
break;
convert_page(inode, page, mp);
}
}
-
int
pagebuf_convert_page(struct page *page, int toss, int wait)
{
@@ -1972,7 +1939,9 @@
pagebuf_delalloc_convert(
struct page *mm, /* delalloc page to convert - locked */
u_long flags, /* flags to pass to bmap call */
- struct page **cpages) /* can we cluster conversion? */
+ struct page **cpages, /* can we cluster conversion? */
+ int np, /* n pages in cpages */
+ int check) /* check flush times */
{
page_buf_bmap_t maps[PBF_MAX_MAPS];
struct inode *inode;
@@ -1996,7 +1965,7 @@
if (error) {
if (error == -EAGAIN) {
- pcd_ilock_failed++;
+ pb_io_stat.pcd_ilock_failed++;
set_bit(PG_delalloc, &mm->flags);
} else {
printk("PCD: pagebuf_bmap error %d pb_flags 0x%lx\n",
@@ -2020,13 +1989,13 @@
if (cpages) {
if (IS_KIOCLUSTER(inode)) {
get_page(mm);
- count = kio_cluster_write(inode, mm, &maps[0], cpages);
+ count = kio_cluster_write(inode, mm, &maps[0], cpages,
np, check);
} else {
hook_buffers_to_page(inode, mm, &maps[0],
PAGE_CACHE_SHIFT);
set_buffer_dirty_uptodate(mm->buffers);
UnlockPage(mm);
- cluster_write(inode, mm->index, &maps[0]);
+ cluster_write(inode, mm->index, &maps[0], check);
count = 1;
}
@@ -2042,6 +2011,8 @@
}
int pcd_debug2 = 0;
+int sum_min = 0;
+EXPORT_SYMBOL(sum_min);
STATIC int
page_cleaner_daemon(void *data)
@@ -2049,9 +2020,8 @@
mem_map_t *mm = &mem_map[0], *mmlast = &mem_map[max_mapnr];
u_long flags;
struct buffer_head *bh;
- int pb_min_save = PB_MIN_DIRTY_PAGES;
struct page **cpages;
- int looped, sum;
+ int looped, tsum, sum;
/* Set up the thread */
exit_files(current);
@@ -2074,7 +2044,6 @@
cpages = kmalloc(CLUSTER_PAGE_LIST_SIZE * sizeof(struct page *),
GFP_KERNEL);
- mm = &mem_map[0] - 1;
while (1) {
/*
* If we actually get into a low-memory situation,
@@ -2082,10 +2051,11 @@
* up on a more timely basis.
*/
- pcd_skip_locked = 0;
- pcd_ilock_failed = 0;
+ pb_io_stat.pcd_skip_locked = pb_io_stat.pcd_skip_referenced = 0;
+ pb_io_stat.pcd_ilock_failed = 0;
sum = looped = 0;
- while (atomic_read(&pb_delalloc_pages) > PB_MIN_DIRTY_PAGES) {
+ mm = &mem_map[0] - 1;
+ while (1) {
if (current->need_resched)
schedule();
@@ -2101,8 +2071,12 @@
}
if (!test_bit(PG_delalloc, &(mm)->flags))
continue;
+ if (mm->age >= PAGE_AGE_START && !looped) {
+ pb_io_stat.pcd_skip_referenced++;
+ continue;
+ }
if (TryLockPage(mm)) {
- pcd_skip_locked++;
+ pb_io_stat.pcd_skip_locked++;
continue;
}
if (!test_and_clear_bit(PG_delalloc, &(mm)->flags)) {
@@ -2129,16 +2103,20 @@
/* since bmap can block, this should be in a different daemon */
/*---------------- DELALLOC CONVERT --------------------------------*/
- sum += pagebuf_delalloc_convert(mm,
- PBF_BMAP_TRY_ILOCK, cpages);
+ tsum = pagebuf_delalloc_convert(mm,
+ PBF_BMAP_TRY_ILOCK, cpages, 0, 0);
+
+ pb_io_stat.pcd_normal_converted += tsum;
+ sum += tsum;
/* Do not let too many pages get locked up
* waiting for the queue to open in here
*/
- if (sum > 256) {
+ if (tsum > 256) {
run_task_queue(&tq_disk);
- sum = 0;
}
+ if (sum > sum_min)
+ break;
}
run_task_queue(&tq_disk);
@@ -2149,18 +2127,9 @@
wake_up_interruptible(&pcd_waitq);
break;
}
-
- /*
- * if woken up periodically (nothing else to do)
- * convert all the pages, else convert only
- * to keep watermarks happy.
- */
- if (interruptible_sleep_on_timeout(&pcd_waitq,
- pb_params.p_un.cluster_interval) == 0)
- {
- PB_MIN_DIRTY_PAGES = 0;
- } else
- PB_MIN_DIRTY_PAGES = pb_min_save;
+ interruptible_sleep_on_timeout(&pcd_waitq,
+ pb_params.p_un.cluster_interval);
+ pb_io_stat.pcd_normal_scan++;
pcd_active = 1;
}
kfree(cpages);
diff -Naur ../../xfs-tot/linux/fs/xfs/linux/xfs_lrw.c ./fs/xfs/linux/xfs_lrw.c
--- ../../xfs-tot/linux/fs/xfs/linux/xfs_lrw.c Mon Dec 4 13:28:38 2000
+++ ./fs/xfs/linux/xfs_lrw.c Fri Dec 1 10:30:10 2000
@@ -77,7 +77,8 @@
char *buf,
size_t size,
loff_t *offsetp,
- int read) /* set if read, otherwise this is write */
+ int read, /* set if read, otherwise this is write */
+ int *dirty)
{
ssize_t ret;
struct xfs_inode *xip;
@@ -98,7 +99,7 @@
if (!(filp->f_flags & O_INVISIBLE))
xfs_ichgtime(xip, XFS_ICHGTIME_ACC);
} else {
- ret = pagebuf_generic_file_write(filp, buf, size, offsetp);
+ ret = pagebuf_generic_file_write(filp, buf, size, offsetp,
dirty);
}
out:
return(ret);
@@ -118,6 +119,7 @@
vnode_t *vp;
xfs_inode_t *ip;
#endif
+ int dirty = 0;
n = XFS_MAX_FILE_OFFSET - *offsetp;
if (n <= 0)
@@ -145,7 +147,8 @@
}
#endif /* CONFIG_XFS_DMAPI */
- ret = xfs_rdwr(bdp, filp, buf, size, offsetp, 1);
+ /* dirty doesn't matter */
+ ret = xfs_rdwr(bdp, filp, buf, size, offsetp, 1, &dirty);
return(ret);
}
@@ -168,7 +171,8 @@
xfs_iocore_t *io,
xfs_off_t offset,
xfs_fsize_t isize,
- struct pm *pmp)
+ struct pm *pmp,
+ int *dirty)
{
xfs_fileoff_t last_fsb;
xfs_fileoff_t next_fsb;
@@ -342,7 +346,7 @@
printk("xfs_zero_last_block: unwritten?\n");
}
} else {
- error = pagebuf_iozero(pb, zero_offset, zero_len);
+ error = pagebuf_iozero(pb, zero_offset, zero_len, dirty);
pagebuf_rele(pb);
goto out_lock;
}
@@ -358,7 +362,7 @@
("zlb: pb_iozero pb 0x%p zf 0x%x zl 0x%x\n",
pb, zero_offset, zero_len));
- if (error = pagebuf_iozero(pb, zero_offset, zero_len)) {
+ if (error = pagebuf_iozero(pb, zero_offset, zero_len, dirty)) {
pagebuf_rele(pb);
goto out_lock;
}
@@ -409,7 +413,8 @@
xfs_iocore_t *io,
xfs_off_t offset,
xfs_fsize_t isize,
- struct pm *pmp)
+ struct pm *pmp,
+ int *dirty)
{
struct inode *ip = vp->v_inode;
xfs_fileoff_t start_zero_fsb;
@@ -440,7 +445,7 @@
* First handle zeroing the block on which isize resides.
* We only zero a part of that block so it is handled specially.
*/
- error = xfs_zero_last_block(ip, io, offset, isize, pmp);
+ error = xfs_zero_last_block(ip, io, offset, isize, pmp, dirty);
if (error) {
ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -555,7 +560,7 @@
}
if (imap.br_startblock == DELAYSTARTBLOCK) {
- error = pagebuf_iozero(pb, 0, lsize);
+ error = pagebuf_iozero(pb, 0, lsize, dirty);
pagebuf_rele(pb);
} else {
pb->pb_bn = XFS_FSB_TO_DB_IO(io, imap.br_startblock);
@@ -568,7 +573,7 @@
("xfs_zero_eof: real time device? use
diff inode\n"));
}
- if (error = pagebuf_iozero(pb, 0, lsize)) {
+ if (error = pagebuf_iozero(pb, 0, lsize, dirty)) {
pagebuf_rele(pb);
goto out_lock;
}
@@ -629,6 +634,7 @@
int eventsent = 0;
loff_t savedsize = *offsetp;
#endif
+ int dirty = 0;
vp = BHV_TO_VNODE(bdp);
xip = XFS_BHVTOI(bdp);
@@ -704,7 +710,7 @@
if (*offsetp > isize && isize) {
io->io_writeio_blocks = mp->m_writeio_blocks;
ret = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offsetp,
- isize, NULL);
+ isize, NULL, &dirty);
if (ret) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL);
return(ret); /* JIMJIM should this be negative? */
@@ -713,7 +719,7 @@
xfs_iunlock(xip, XFS_ILOCK_EXCL);
retry:
- ret = xfs_rdwr(bdp, filp, buf, size, offsetp, 0);
+ ret = xfs_rdwr(bdp, filp, buf, size, offsetp, 0, &dirty);
#ifdef CONFIG_XFS_DMAPI
if ((ret == -ENOSPC) &&
@@ -754,6 +760,8 @@
}
}
xfs_iunlock(xip, XFS_IOLOCK_EXCL);
+ if (dirty)
+ balance_dirty(ip->i_dev);
return(ret);
}
diff -Naur ../../xfs-tot/linux/fs/xfs/linux/xfs_lrw.h ./fs/xfs/linux/xfs_lrw.h
--- ../../xfs-tot/linux/fs/xfs/linux/xfs_lrw.h Tue Nov 28 16:34:23 2000
+++ ./fs/xfs/linux/xfs_lrw.h Wed Oct 25 12:37:18 2000
@@ -48,7 +48,7 @@
extern int xfs_bdstrat_cb (struct xfs_buf *);
extern int xfs_zero_eof (vnode_t *, struct xfs_iocore *, xfs_off_t,
- xfs_fsize_t, struct pm *);
+ xfs_fsize_t, struct pm *, int *dirty);
extern ssize_t xfs_read (bhv_desc_t *, struct file *, char *,
size_t, loff_t *);
extern ssize_t xfs_write (bhv_desc_t *, struct file *, char *,
diff -Naur ../../xfs-tot/linux/fs/xfs/xfs_inode.c ./fs/xfs/xfs_inode.c
--- ../../xfs-tot/linux/fs/xfs/xfs_inode.c Tue Nov 28 16:34:30 2000
+++ ./fs/xfs/xfs_inode.c Thu Nov 30 10:29:40 2000
@@ -1707,7 +1707,7 @@
cred_t *credp)
{
xfs_fsize_t isize;
- int error;
+ int error, dirty;
ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
@@ -1720,7 +1720,8 @@
* xfs_write_file() beyond the end of the file
* and any blocks between the old and new file sizes.
*/
- error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
NULL);
+ error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
+ NULL, &dirty);
return error;
}
diff -Naur ../../xfs-tot/linux/fs/xfs/xfs_rw.c ./fs/xfs/xfs_rw.c
--- ../../xfs-tot/linux/fs/xfs/xfs_rw.c Tue Nov 28 16:34:31 2000
+++ ./fs/xfs/xfs_rw.c Wed Oct 25 12:11:52 2000
@@ -690,7 +690,7 @@
void *dio)
{
xfs_dio_t *diop = (xfs_dio_t *)dio;
- int relock;
+ int relock, dirty;
__uint64_t flush_end;
xfs_mount_t *mp;
@@ -717,7 +717,8 @@
XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
isize = XFS_SIZE(mp, io);
if (offset > isize) {
- xfs_zero_eof(vp, io, offset, isize, diop->xd_pmp);
+ xfs_zero_eof(vp, io, offset, isize,
+ diop->xd_pmp, &dirty);
}
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
}
diff -Naur ../../xfs-tot/linux/include/linux/page_buf.h
./include/linux/page_buf.h
--- ../../xfs-tot/linux/include/linux/page_buf.h Tue Nov 28 16:34:57 2000
+++ ./include/linux/page_buf.h Fri Dec 1 16:38:38 2000
@@ -570,7 +570,8 @@
extern int pagebuf_iozero( /* zero contents of buffer */
page_buf_t *, /* buffer to zero */
off_t, /* offset in buffer */
- size_t); /* size of data to zero */
+ size_t, /* size of data to zero */
+ int *); /* generated new dirty data? */
extern int pagebuf_mapin( /* make buffer addressable */
page_buf_t *); /* buffer to make addressable */
@@ -635,7 +636,8 @@
struct file *, /* file to write */
char *, /* buffer address */
size_t, /* size of buffer */
- loff_t *); /* file offset to use and update */
+ loff_t *, /* file offset to use and update */
+ int *); /* dirty indicator */
/*
* pagebuf_generic_file_write writes data from the specified file
diff -Naur ../../xfs-tot/linux/include/linux/pagemap.h ./include/linux/pagemap.h
--- ../../xfs-tot/linux/include/linux/pagemap.h Tue Nov 28 16:34:57 2000
+++ ./include/linux/pagemap.h Fri Dec 1 16:38:39 2000
@@ -70,7 +70,7 @@
extern struct page * __find_lock_page (struct address_space * mapping,
unsigned long index, struct page **hash);
extern struct page * __find_lock_page_nowait (struct address_space * mapping,
- unsigned long index, struct page **hash);
+ unsigned long index, struct page **hash, int);
extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index))
diff -Naur ../../xfs-tot/linux/include/linux/swap.h ./include/linux/swap.h
--- ../../xfs-tot/linux/include/linux/swap.h Tue Nov 28 16:34:59 2000
+++ ./include/linux/swap.h Fri Dec 1 16:36:29 2000
@@ -208,6 +208,9 @@
#define ZERO_PAGE_BUG \
if (page_count(page) == 0) BUG();
+#define DELALLOC_DEBUG_PAGE \
+ if (test_bit(PG_delalloc, &(page)->flags)) BUG();
+
#define add_page_to_active_list(page) { \
DEBUG_ADD_PAGE \
ZERO_PAGE_BUG \
@@ -228,6 +231,7 @@
#define add_page_to_inactive_clean_list(page) { \
DEBUG_ADD_PAGE \
ZERO_PAGE_BUG \
+ DELALLOC_DEBUG_PAGE \
SetPageInactiveClean(page); \
list_add(&(page)->lru, &page->zone->inactive_clean_list); \
page->zone->inactive_clean_pages++; \
diff -Naur ../../xfs-tot/linux/mm/filemap.c ./mm/filemap.c
--- ../../xfs-tot/linux/mm/filemap.c Tue Nov 28 16:35:03 2000
+++ ./mm/filemap.c Thu Nov 30 10:29:41 2000
@@ -252,6 +252,24 @@
spin_unlock(&pagecache_lock);
}
+static inline struct page * __find_page_nolock_noref(struct address_space
*mapping, unsigned
long offset, struct page *page)
+{
+ goto inside;
+
+ for (;;) {
+ page = page->next_hash;
+inside:
+ if (!page)
+ goto not_found;
+ if (page->mapping != mapping)
+ continue;
+ if (page->index == offset)
+ break;
+ }
+not_found:
+ return page;
+}
+
static inline struct page * __find_page_nolock(struct address_space *mapping,
unsigned long
offset, struct page *page)
{
goto inside;
@@ -580,17 +598,19 @@
}
struct page * __find_lock_page_nowait(struct address_space *mapping,
- unsigned long offset, struct page **hash)
+ unsigned long offset, struct page **hash, int check)
{
struct page *page;
spin_lock(&pagecache_lock);
- page = __find_page_nolock(mapping, offset, *hash);
+ page = __find_page_nolock_noref(mapping, offset, *hash);
if (page)
page_cache_get(page);
spin_unlock(&pagecache_lock);
- if (page && TryLockPage(page)) {
+ if (page &&
+ ((check && page->age >= PAGE_AGE_START) || TryLockPage(page)))
+ {
/* don't wait for page */
put_page(page);
return NULL;
diff -Naur ../../xfs-tot/linux/mm/swap.c ./mm/swap.c
--- ../../xfs-tot/linux/mm/swap.c Tue Nov 28 16:35:03 2000
+++ ./mm/swap.c Wed Nov 1 14:03:55 2000
@@ -173,7 +173,8 @@
* inactive_clean list it doesn't need to be perfect...
*/
int maxcount = (page->buffers ? 3 : 2);
- page->age = 0;
+ if (page->age)
+ return;
ClearPageReferenced(page);
/*
@@ -181,8 +182,7 @@
* (some pages aren't on any list at all)
*/
if (PageActive(page) && page_count(page) <= maxcount &&
- !page_ramdisk(page) &&
- !test_bit(PG_delalloc, &page->flags))
+ !page_ramdisk(page))
{
/*
@@ -194,7 +194,9 @@
* need to be cleared away) and/or the function calling
* us has an extra reference count on the page.
*/
- if (page->buffers || page_count(page) == 2) {
+ if (page->buffers || page_count(page) == 2
+ || test_bit(PG_delalloc, &page->flags))
+ {
del_page_from_active_list(page);
add_page_to_inactive_dirty_list(page);
/*
-------------------------------- patch ends ------------------------------
--
--------------------------------------------------------------------------
Rajagopal Ananthanarayanan ("ananth")
Member Technical Staff, SGI.
--------------------------------------------------------------------------
|