
Re: mysterious dbench results

To: Marcelo Tosatti <marcelo@xxxxxxxxxxxxxxxx>
Subject: Re: mysterious dbench results
From: Rajagopal Ananthanarayanan <ananth@xxxxxxx>
Date: Sat, 24 Feb 2001 15:14:35 -0800
Cc: Steve Lord <lord@xxxxxxx>, Thomas Graichen <thomas.graichen@xxxxxxxxxxxxxx>, linux-xfs@xxxxxxxxxxx
References: <Pine.LNX.4.21.0102241446340.3754-100000@xxxxxxxxxxxxxxxxxxxxxx>
Sender: owner-linux-xfs@xxxxxxxxxxx
Marcelo Tosatti wrote:
> 
> On Fri, 23 Feb 2001, Steve Lord wrote:
> 
> <snip>
> 
> > Other changes coming down the pipeline - probably next week - will help
> > too; we have better dbench and bonnie numbers on some internal code right now.
> 
> It looks like we're unconditionally allocating the page that holds the
> page pointers for the cluster in __pagebuf_write_full_page (my last
> message talks a bit more about that kind of thing). This page, as far as
> I can see, is not used at all without kiobuf I/O. (Thomas is not using
> kiobuf I/O, IIRC.)

Yeah, the allocation of cpages is unnecessary in the non-KIOCLUSTER case.
The following patch (delay-buffer-6.patch) contains several key
changes & cleanups. The fundamental change is to employ the core
Linux daemons and code paths for handling delayed allocation:
the private page cleaner daemon goes away, delalloc state moves from
the PG_delalloc page flag to a new BH_Delay buffer state bit, and the
generic flush paths write such buffers through ->writepage (which does
the allocation) instead of ll_rw_block(). A condensed sketch of that
write path follows, ahead of the full patch.
Comments and feedback (stability & performance) appreciated!
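
In case it helps review, here is the new write path condensed from the
fs/buffer.c hunks below -- a summary of the patch with comments added,
not additional code:

/*
 * BH_Delay marks a dirty buffer whose disk mapping is delayed.
 * The generic flushers (sync_buffers, bdflush, ...) cannot simply
 * ll_rw_block() such a buffer -- it has no block number yet -- so
 * they push the whole page through ->writepage, which allocates
 * the space and then starts the I/O.
 */
void
_write_buffer(struct buffer_head *bh)
{
        struct page *page = bh->b_page;

        if (!page || TryLockPage(page))         /* writepage wants the page locked */
                return;
        /* recheck now that we hold the page lock */
        if (!buffer_delay(bh) || !buffer_dirty(bh)) {
                if (buffer_delay(bh))
                        BUG();                  /* delayed but clean: bogus state */
                UnlockPage(page);
                return;
        }
        page->mapping->a_ops->writepage(page);  /* allocate + write, unlocks page */
        if (DelallocPage(page))
                BUG();                          /* writepage must clear BH_Delay */
}

static inline void
write_buffer(struct buffer_head *bh)
{
        if (!buffer_delay(bh))
                ll_rw_block(WRITE, 1, &bh);     /* already mapped: plain block I/O */
        else
                _write_buffer(bh);              /* delalloc: convert via writepage */
}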



-- 
--------------------------------------------------------------------------
Rajagopal Ananthanarayanan ("ananth")
Member Technical Staff, SGI.
--------------------------------------------------------------------------
diff -Naur ../../xfs-orig/linux/drivers/block/ll_rw_blk.c drivers/block/ll_rw_blk.c
--- ../../xfs-orig/linux/drivers/block/ll_rw_blk.c      Thu Feb 22 14:36:01 2001
+++ drivers/block/ll_rw_blk.c   Sat Feb 24 12:10:07 2001
@@ -1250,6 +1250,7 @@
        if (!nr)
                return;
 
+
        major = MAJOR(bhs[0]->b_dev);
 
        /* Determine correct block size for this device. */
@@ -1270,6 +1271,8 @@
                               correct_size, bh->b_size);
                        goto sorry;
                }
+               if (test_bit(BH_Delay, &bh->b_state) || !buffer_mapped(bh))
+                       BUG();
        }
 
        if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
diff -Naur ../../xfs-orig/linux/drivers/scsi/scsi_merge.c drivers/scsi/scsi_merge.c
--- ../../xfs-orig/linux/drivers/scsi/scsi_merge.c      Thu Feb 22 14:36:21 2001
+++ drivers/scsi/scsi_merge.c   Thu Feb 22 14:12:35 2001
@@ -92,7 +92,7 @@
        printk("counted segments is %x\n", segments);
        printk("Flags %d %d\n", use_clustering, dma_host);
        if (req->bh != NULL) {
-            for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) {    
+            for (bh = req->bh; bh != NULL; bh = bh->b_reqnext) {       
                  printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
                         bh,
                         bh->b_size >> 9,
diff -Naur ../../xfs-orig/linux/fs/buffer.c fs/buffer.c
--- ../../xfs-orig/linux/fs/buffer.c    Thu Feb 22 14:36:27 2001
+++ fs/buffer.c Sat Feb 24 12:04:12 2001
@@ -161,6 +161,38 @@
        atomic_dec(&bh->b_count);
 }
 
+
+#define buffer_delay_busy(bh) \
+       (test_bit(BH_Delay, &bh->b_state) && bh->b_page && PageLocked(bh->b_page))
+       
+void
+_write_buffer(struct buffer_head *bh)
+{
+       struct page *page = bh->b_page;
+
+       if (!page || TryLockPage(page))
+               return;
+       if (!buffer_delay(bh) || !buffer_dirty(bh)) {
+               if (buffer_delay(bh))
+                       BUG();
+               UnlockPage(page);
+               return;
+       }
+       page->mapping->a_ops->writepage(page);
+       if (DelallocPage(page))
+               BUG();
+}
+
+static inline void
+write_buffer(struct buffer_head *bh)
+{
+       if (!buffer_delay(bh))
+               ll_rw_block(WRITE, 1, &bh);
+       else
+               _write_buffer(bh);
+}
+
+
 /* Call sync_buffers with wait!=0 to ensure that the call does not
  * return until all buffer writes have completed.  Sync() may return
  * before the writes have finished; fsync() may not.
@@ -232,7 +264,7 @@
 
                        atomic_inc(&bh->b_count);
                        spin_unlock(&lru_list_lock);
-                       ll_rw_block(WRITE, 1, &bh);
+                       write_buffer(bh);
                        atomic_dec(&bh->b_count);
                        retry = 1;
                        goto repeat;
@@ -507,6 +539,8 @@
        struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
        struct buffer_head **bhp = &head->list;
 
+       if (test_bit(BH_Delay, &bh->b_state))
+               BUG();
        bh->b_state = 0;
 
        spin_lock(&head->lock);
@@ -879,7 +913,7 @@
                        if (buffer_dirty(bh)) {
                                atomic_inc(&bh->b_count);
                                spin_unlock(&lru_list_lock);
-                               ll_rw_block(WRITE, 1, &bh);
+                               write_buffer(bh);
                                brelse(bh);
                                spin_lock(&lru_list_lock);
                        }
@@ -1395,8 +1429,10 @@
        head = page->buffers;
        bh = head;
 
-       if (DelallocPage(page))
-               BUG();
+       if (buffer_delay(bh)) {
+               page->mapping->a_ops->writepage_nounlock(page);
+               return 0; /* just started I/O ... likely didn't complete */
+       }
        do {
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;
@@ -2381,7 +2417,7 @@
                        if (wait > 1)
                                __wait_on_buffer(p);
                } else if (buffer_dirty(p))
-                       ll_rw_block(WRITE, 1, &p);
+                       write_buffer(p);
        } while (tmp != bh);
 }
 
@@ -2408,6 +2444,11 @@
        int index = BUFSIZE_INDEX(bh->b_size);
        int loop = 0;
 
+       if (buffer_delay(bh)) {
+               if (wait)
+                       page->mapping->a_ops->writepage_nounlock(page);
+               return 0; /* just started I/O ... likely didn't complete */
+       }
 cleaned_buffers_try_again:
        spin_lock(&lru_list_lock);
        write_lock(&hash_table_lock);
@@ -2609,7 +2650,7 @@
                        __refile_buffer(bh);
                        continue;
                }
-               if (buffer_locked(bh))
+               if (buffer_locked(bh) || buffer_delay_busy(bh))
                        continue;
 
                if (check_flushtime) {
@@ -2627,7 +2668,7 @@
                /* OK, now we are committed to write it out. */
                atomic_inc(&bh->b_count);
                spin_unlock(&lru_list_lock);
-               ll_rw_block(WRITE, 1, &bh);
+               write_buffer(bh);
                atomic_dec(&bh->b_count);
 
                if (current->need_resched)
diff -Naur ../../xfs-orig/linux/fs/pagebuf/page_buf.c fs/pagebuf/page_buf.c
--- ../../xfs-orig/linux/fs/pagebuf/page_buf.c  Fri Feb 23 11:23:14 2001
+++ fs/pagebuf/page_buf.c       Fri Feb 23 18:54:46 2001
@@ -152,8 +152,6 @@
  *     External pagebuf I/O functions
  */
 
-extern int _page_cleaner_daemon_start(void);
-extern void _page_cleaner_daemon_stop(void);
 extern void _pb_zero_out_delay(struct inode *,
                                struct page *, page_buf_bmap_t *);
 
@@ -177,10 +175,10 @@
  * /proc/sys/vm/pagebuf
  */
 
-unsigned long pagebuf_min[P_PARAM] = { HZ/2, 1*HZ, HZ/2, 1, 0, 0 };
-unsigned long pagebuf_max[P_PARAM] = { HZ*30, HZ*300, HZ*30, 1024, 4096, 1 };
+unsigned long pagebuf_min[P_PARAM] = { HZ/2, 1*HZ,  1, 0 };
+unsigned long pagebuf_max[P_PARAM] = { HZ*30, HZ*300, 1024, 1 };
 
-pagebuf_param_t pb_params = {{ HZ, 15 * HZ, HZ, 512, 1024, 0 }};
+pagebuf_param_t pb_params = {{ HZ, 15 * HZ, 512, 0 }};
 
 /*
  * Pagebuf statistics variables
@@ -455,14 +453,13 @@
     struct page **pages)
 {
        loff_t next_buffer_offset;
-       loff_t next_desired_offset;
        unsigned long page_count;
        int rval;
        struct kiobuf *kp;
        unsigned long pi;
        unsigned long index;
        off_t start_off, end_off;
-       int all_mapped, good_pages, sectors, count;
+       int all_mapped, good_pages, sectors;
        struct page *cp, **hash, *cached_page;
        int gfp_mask;
 
@@ -2082,6 +2079,7 @@
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
        strcpy(current->comm, "pagebuf_daemon");
+       current->flags |= PF_MEMALLOC;
 
        do {
                if (pb_daemon->active == 1) {
@@ -2367,8 +2365,7 @@
                        return -1; /* error */
                }
        }
-
-       return _page_cleaner_daemon_start();
+       return 0;
 }      
 
 int 
@@ -2404,18 +2401,12 @@
        {PB_FLUSH_AGE, "flush_age", &pb_params.data[1],
        sizeof(int), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax,
        &sysctl_intvec, NULL, &pagebuf_min[1], &pagebuf_max[1]},
-       {PB_CLEAN_INT, "clean_int", &pb_params.data[2],
-       sizeof(int), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax,
-       &sysctl_intvec, NULL, &pagebuf_min[2], &pagebuf_max[2]},
-       {PB_CLUSTER_LIMIT, "cluster_limit", &pb_params.data[3],
+       {PB_CLUSTER_LIMIT, "cluster_limit", &pb_params.data[2],
        sizeof(int), 0644, NULL, &proc_doulongvec_minmax, &sysctl_intvec, NULL,
-       &pagebuf_min[3], &pagebuf_max[3]},
-       {PB_DELALLOC_LIMIT, "delalloc_count", &pb_params.data[4],
-       sizeof(int), 0644, NULL, &proc_doulongvec_minmax, &sysctl_intvec, NULL,
-       &pagebuf_min[4], &pagebuf_max[4]},
-       {PB_DEBUG, "debug", &pb_params.data[5],
+       &pagebuf_min[2], &pagebuf_max[2]},
+       {PB_DEBUG, "debug", &pb_params.data[3],
        sizeof(int), 0644, NULL, &proc_doulongvec_minmax, &sysctl_intvec, NULL,
-       &pagebuf_min[5], &pagebuf_max[5]},
+       &pagebuf_min[3], &pagebuf_max[3]},
        {0}
 };
 
@@ -2545,7 +2536,6 @@
 {
        if (pagebuf_cache != NULL)
                kmem_cache_destroy(pagebuf_cache);
-       _page_cleaner_daemon_stop();
        pagebuf_daemon_stop();
        pagebuf_locking_terminate();
        avl_terminate();
diff -Naur ../../xfs-orig/linux/fs/pagebuf/page_buf_io.c fs/pagebuf/page_buf_io.c
--- ../../xfs-orig/linux/fs/pagebuf/page_buf_io.c       Fri Feb 23 10:29:09 2001
+++ fs/pagebuf/page_buf_io.c    Sat Feb 24 12:03:50 2001
@@ -103,8 +103,6 @@
 /*
  * Globals
  */
-static int pcd_active;
-int PB_MAX_DIRTY_FACTOR = 4;
 
 static DECLARE_WAIT_QUEUE_HEAD(pcd_waitq);
 static atomic_t        pb_delalloc_pages = ATOMIC_INIT(0);
@@ -114,7 +112,6 @@
 
 extern spinlock_t pagecache_lock;
 
-int    page_cleaner_count, page_cleaner_pages;
 int    do_write_full_page, do_write_pages;
 int    flush_convert, flush_convert_pages;
 
@@ -122,8 +119,6 @@
  * The minimum size where we will start using pagebuf structures instead
  * of just working with pages.
  */
-
-#define PAGEBUF_MIN_IOSIZE (4*PAGE_CACHE_SIZE)
 #define PBF_IO_CHUNKSIZE 65536
 #define PBF_MAX_MAPS   1
 
@@ -165,13 +160,28 @@
        __pb_block_commit_write_async(inode, page, mp, 0);
 }
 
-static inline void
-_unmark_delalloc(struct page *page)
+static void
+_unmark_delalloc(struct page *page, int toss)
 {
+       struct buffer_head *bh = page->buffers;
+
        if (!PageLocked(page))
                PAGE_BUG(page);
-       if (test_and_clear_bit(PG_delalloc, &page->flags))
-               atomic_dec(&pb_delalloc_pages);
+       if (!DelallocPage(page))
+               PAGE_BUG(page);
+       if (!bh)
+               BUG();
+       clear_bit(BH_Delay, &bh->b_state);
+       atomic_dec(&pb_delalloc_pages);
+       if (!toss && !buffer_mapped(bh))
+               printk("warning: unmarking unmapped buffer page 0x%p\n", page);
+       if (toss && !buffer_mapped(bh)) {
+               if (!buffer_dirty(bh))
+                       BUG();
+               mark_buffer_clean(bh);
+               if (bh->b_list != BUF_CLEAN)
+                       printk("buffer bh 0x%p not clean\n", bh);
+       }
 }
 
 /*
@@ -208,7 +218,6 @@
 
 
 static void _pagebuf_flush(
-       struct inode *ip,               /* used for KIOCLUSTER check    */
        struct list_head *head,         /* list of pages                */
        loff_t ioff,                    /* first location in range      */
        struct page **cpages)           /* clustering buffer            */
@@ -238,9 +247,8 @@
                                flush_convert_pages +=
                                pagebuf_delalloc_convert(page,
                                                PBF_FILE_ALLOCATE, cpages);
-                       } else {
-                               UnlockPage(page);
-                       }
+                       } 
+                       UnlockPage(page);
                        page_cache_release(page);
                        spin_lock(&pagecache_lock);
                        goto repeat;
@@ -257,6 +265,7 @@
 {
        struct page **cpages = NULL;
 
+#if defined(KIOCLUSTER)
        /*
         * If kmalloc fails, no big deal; the lower layers won't
         * cluster. Also, this allocation has to be non-sleeping
@@ -264,11 +273,12 @@
         */
        cpages = kmalloc(CLUSTER_PAGE_LIST_SIZE * sizeof(struct page *),
                                          GFP_PAGE_IO); 
+#endif
 
        spin_lock(&pagecache_lock);
-       _pagebuf_flush(ip, &ip->i_mapping->clean_pages, ioff, cpages);
-       _pagebuf_flush(ip, &ip->i_mapping->dirty_pages, ioff, cpages);
-       _pagebuf_flush(ip, &ip->i_mapping->locked_pages, ioff, cpages);
+       _pagebuf_flush(&ip->i_mapping->clean_pages, ioff, cpages);
+       _pagebuf_flush(&ip->i_mapping->dirty_pages, ioff, cpages);
+       _pagebuf_flush(&ip->i_mapping->locked_pages, ioff, cpages);
        spin_unlock(&pagecache_lock);
 
        generic_buffer_fdatasync(ip, (unsigned long) ioff, ~0UL);
@@ -507,27 +517,19 @@
                        return (-ENOMEM);
                }
                assert(((csize + cpoff) <= PAGE_CACHE_SIZE));
+               lock_page(page);
                memset((void *) (kmap(page) + cpoff), 0, csize);
                kunmap(page);
                SetPageUptodate(page);
                if (pb->pb_bn == PAGE_BUF_DADDR_NULL) {
-                       if (test_and_set_bit(PG_delalloc, &page->flags) == 0) {
-                               atomic_inc(&pb_delalloc_pages);
-                       }
+                       __pb_block_commit_write_async(pb->pb_target, page, NULL, 0);
                }
+               UnlockPage(page);
        }
 
        pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
        pb->pb_flags &= ~(_PBF_SOME_INVALID_PAGES | PBF_PARTIAL | PBF_NONE);
 
-       if (!pcd_active && (pb->pb_bn == PAGE_BUF_DADDR_NULL)) {
-               unsigned int    np = atomic_read(&pb_delalloc_pages);
-
-               if (np > pb_params.p_un.max_dirty_pages)
-                       wake_up_interruptible(&pcd_waitq);
-       }
-
-
        return (0);
 }
 
@@ -995,15 +997,13 @@
                                        PAGE_CACHE_SIZE,
                                        &map, 1, &nmaps, PBF_READ);
 
-               hook_buffers_to_page(inode, page, &map, PAGE_CACHE_SHIFT);
-               bh = page->buffers;
                if (map.pbm_bn > 0) {
+                       hook_buffers_to_page(inode, page, &map, PAGE_CACHE_SHIFT);
                        bh = head = page->buffers;
                } else if (map.pbm_flags & (PBMF_HOLE|PBMF_DELAY)) {
                        memset(kmap(page), 0, PAGE_CACHE_SIZE);
                        flush_dcache_page(page);
                        kunmap(page);
-                       set_bit(BH_Uptodate, &bh->b_state);
                        goto page_done;
                } else {
                        printk("pagebuf_read_full_page: page 0x%p map 0x%p\n",
@@ -1056,25 +1056,16 @@
        struct inode *inode,
        struct page *page)
 {
-       struct page **cpages;
+       struct page **cpages = NULL;
        int     pb_flags;
        int     count;
        unsigned long save_flags = current->flags;
 
-       spin_lock(&inode_lock);
-       if (inode->i_state & I_MAPPING) {
-               spin_unlock(&inode_lock);
-               SetPageDirty(page);
-               UnlockPage(page);
-               return 0;
-       }
-       inode->i_state |= I_MAPPING;
-       spin_unlock(&inode_lock);
-
        current->flags |= PF_MEMALLOC;
+#if defined(KIOCLUSTER)
        cpages = kmalloc(CLUSTER_PAGE_LIST_SIZE * sizeof(struct page *),
                        GFP_PAGE_IO);
-
+#endif
        do_write_full_page++;
 
        if (DelallocPage(page))
@@ -1085,13 +1076,11 @@
        count = pagebuf_delalloc_convert(page, pb_flags, cpages);
 
        do_write_pages += count;
+       if (DelallocPage(page))
+               BUG();
        if (cpages)
                kfree(cpages);
 
-       spin_lock(&inode_lock);
-       inode->i_state &= ~I_MAPPING;
-       spin_unlock(&inode_lock);
-
        current->flags = save_flags;
        return 0;
 }
@@ -1100,7 +1089,7 @@
  *     pagebuf_write_full_page
  */
 
-int pagebuf_write_full_page(struct page *page)
+STATIC int pagebuf_write_full_page(struct page *page)
 {
        struct inode *inode = (struct inode*)page->mapping->host;
        unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -1112,11 +1101,12 @@
                return __pagebuf_write_full_page(inode, page);
 
        /* things got complicated... */
-       offset = inode->i_size & PAGE_CACHE_MASK_LL;
+       offset = inode->i_size & (~PAGE_CACHE_MASK_LL);
        /* OK, are we completely out? */
        if ((page->index >= end_index+1) || !offset) {
-               UnlockPage(page);
-               return -EIO;
+               printk("Bad write on page 0x%p\n", page);
+               err =  -EIO;
+               goto out;
        }
 
        if (DelallocPage(page))
@@ -1132,11 +1122,39 @@
                __pb_block_commit_write_async(inode, page, NULL, 0);
        }
 
+       if (DelallocPage(page))
+               BUG();
        kunmap(page);
-       UnlockPage(page);
+out:
        return err;
 }
 
+int pagebuf_write_full_page_unlock(struct page *page)
+{
+       int ret = pagebuf_write_full_page(page);
+       UnlockPage(page);
+       return ret;
+}
+
+int pagebuf_write_full_page_nounlock(struct page *page)
+{
+       return pagebuf_write_full_page(page);
+}
+
+STATIC void
+hook_buffers_to_page_delay(struct inode *inode, struct page *page)
+{
+       struct buffer_head      *bh;
+
+       if (page->buffers)
+               BUG();
+       create_empty_buffers(page, inode->i_dev, PAGE_CACHE_SIZE);
+       bh = page->buffers;
+       bh->b_state = (1 << BH_Delay);
+       atomic_inc(&pb_delalloc_pages);
+       __mark_buffer_dirty(bh);
+       balance_dirty(bh->b_dev);
+}
 
 STATIC void
 hook_buffers_to_page(struct inode *inode,
@@ -1145,6 +1163,11 @@
        struct buffer_head      *bh;
        page_buf_daddr_t        bn;
 
+       if (mp->pbm_bn < 0) {
+               printk("hook_buffers_to_page: bad bn page 0x%p mp 0x%p\n",
+                       page, mp);
+               BUG();
+       }
        if (!page->buffers)
                create_empty_buffers(page, inode->i_dev, PAGE_CACHE_SIZE);
 
@@ -1153,21 +1176,13 @@
        bh->b_end_io = end_pb_buffer_io_async;
        bh->b_private = (void *) 0;
 
-       if (mp->pbm_flags & (PBMF_HOLE|PBMF_DELAY)) {
-               bh->b_blocknr = 0; 
-               bh->b_state = (1 << BH_Req) | (1 << BH_End_io);
-               return;
-       }
-       if (mp->pbm_bn < 0) {
-               printk("hook_buffers_to_page: bad bn page 0x%p mp 0x%p\n",
-                       page, mp);
-               BUG();
-       }
        bn = mp->pbm_bn >>
                (bshift - inode->i_sb->s_blocksize_bits);
        bn += (mp->pbm_delta >> bshift);
        bh->b_blocknr = bn; 
-       bh->b_state = (1 << BH_Mapped) | (1 << BH_Req) | (1 << BH_End_io);
+       if (buffer_locked(bh) || buffer_req(bh))
+               BUG();
+       bh->b_state |= (1 << BH_Mapped) | (1 << BH_Req) | (1 << BH_End_io);
 }
 
 
@@ -1183,6 +1198,7 @@
        set_bit(BH_Uptodate, &bh->b_state);
        if (!buffer_dirty(bh)) {
                bh->b_end_io = end_pb_buffer_io_async;
+               bh->b_state |= (1 << BH_End_io);
                need_balance_dirty = 1;
        }
        __mark_buffer_dirty(bh);
@@ -1198,7 +1214,7 @@
 {
        struct buffer_head      *bh;
        int                     err = 0;
-       int                     nmaps;
+       int                     nmaps, dp = DelallocPage(page);
        char                    *kaddr = kmap(page);
        page_buf_bmap_t         map;
 
@@ -1211,7 +1227,8 @@
         * go get some space.
         */
        bh = page->buffers;
-       if ((!bh || !buffer_mapped(bh)) && !DelallocPage(page)) {
+       if ((!bh || !buffer_mapped(bh)) && (!dp || (flags & PBF_FILE_ALLOCATE)))
+       {
                if (!mp) {
                        mp = &map;
                        err = inode->i_op->pagebuf_bmap(inode,
@@ -1226,6 +1243,8 @@
                }
                if (mp->pbm_bn > 0) {
                        hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
+                       if (dp)
+                               _unmark_delalloc(page, 0);
                        bh = page->buffers;
                }
        }
@@ -1240,7 +1259,7 @@
        /*
         * Partial write. Is the page valid anyway?
         */
-       if (Page_Uptodate(page) || DelallocPage(page)) {
+       if (Page_Uptodate(page) || dp) {
                goto out;
        }
        /*
@@ -1341,7 +1360,6 @@
                                int             partial)
 {
        struct buffer_head      *bh;
-       unsigned int            np;
 
        /*
         * Prepare write took care of reading/zero-out
@@ -1351,15 +1369,8 @@
        SetPageUptodate(page);
        if ((bh = page->buffers) && buffer_mapped(bh)) {
                set_buffer_dirty_uptodate(page->buffers, partial);
-       } else if (test_and_set_bit(PG_delalloc, &page->flags) == 0) {
-               atomic_inc(&pb_delalloc_pages);
-               if (!pcd_active) {
-                       np = atomic_read(&pb_delalloc_pages);
-                       if (np > pb_params.p_un.max_dirty_pages)
-                               wake_up_interruptible(&pcd_waitq);
-               }
-               if (!partial)
-                       balance_dirty(inode->i_rdev);
+       } else if (!DelallocPage(page)) {
+               hook_buffers_to_page_delay(inode, page);
        }
 
        /* Advance though extent no matter what */
@@ -1693,9 +1704,6 @@
        return written ? written : status;
 }
 
-static int page_cleaner_daemon_started = 0;
-static int daemon_terminate = 0;
-
 /*
  * Probe for a given page (index) in the inode & test if it is delayed.
  * Returns page locked and with an extra reference count.
@@ -1730,21 +1738,11 @@
                page_cache_release(page);
                return NULL;
        }
-       /* In the case where we probe a page - push it back down the LRU
-        * so we do not hit it on the next pass.
-        */
-
-       spin_lock(&pagemap_lru_lock);
-       if (PageInactiveDirty(page)) {
-               list_del(&page->lru);
-               list_add(&page->lru, &inactive_dirty_list);
-       }
-       spin_unlock(&pagemap_lru_lock);
-       _unmark_delalloc(page);
        return page;
 }
 
 
+#if defined(KIOCLUSTER)
 /*
  * Convert & write out a cluster of pages in the same extent as defined
  * by mp and surrounding "startpage". startpage is locked & has an extra
@@ -1815,16 +1813,36 @@
 
        return count;
 }
+#endif /* KIOCLUSTER */
+
 /*
  * Allocate & map buffers for page given the extent map.
  */
 STATIC void
-convert_page(struct inode *inode, struct page *page, page_buf_bmap_t *mp)
+convert_page(struct inode *inode, struct page *page, page_buf_bmap_t *mp, int u)
 {
-       mp->pbm_delta = (page->index << PAGE_CACHE_SHIFT) - mp->pbm_offset;
-       hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
-       set_buffer_dirty_uptodate(page->buffers, 0);
-       UnlockPage(page);
+       struct buffer_head *bh = page->buffers;
+       int dp = DelallocPage(page);
+
+       if (!bh || dp) {
+               mp->pbm_delta = (page->index << PAGE_CACHE_SHIFT) - mp->pbm_offset;
+               hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
+               if (dp)
+                       _unmark_delalloc(page, 0);
+       }
+       bh = page->buffers;
+       /*
+        * 1 == don't balance dirty, we are doing I/O just below here.
+        * otherwise causes nasty recursions.
+        */
+       set_buffer_dirty_uptodate(bh, 1);
+       if (u)
+               UnlockPage(page);
+
+       atomic_inc(&bh->b_count);
+       ll_rw_block(WRITE, 1, &bh);
+       atomic_dec(&bh->b_count);
+
        page_cache_release(page);
 }
 
@@ -1849,16 +1867,16 @@
                for (tindex = startpage->index-1; tindex >= tlast; tindex--) {
                        if (!(page = probe_page(inode, tindex)))
                                break;
-                       convert_page(inode, page, mp);
+                       convert_page(inode, page, mp, 1);
                }
        }
-       convert_page(inode, startpage, mp);
+       convert_page(inode, startpage, mp, 0);
        tlast = PAGE_CACHE_ALIGN_LL(mp->pbm_offset + mp->pbm_bsize) >>
                                                        PAGE_CACHE_SHIFT;
        for (tindex = startpage->index + 1; tindex < tlast; tindex++) {
                if (!(page = probe_page(inode, tindex)))
                        break;
-               convert_page(inode, page, mp);
+               convert_page(inode, page, mp, 1);
        }
 }
 
@@ -1872,7 +1890,7 @@
        if (!PageLocked(page))
                BUG();
 
-       _unmark_delalloc(page);
+       _unmark_delalloc(page, 1);
 }
 
 
@@ -1884,7 +1902,7 @@
 {
        page_buf_bmap_t maps[PBF_MAX_MAPS];
        struct inode *inode;
-       int maps_returned, error, count;
+       int maps_returned, error;
        u_long pb_flags;
        loff_t rounded_offset;
 
@@ -1894,8 +1912,8 @@
         * anything.
         */
        if (!inode->i_nlink && (inode->i_state & I_FREEING)) {
-               _unmark_delalloc(page);
-               UnlockPage(page);
+               BUG();
+               _unmark_delalloc(page, 1);
                return 0;
        }
 
@@ -1918,12 +1936,10 @@
                if (error != -EIO)
                        printk("PCD: pagebuf_bmap error %d pb_flags 0x%lx\n",
                                        error, pb_flags);
-               UnlockPage(page);
                return 0;
        }
        if (maps[0].pbm_delta % PAGE_CACHE_SIZE) {
                printk("PCD: pbm_delta not page aligned mp 0x%p\n", &maps[0]);
-               UnlockPage(page);
                return 0;
        }
 
@@ -1935,236 +1951,15 @@
        }
 
        page_cache_get(page);
-       _unmark_delalloc(page);
        /*
         * page needs to be setup as though find_page(...) returned it,
         * which is a locked page with an extra reference.
         */
-       if (cpages) {
-               count = kio_cluster_write(inode, page, &maps[0], cpages);
-       } else {
-               cluster_write(inode, page, &maps[0]);
-               count = 1;
-       }
-       return count;
+       cluster_write(inode, page, &maps[0]);
+       return 1;
 }
 
 /*
- * Walk the active pages list looking for delalloc entries, we need to
- * age them out all the time, since they have to be converted before
- * being written to disk. If there is no other memory pressure then pages
- * on the active list do not get moved, and we do not put them somewhere
- * the cleaner can find them.
- */
-
-void age_delalloc_pages(void)
-{
-       struct page *page;
-       struct list_head * page_lru;
-       int     maxscan, page_active;
-
-       maxscan = nr_active_pages;
-       while (maxscan-- > 0 && (page_lru = active_list.prev) != &active_list) {
-               page = list_entry(page_lru, struct page, lru);
-               if (!DelallocPage(page)) {
-                       list_del(page_lru);
-                       list_add(page_lru, &active_list);
-                       continue;
-               }
-
-               /* Do aging on delalloc pages. */
-               if (PageTestandClearReferenced(page)) {
-                       age_page_up_nolock(page);
-                       page_active = 1;
-               } else {
-                       age_page_down_ageonly(page);
-                       if (page->age == 0 && page_count(page) <=
-                                               (page->buffers ? 2 : 1)) {
-                               deactivate_page_nolock(page);
-                               page_active = 0;
-                       } else {
-                               page_active = 1;
-                       }
-               }
-               if (page_active || PageActive(page)) {
-                       list_del(page_lru);
-                       list_add(page_lru, &active_list);
-               }
-       }
-}
-
-STATIC int
-page_cleaner_daemon(void *data)
-{
-       struct page *page;
-       u_long flags;
-       struct buffer_head *bh;
-       struct page **cpages;
-       int     maxscan, sum;
-       struct list_head * page_lru;
-
-       /*  Set up the thread  */
-       exit_files(current);
-       daemonize();
-
-       spin_lock_irqsave(&current->sigmask_lock, flags);       
-       flush_signals(current);
-       sigfillset(&current->blocked);
-       recalc_sigpending(current);
-       spin_unlock_irqrestore(&current->sigmask_lock, flags);
-
-       sprintf(current->comm, "page_daemon");
-
-       /*
-        * If we need more memory to do bmap,
-        * indicate this thread might really need it.
-        */
-       current->flags |= PF_MEMALLOC;
-
-       cpages = kmalloc(CLUSTER_PAGE_LIST_SIZE * sizeof(struct page *),
-                               GFP_KERNEL);
-       while (1) {
-               /*
-                * If we actually get into a low-memory situation,
-                * the processes needing more memory will wake us
-                * up on a more timely basis.
-                */
-
-               sum = 0;
-               spin_lock(&pagemap_lru_lock);
-
-               if (atomic_read(&pb_delalloc_pages) > 0)
-                       age_delalloc_pages();
-
-
-               maxscan = nr_inactive_dirty_pages;
-               while ((page_lru = inactive_dirty_list.prev) !=
-                       &inactive_dirty_list && maxscan-- > 0) {
-
-                       if (current->need_resched) {
-                               break;
-                       }
-
-                       page = list_entry(page_lru, struct page, lru);
-                       /*
-                        * We know this page is going to go somewhere, do not
-                        * bother scanning it again.
-                        */
-                       list_del(page_lru);
-                       list_add(page_lru, &inactive_dirty_list);
-
-                       if (!DelallocPage(page))
-                               continue;
-
-                       if (TryLockPage(page))
-                               continue;
-
-                       bh = page->buffers;
-                       if (bh && buffer_mapped(bh)) {
-                               /*
-                                * delalloc page has buffers refile it.
-                                */
-
-                               spin_unlock(&pagemap_lru_lock);
-                               _unmark_delalloc(page);
-                               set_buffer_dirty_uptodate(bh, 0);
-                               UnlockPage(page);
-                               spin_lock(&pagemap_lru_lock);
-                               continue;
-                       }
-
-/*---------------- DELALLOC CONVERT --------------------------------*/
-/* since bmap can block, this should be in a different daemon       */
-/*---------------- DELALLOC CONVERT --------------------------------*/
-
-                       spin_unlock(&pagemap_lru_lock);
-                       page_cleaner_count++;
-                       {
-                       int cnt;
-                       cnt = pagebuf_delalloc_convert(page, PBF_FILE_ALLOCATE,
-                                                                       cpages);
-
-                       sum += cnt;
-                       page_cleaner_pages += cnt;
-                       }
-
-                       /* Do not let too many pages get locked up
-                        * waiting for the queue to open in here
-                        */
-                       if (sum > 256) {
-                               run_task_queue(&tq_disk);
-                               sum = 0;
-                       }
-                       spin_lock(&pagemap_lru_lock);
-               }
-               spin_unlock(&pagemap_lru_lock);
-               run_task_queue(&tq_disk);
-               pcd_active = 0;
-
-               if (daemon_terminate) {
-                       page_cleaner_daemon_started = 0;
-                       wake_up_interruptible(&pcd_waitq);
-                       break;
-               }
-
-               /*
-                * if woken up periodically (nothing else to do)
-                * convert all the pages, else convert only
-                * to keep watermarks happy.
-                */
-               interruptible_sleep_on_timeout(&pcd_waitq,
-                               pb_params.p_un.cluster_interval);
-               pcd_active = 1;
-       }
-       kfree(cpages);
-       return 0;
-}
-
-int
-_page_cleaner_daemon_start(void)
-{
-       extern int pagebuf_max[];
-
-       if (!page_cleaner_daemon_started) {
-               page_cleaner_daemon_started = 1;
-
-               /*
-                * watermarks: at 1/16 of total mem start waking
-                * the daemon to convert ... at 1/8th kick the
-                * daemon synchronously ... at 1/4th stop generating
-                * any more delay pages. Low water before daemon
-                * normally stops is 1/4th of when the daemon is
-                * activated.
-                */
-               pb_params.p_un.max_dirty_pages = max_mapnr >> 4;
-
-               MAX_CLUSTER = pb_params.p_un.max_dirty_pages >> 1;
-               if (MAX_CLUSTER > 1024) /* arbitray max. */
-                       MAX_CLUSTER = 1024;
-               CLUSTER_PAGE_LIST_SIZE = ((2*MAX_CLUSTER)+1);
-               pagebuf_max[4] = MAX_CLUSTER;
-
-               if (0 > kernel_thread(page_cleaner_daemon, (void *)0,
-                               CLONE_FS|CLONE_FILES|CLONE_SIGHAND))
-               {
-                       printk("Can't start page cleaner daemon\n");
-                       return -1; /* error */
-               }
-       }
-       return 0; /* success */
-}
-
-void
-_page_cleaner_daemon_stop(void)
-{
-       daemon_terminate = 1;
-       wake_up_interruptible_sync(&pcd_waitq);
-       while (page_cleaner_daemon_started)
-               interruptible_sleep_on(&pcd_waitq);
-}
-
-
-/*
  *     Module management
  */
 
@@ -2177,7 +1972,8 @@
 EXPORT_SYMBOL(pagebuf_generic_file_read);
 EXPORT_SYMBOL(pagebuf_generic_file_write);
 EXPORT_SYMBOL(pagebuf_read_full_page);
-EXPORT_SYMBOL(pagebuf_write_full_page);
+EXPORT_SYMBOL(pagebuf_write_full_page_nounlock);
+EXPORT_SYMBOL(pagebuf_write_full_page_unlock);
 EXPORT_SYMBOL(pagebuf_toss_page);
 EXPORT_SYMBOL(pagebuf_prepare_write);
 EXPORT_SYMBOL(pagebuf_commit_write);
diff -Naur ../../xfs-orig/linux/fs/xfs/linux/xfs_iops.c fs/xfs/linux/xfs_iops.c
--- ../../xfs-orig/linux/fs/xfs/linux/xfs_iops.c        Mon Feb 12 14:20:44 2001
+++ fs/xfs/linux/xfs_iops.c     Tue Feb 20 21:47:55 2001
@@ -756,7 +756,8 @@
 
 struct address_space_operations linvfs_aops = {
   readpage:            pagebuf_read_full_page,
-  writepage:           pagebuf_write_full_page,
+  writepage:           pagebuf_write_full_page_unlock,
+  writepage_nounlock:  pagebuf_write_full_page_nounlock,
   sync_page:           block_sync_page,
   bmap:                        linvfs_bmap,
   toss_page:           pagebuf_toss_page,
diff -Naur ../../xfs-orig/linux/fs/xfs/xfs_log.c fs/xfs/xfs_log.c
--- ../../xfs-orig/linux/fs/xfs/xfs_log.c       Thu Feb 22 12:55:50 2001
+++ fs/xfs/xfs_log.c    Thu Feb 22 12:00:31 2001
@@ -1345,6 +1345,7 @@
        uint            count;          /* byte count of bwrite */
        int             split = 0;      /* split write into two regions */
        int             error;
+       unsigned long save_flags = current->flags;
 
        XFS_STATS_INC(xs_log_writes);
        ASSERT(iclog->ic_refcnt == 0);
@@ -1354,6 +1355,8 @@
                xlog_panic("xlog_sync: illegal flag");
 #endif
        
+       current->flags |= PF_MEMALLOC;
+
        xlog_pack_data(log, iclog);       /* put cycle number in every block */
        INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset);       /* real byte length */
 
@@ -1412,6 +1415,7 @@
        if (error = XFS_bwrite(bp)) {
                xfs_ioerror_alert("xlog_sync", log->l_mp, XFS_BUF_TARGET(bp), 
                                  XFS_BUF_ADDR(bp));
+               current->flags = save_flags;
                return (error);
        }
        if (split) {
@@ -1448,9 +1452,11 @@
                if (error = XFS_bwrite(bp)) {
                        xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 
                                          XFS_BUF_TARGET(bp), XFS_BUF_ADDR(bp));
+                       current->flags = save_flags;
                        return (error);
                }
        }
+       current->flags = save_flags;
        return (0);
 }      /* xlog_sync */
 
diff -Naur ../../xfs-orig/linux/include/linux/fs.h include/linux/fs.h
--- ../../xfs-orig/linux/include/linux/fs.h     Fri Feb 23 10:33:21 2001
+++ include/linux/fs.h  Fri Feb 23 18:38:49 2001
@@ -220,6 +220,8 @@
 #define BH_New         5       /* 1 if the buffer is new and not yet written out */
 #define BH_Protected   6       /* 1 if the buffer is protected */
 #define BH_End_io      7       /* 1 End io function defined don't remap it */
+#define BH_Delay       8       /* disk mapping is delayed */
+
 
 /*
  * Try to keep the most commonly used fields in single cache lines (16
@@ -275,6 +277,7 @@
 #define buffer_mapped(bh)      __buffer_state(bh,Mapped)
 #define buffer_new(bh)         __buffer_state(bh,New)
 #define buffer_protected(bh)   __buffer_state(bh,Protected)
+#define buffer_delay(bh)       __buffer_state(bh,Delay)
 
 #define bh_offset(bh)          ((unsigned long)(bh)->b_data & ~PAGE_MASK)
 
@@ -379,6 +382,7 @@
        int (*bmap)(struct address_space *, long);
 
        int (*toss_page)(struct page *);
+       int (*writepage_nounlock)(struct page *);
 
 };
 
@@ -481,8 +485,6 @@
                void                            *generic_ip;
        } u;
 };
-
-extern spinlock_t inode_lock;
 
 struct fown_struct {
        int pid;                /* pid or -pgrp where SIGIO should be sent */
diff -Naur ../../xfs-orig/linux/include/linux/mm.h include/linux/mm.h
--- ../../xfs-orig/linux/include/linux/mm.h     Fri Feb 23 10:33:21 2001
+++ include/linux/mm.h  Fri Feb 23 18:38:49 2001
@@ -167,7 +167,6 @@
 #define PG_skip                        10
 #define PG_inactive_clean      11
 #define PG_highmem             12
-#define PG_delalloc            13
                                /* bits 21-29 unused */
 #define PG_arch_1              30
 #define PG_reserved            31
@@ -182,7 +181,7 @@
 #define PageLocked(page)       test_bit(PG_locked, &(page)->flags)
 #define LockPage(page)         set_bit(PG_locked, &(page)->flags)
 #define TryLockPage(page)      test_and_set_bit(PG_locked, &(page)->flags)
-#define DelallocPage(page)     test_bit(PG_delalloc, &(page)->flags)
+#define DelallocPage(page)     (page->buffers && test_bit(BH_Delay, &(page)->buffers->b_state))
 
 extern void __set_page_dirty(struct page *);
 
diff -Naur ../../xfs-orig/linux/include/linux/page_buf.h include/linux/page_buf.h
--- ../../xfs-orig/linux/include/linux/page_buf.h       Fri Feb 23 10:34:56 2001
+++ include/linux/page_buf.h    Fri Feb 23 18:40:25 2001
@@ -342,7 +342,7 @@
  * Tunable pagebuf parameters
  */
 
-#define P_PARAM        6
+#define P_PARAM        4
 
 typedef union pagebuf_param {
        struct {
@@ -350,11 +350,7 @@
                                         * delwri flush daemon.  */
                ulong   age_buffer;     /* time for buffer to age before
                                         * we flush it.  */
-               ulong   cluster_interval; /* interval between runs of the
-                                        * page cleaner daemon. */
                ulong   max_cluster;    /* maximum pages to cluster */
-               ulong   max_dirty_pages; /* maximum pages allowed to be
-                                         * dirty. */
                ulong   debug;          /* debug tracing on or off */
        } p_un;
        ulong data[P_PARAM];
@@ -364,10 +360,8 @@
 {
         PB_FLUSH_INT = 1,
         PB_FLUSH_AGE = 2,
-        PB_CLEAN_INT = 3,
-        PB_CLUSTER_LIMIT = 4,
-        PB_DELALLOC_LIMIT = 5,
-        PB_DEBUG = 6
+        PB_CLUSTER_LIMIT = 3,
+        PB_DEBUG = 4
 };
 
 extern pagebuf_param_t pb_params;
@@ -626,8 +620,11 @@
          struct file *,                /* file to read         */
          struct page *);               /* page to read         */
 
-extern int pagebuf_write_full_page(    /* write a page via pagebuf     */
+extern int pagebuf_write_full_page_unlock(/* write a page via pagebuf  */
          struct page *);                /* page to write               */
+
+extern int pagebuf_write_full_page_nounlock(/* write a page via pagebuf */
+         struct page *);                   /* page to write            */
 
 extern void pagebuf_toss_page( /* convertion of a delalloc page */
          struct page   *);             /* page to convert              */
diff -Naur ../../xfs-orig/linux/kdb/modules/kdbm_pg.c kdb/modules/kdbm_pg.c
--- ../../xfs-orig/linux/kdb/modules/kdbm_pg.c  Thu Feb 22 14:36:37 2001
+++ kdb/modules/kdbm_pg.c       Thu Feb 22 14:15:56 2001
@@ -28,7 +28,7 @@
 
 static char    *bh_state_vals[] = {
        "Uptodate", "Dirty", "Lock", "Req", "Mapped", "New",
-       "Protected", NULL };
+       "Protected", "End_io", "Delay", NULL };
 
 static char    *map_flags(unsigned long flags, char *mapping[])
 {
@@ -88,9 +88,9 @@
        kdb_printf("  next 0x%p bno %ld rsec %ld size %d dev 0x%x rdev 0x%x\n",
                bh.b_next, bh.b_blocknr, bh.b_rsector,
                bh.b_size, bh.b_dev, bh.b_rdev);
-       kdb_printf("  count %d state 0x%lx [%s] ftime 0x%lx\n",
+       kdb_printf("  count %d state 0x%lx [%s] ftime 0x%lx b_list %d b_reqnext 
0x%p b_data 0x%p\n",
                bh.b_count.counter, bh.b_state, map_flags(bh.b_state, bh_state_vals),
-               bh.b_flushtime);
+               bh.b_flushtime, bh.b_list, bh.b_reqnext, bh.b_data);
        kdb_printf("  b_page 0x%p b_this_page 0x%p b_private 0x%p\n",
                bh.b_page, bh.b_this_page, bh.b_private);
 
diff -Naur ../../xfs-orig/linux/kernel/ksyms.c kernel/ksyms.c
--- ../../xfs-orig/linux/kernel/ksyms.c Fri Feb 23 10:29:09 2001
+++ kernel/ksyms.c      Fri Feb 23 18:29:03 2001
@@ -277,7 +277,6 @@
 EXPORT_SYMBOL(lock_may_write);
 EXPORT_SYMBOL(dcache_readdir);
 
-
 /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
 EXPORT_SYMBOL(default_llseek);
 EXPORT_SYMBOL(dentry_open);
@@ -285,8 +284,6 @@
 EXPORT_SYMBOL(filemap_sync);
 EXPORT_SYMBOL(lock_page);
 
-EXPORT_SYMBOL(inode_lock);
-
 /* for page_buf cache */
 EXPORT_SYMBOL(add_to_page_cache_unique);
 EXPORT_SYMBOL(bh_cachep);
@@ -516,13 +513,6 @@
 EXPORT_SYMBOL(file_fsync);
 EXPORT_SYMBOL(fsync_inode_buffers);
 EXPORT_SYMBOL(clear_inode);
-EXPORT_SYMBOL(inactive_dirty_list);
-EXPORT_SYMBOL(nr_active_pages);
-EXPORT_SYMBOL(active_list);
-EXPORT_SYMBOL(age_page_down_ageonly);
-EXPORT_SYMBOL(deactivate_page_nolock);
-EXPORT_SYMBOL(age_page_up_nolock);
-EXPORT_SYMBOL(nr_inactive_dirty_pages);
 EXPORT_SYMBOL(nr_async_pages);
 EXPORT_SYMBOL(___strtok);
 EXPORT_SYMBOL(init_special_inode);
@@ -581,6 +571,3 @@
 
 EXPORT_SYMBOL(tasklist_lock);
 EXPORT_SYMBOL(pidhash);
-
-EXPORT_SYMBOL(pagemap_lru_lock);
-
diff -Naur ../../xfs-orig/linux/mm/page_alloc.c mm/page_alloc.c
--- ../../xfs-orig/linux/mm/page_alloc.c        Mon Feb 12 14:20:46 2001
+++ mm/page_alloc.c     Thu Feb 22 13:17:31 2001
@@ -88,11 +88,6 @@
        if (PageInactiveClean(page))
                BUG();
 
-       if (DelallocPage(page)) {
-               printk("Trying to free dirty page 0x%p\n", page);
-               BUG();
-       }
-  
        page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
        page->age = PAGE_AGE_START;
        
diff -Naur ../../xfs-orig/linux/mm/swap.c mm/swap.c
diff -Naur ../../xfs-orig/linux/mm/vmscan.c mm/vmscan.c
--- ../../xfs-orig/linux/mm/vmscan.c    Thu Feb 22 14:36:37 2001
+++ mm/vmscan.c Fri Feb 23 23:21:29 2001
@@ -51,11 +51,6 @@
        if (TryLockPage(page))
                return;
 
-       if (DelallocPage(page)) {
-               UnlockPage(page);
-               return;
-       }
-
        /* From this point on, the odds are that we're going to
         * nuke this pte, so read and clear the pte.  This hook
         * is needed on CPUs which update the accessed and dirty
@@ -363,12 +358,6 @@
                        add_page_to_inactive_dirty_list(page);
                        continue;
                }
-               if (DelallocPage(page)) {
-                       del_page_from_inactive_clean_list(page);
-                       add_page_to_inactive_dirty_list(page);
-                       UnlockPage(page);
-                       continue;
-               }
 
                /* OK, remove the page from the caches. */
                 if (PageSwapCache(page)) {
@@ -479,10 +468,10 @@
                }
 
                /*
-                * Dirty swap-cache page or delayed allocate page?
-                * Write it out if last copy..
+                * Dirty swap-cache page? Write it out if
+                * last copy..
                 */
-               if (PageDirty(page) || DelallocPage(page)) {
+               if (PageDirty(page)) {
                        int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
 
                        if (!writepage)
@@ -537,6 +526,9 @@
                                wait = 1;       /* Async IO */
                        else
                                wait = 0;       /* No IO */
+
+                       if (!can_queue_buffers)
+                               wait = 0;
 
                        /* Try to free the page buffers. */
                        clearedbuf = try_to_free_buffers(page, wait);
<Prev in Thread] Current Thread [Next in Thread>