
Re: mysterious dbench results

To: Thomas Graichen <graichen@xxxxxxxxxxxxx>
Subject: Re: mysterious dbench results
From: Rajagopal Ananthanarayanan <ananth@xxxxxxx>
Date: Tue, 20 Feb 2001 13:24:51 -0800
Cc: linux-xfs@xxxxxxxxxxx
References: <96nvna$goj$1@mate.bln.innominate.de> <news2mail-96o1pc$6nm$1@mate.bln.innominate.de> <96ot5r$ev3$1@mate.bln.innominate.de> <news2mail-96p228$ujq$1@mate.bln.innominate.de>
Sender: owner-linux-xfs@xxxxxxxxxxx
Hi Thomas,

One reason for these problems could be that
XFS pages are not balanced properly in memory.
I've been experimenting with attaching buffers
to all XFS pages & then letting the flush_dirty_buffers
method take care of the write-outs. Attached is
an experimental patch that might improve the situation.
Please note that this code is work-in-progress, so use
it only on a test system.
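
In case it helps to see the shape of the change before wading
through the diff, here is a condensed sketch (taken straight from
the patch below, with the pb_delalloc_pages accounting and the
BUG()/dirty checks left out): delayed-allocate pages get a
buffer_head in a new BH_Delay state attached and marked dirty, so
they show up on the normal dirty-buffer lists, and the generic
flushing paths go through a write_buffer() wrapper that pushes
such pages out via ->writepage() instead of ll_rw_block():

        /* condensed -- see the patch for the real code */
        STATIC void
        hook_buffers_to_page_delay(struct inode *inode, struct page *page)
        {
                struct buffer_head *bh;

                create_empty_buffers(page, inode->i_dev, PAGE_CACHE_SIZE);
                bh = page->buffers;
                bh->b_state = (1 << BH_Delay);  /* no disk mapping yet */
                bh->b_blocknr = -8;             /* sentinel: unmapped delalloc */
                __mark_buffer_dirty(bh);        /* now the flush code sees it */
                balance_dirty(bh->b_dev);
        }

        void
        write_buffer(struct buffer_head *bh)
        {
                struct page *page = bh->b_page;

                /* delalloc page: let the filesystem allocate & write */
                if (page && DelallocPage(page)) {
                        if (TryLockPage(page))
                                return;
                        page->mapping->a_ops->writepage(page);
                } else  /* ordinary mapped buffer */
                        ll_rw_block(WRITE, 1, &bh);
        }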

Also, can you summarize the machine configs & the
number of dbench clients that you used?

thanks,

ananth.


--------------------------------------------------------------------------
Rajagopal Ananthanarayanan ("ananth")
Member Technical Staff, SGI.
--------------------------------------------------------------------------
diff -Naur ../../xfs-orig/linux/drivers/block/ll_rw_blk.c drivers/block/ll_rw_blk.c
--- ../../xfs-orig/linux/drivers/block/ll_rw_blk.c      Mon Feb 12 14:20:41 2001
+++ drivers/block/ll_rw_blk.c   Sat Feb 17 08:35:52 2001
@@ -806,7 +806,7 @@
        blkdev_release_request(next);
 }
 
-static inline void attempt_back_merge(request_queue_t * q,
+static void attempt_back_merge(request_queue_t * q,
                                      struct request *req,
                                      int max_sectors,
                                      int max_segments)
@@ -816,7 +816,7 @@
        attempt_merge(q, req, max_sectors, max_segments);
 }
 
-static inline void attempt_front_merge(request_queue_t * q,
+static void attempt_front_merge(request_queue_t * q,
                                       struct list_head * head,
                                       struct request *req,
                                       int max_sectors,
@@ -888,6 +888,23 @@
 }
  
  
+#ifdef REQ_DEBUG
+#define CHECK_REQ(req, i) \
+       do { \
+               if ((req->bh == req->bhtail && req->bh->b_reqnext) || \
+                       (req->bh != req->bhtail && !req->bh->b_reqnext)) \
+                       req_foo(req, el_ret, i); \
+       } while (0)
+
+void
+req_foo(struct request *req, int el_ret, int i)
+{
+       printk("request 0x%p inconsistent (elret %d i %d)\n", req, el_ret, i);
+}
+#else
+#define CHECK_REQ(a, b)
+#endif
+
 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh,
                          struct kiobuf * kiobuf, kdev_t dev,
                          unsigned int sector, unsigned int count)
@@ -960,22 +977,30 @@
        switch (el_ret) {
 
                case ELEVATOR_BACK_MERGE:
+                       CHECK_REQ(req, 1);
                        if (!q->back_merge_fn(q, req, bh, max_segments))
                                break;
+                       CHECK_REQ(req, 2);
                        elevator->elevator_merge_cleanup_fn(q, req, count);
+                       CHECK_REQ(req, 3);
                        req->bhtail->b_reqnext = bh;
                        req->bhtail = bh;
                        req->nr_sectors = req->hard_nr_sectors += count;
                        blk_started_io(count);
                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
                        req_new_io(req, 1, count);
+                       CHECK_REQ(req, 4);
                        attempt_back_merge(q, req, max_sectors, max_segments);
+                       CHECK_REQ(req, 5);
                        goto out;
 
                case ELEVATOR_FRONT_MERGE:
+                       CHECK_REQ(req, 6);
                        if (!q->front_merge_fn(q, req, bh, max_segments))
                                break;
+                       CHECK_REQ(req, 7);
                        elevator->elevator_merge_cleanup_fn(q, req, count);
+                       CHECK_REQ(req, 8);
                        bh->b_reqnext = req->bh;
                        req->bh = bh;
                        req->buffer = bh->b_data;
@@ -986,7 +1011,9 @@
                        blk_started_io(count);
                        drive_stat_acct(req->rq_dev, req->cmd, count, 0);
                        req_new_io(req, 1, count);
+                       CHECK_REQ(req, 9);
                        attempt_front_merge(q, head, req, max_sectors, 
max_segments);
+                       CHECK_REQ(req, 10);
                        goto out;
 
                /*
@@ -1043,7 +1070,9 @@
        req_new_io(req, 0, count);
        blk_started_io(count);
        add_request(q, req, insert_here);
+       CHECK_REQ(req, 11);
 out:
+       CHECK_REQ(req, 12);
        if (freereq)
                blkdev_release_request(freereq);
        if (!q->plugged)
@@ -1255,6 +1284,9 @@
 
        if (!nr)
                return;
+
+       if (test_bit(BH_Delay, &bhs[0]->b_state) || bhs[0]->b_blocknr < 0)
+               BUG();
 
        major = MAJOR(bhs[0]->b_dev);
 
diff -Naur ../../xfs-orig/linux/drivers/scsi/scsi_merge.c drivers/scsi/scsi_merge.c
--- ../../xfs-orig/linux/drivers/scsi/scsi_merge.c      Mon Feb 12 14:20:43 2001
+++ drivers/scsi/scsi_merge.c   Sat Feb 17 08:36:46 2001
@@ -92,7 +92,7 @@
        printk("counted segments is %x\n", segments);
        printk("Flags %d %d\n", use_clustering, dma_host);
        if (req->bh != NULL) {
-            for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) {    
+            for (bh = req->bh; bh != NULL; bh = bh->b_reqnext) {       
                  printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
                         bh,
                         bh->b_size >> 9,
diff -Naur ../../xfs-orig/linux/fs/buffer.c fs/buffer.c
--- ../../xfs-orig/linux/fs/buffer.c    Mon Feb 12 14:21:30 2001
+++ fs/buffer.c Sat Feb 17 08:33:56 2001
@@ -161,6 +161,31 @@
        atomic_dec(&bh->b_count);
 }
 
+#define buffer_delay_busy(bh) \
+       (test_bit(BH_Delay, &bh->b_state) && bh->b_page && PageLocked(bh->b_page))
+       
+void
+write_buffer(struct buffer_head *bh)
+{
+       struct page *page;
+
+       if ((page = bh->b_page) != 0 && DelallocPage(page)) {
+               if (TryLockPage(page))
+                       return;
+               if (!buffer_dirty(bh)) {
+                       if (DelallocPage(page))
+                               BUG();
+                       UnlockPage(page);
+                       return;
+               }
+               page->mapping->a_ops->writepage(page);
+               if (DelallocPage(page))
+                       BUG();
+       } else
+               ll_rw_block(WRITE, 1, &bh);
+}
+
+
 /* Call sync_buffers with wait!=0 to ensure that the call does not
  * return until all buffer writes have completed.  Sync() may return
  * before the writes have finished; fsync() may not.
@@ -232,7 +257,7 @@
 
                        atomic_inc(&bh->b_count);
                        spin_unlock(&lru_list_lock);
-                       ll_rw_block(WRITE, 1, &bh);
+                       write_buffer(bh);
                        atomic_dec(&bh->b_count);
                        retry = 1;
                        goto repeat;
@@ -507,6 +532,8 @@
        struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
        struct buffer_head **bhp = &head->list;
 
+       if (test_bit(BH_Delay, &bh->b_state))
+               BUG();
        bh->b_state = 0;
 
        spin_lock(&head->lock);
@@ -879,7 +906,7 @@
                        if (buffer_dirty(bh)) {
                                atomic_inc(&bh->b_count);
                                spin_unlock(&lru_list_lock);
-                               ll_rw_block(WRITE, 1, &bh);
+                               write_buffer(bh);
                                brelse(bh);
                                spin_lock(&lru_list_lock);
                        }
@@ -1103,6 +1130,7 @@
        }
 }
 
+
 /*
  * A buffer may need to be moved from one buffer list to another
  * (e.g. in case it is not shared any more). Handle this.
@@ -1121,6 +1149,8 @@
                bh->b_list = dispose;
                if (dispose == BUF_CLEAN)
                        remove_inode_queue(bh);
+               if (dispose == BUF_DIRTY && bh->b_blocknr == -6)
+                       BUG();
                __insert_into_lru_list(bh, dispose);
        }
 }
@@ -1395,8 +1425,18 @@
        head = page->buffers;
        bh = head;
 
+       if (DelallocPage(page)) {
+               page->mapping->a_ops->writepage(page);
+               lock_page(page); /* XXX */
+               if (bh != page->buffers) {
+                       printk("Page 0x%p buffers bh 0x%p changed to 0x%p\n",
+                               page, bh, page->buffers);
+                       return 1;
+               }
+       }
        if (DelallocPage(page))
                BUG();
+
        do {
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;
@@ -2381,7 +2421,7 @@
                        if (wait > 1)
                                __wait_on_buffer(p);
                } else if (buffer_dirty(p))
-                       ll_rw_block(WRITE, 1, &p);
+                       write_buffer(p);
        } while (tmp != bh);
 }
 
@@ -2408,6 +2448,18 @@
        int index = BUFSIZE_INDEX(bh->b_size);
        int loop = 0;
 
+       if (DelallocPage(page)) {
+               page->mapping->a_ops->writepage(page);
+               lock_page(page); /* XXX */
+               if (bh != page->buffers) {
+                       printk("Page 0x%p buffers bh 0x%p changed to 0x%p\n",
+                               page, bh, page->buffers);
+                       return 1;
+               }
+       }
+       if (DelallocPage(page))
+               BUG();
+
 cleaned_buffers_try_again:
        spin_lock(&lru_list_lock);
        write_lock(&hash_table_lock);
@@ -2609,7 +2661,7 @@
                        __refile_buffer(bh);
                        continue;
                }
-               if (buffer_locked(bh))
+               if (buffer_locked(bh) || buffer_delay_busy(bh))
                        continue;
 
                if (check_flushtime) {
@@ -2627,7 +2679,8 @@
                /* OK, now we are committed to write it out. */
                atomic_inc(&bh->b_count);
                spin_unlock(&lru_list_lock);
-               ll_rw_block(WRITE, 1, &bh);
+
+               write_buffer(bh);
                atomic_dec(&bh->b_count);
 
                if (current->need_resched)
diff -Naur ../../xfs-orig/linux/fs/pagebuf/page_buf.c fs/pagebuf/page_buf.c
--- ../../xfs-orig/linux/fs/pagebuf/page_buf.c  Sat Feb 17 18:31:59 2001
+++ fs/pagebuf/page_buf.c       Fri Feb 16 19:02:40 2001
@@ -1314,7 +1314,7 @@
        struct buffer_head *bh;
        off_t blk_offset;
        size_t blk_length;
-       int err=0;
+       int err=0, retry = 0;
        int concat_ok = ((MAJOR(dev) != LVM_BLK_MAJOR) && (MAJOR(dev) != MD_MAJOR));
 
        /* Calculate the block offsets and length we will be using */
@@ -1340,12 +1340,17 @@
         * Call generic_make_request
         */
 
+retry_alloc:
        psync = (pagesync_t *) kmalloc(sizeof(pagesync_t) +
-           blk_length * sizeof(struct buffer_head *), GFP_BUFFER);
+           blk_length * sizeof(struct buffer_head *), GFP_ATOMIC);
 
        /* Ugh - out of memory condition here */
-       if (psync == NULL)
-               BUG();
+       if (psync == NULL) {
+               if (retry++ < 16)
+                       goto retry_alloc;
+               else
+                       BUG();
+       }
 
        psync->pb = pb;
        psync->count = blk_length;
@@ -2120,6 +2125,7 @@
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
        strcpy(current->comm, "pagebuf_daemon");
+       current->flags |= PF_MEMALLOC;
 
        do {
                if (pb_daemon->active == 1) {
diff -Naur ../../xfs-orig/linux/fs/pagebuf/page_buf_io.c fs/pagebuf/page_buf_io.c
--- ../../xfs-orig/linux/fs/pagebuf/page_buf_io.c       Sat Feb 17 18:31:59 2001
+++ fs/pagebuf/page_buf_io.c    Fri Feb 16 19:06:56 2001
@@ -101,7 +101,6 @@
 /*
  * Globals
  */
-static int pcd_active;
 int PB_MAX_DIRTY_FACTOR = 4;
 
 static DECLARE_WAIT_QUEUE_HEAD(pcd_waitq);
@@ -163,13 +162,29 @@
        __pb_block_commit_write_async(inode, page, mp, 0);
 }
 
-static inline void
-_unmark_delalloc(struct page *page)
+static void
+_unmark_delalloc(struct page *page, int toss)
 {
+       struct buffer_head *bh = page->buffers;
+
        if (!PageLocked(page))
                PAGE_BUG(page);
-       if (test_and_clear_bit(PG_delalloc, &page->flags))
-               atomic_dec(&pb_delalloc_pages);
+       if (!DelallocPage(page))
+               PAGE_BUG(page);
+       if (!bh)
+               BUG();
+       clear_bit(BH_Delay, &bh->b_state);
+       atomic_dec(&pb_delalloc_pages);
+       if (!toss && bh->b_blocknr == -8)
+               printk("warning: unmarking unmapped buffer page 0x%p\n", page);
+       if (toss && bh->b_blocknr == -8) {
+               if (!buffer_dirty(bh))
+                       BUG();
+               bh->b_blocknr = -6;
+               mark_buffer_clean(bh);
+               if (bh->b_list != BUF_CLEAN)
+                       printk("buffer bh 0x%p not clean\n", bh);
+       }
 }
 
 /*
@@ -528,27 +543,19 @@
                        return (-ENOMEM);
                }
                assert(((csize + cpoff) <= PAGE_CACHE_SIZE));
+               lock_page(page);
                memset((void *) (kmap(page) + cpoff), 0, csize);
                kunmap(page);
                SetPageUptodate(page);
                if (pb->pb_bn == PAGE_BUF_DADDR_NULL) {
-                       if (test_and_set_bit(PG_delalloc, &page->flags) == 0) {
-                               atomic_inc(&pb_delalloc_pages);
-                       }
+                       __pb_block_commit_write_async(pb->pb_target, page, NULL, 0);
                }
+               UnlockPage(page);
        }
 
        pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
        pb->pb_flags &= ~(_PBF_SOME_INVALID_PAGES | PBF_PARTIAL | PBF_NONE);
 
-       if (!pcd_active && (pb->pb_bn == PAGE_BUF_DADDR_NULL)) {
-               unsigned int    np = atomic_read(&pb_delalloc_pages);
-
-               if (np > pb_params.p_un.max_dirty_pages)
-                       wake_up_interruptible(&pcd_waitq);
-       }
-
-
        return (0);
 }
 
@@ -1016,15 +1023,13 @@
                                        PAGE_CACHE_SIZE,
                                        &map, 1, &nmaps, PBF_READ);
 
-               hook_buffers_to_page(inode, page, &map, PAGE_CACHE_SHIFT);
-               bh = page->buffers;
                if (map.pbm_bn > 0) {
+                       hook_buffers_to_page(inode, page, &map, PAGE_CACHE_SHIFT);
                        bh = head = page->buffers;
                } else if (map.pbm_flags & (PBMF_HOLE|PBMF_DELAY)) {
                        memset(kmap(page), 0, PAGE_CACHE_SIZE);
                        flush_dcache_page(page);
                        kunmap(page);
-                       set_bit(BH_Uptodate, &bh->b_state);
                        goto page_done;
                } else {
                        printk("pagebuf_read_full_page: page 0x%p map 0x%p\n",
@@ -1096,6 +1101,8 @@
        count = pagebuf_delalloc_convert(page, pb_flags, cpages);
 
        do_write_pages += count;
+       if (DelallocPage(page))
+               BUG();
        if (cpages)
                kfree(cpages);
 
@@ -1119,10 +1126,11 @@
                return __pagebuf_write_full_page(inode, page);
 
        /* things got complicated... */
-       offset = inode->i_size & PAGE_CACHE_MASK_LL;
+       offset = inode->i_size & (~PAGE_CACHE_MASK_LL);
        /* OK, are we completely out? */
        if ((page->index >= end_index+1) || !offset) {
                UnlockPage(page);
+               printk("Bad write on page 0x%p\n", page);
                return -EIO;
        }
 
@@ -1139,6 +1147,8 @@
                __pb_block_commit_write_async(inode, page, NULL, 0);
        }
 
+       if (DelallocPage(page))
+               BUG();
        kunmap(page);
        UnlockPage(page);
        return err;
@@ -1146,12 +1156,33 @@
 
 
 STATIC void
+hook_buffers_to_page_delay(struct inode *inode, struct page *page)
+{
+       struct buffer_head      *bh;
+
+       if (page->buffers)
+               BUG();
+       create_empty_buffers(page, inode->i_dev, PAGE_CACHE_SIZE);
+       bh = page->buffers;
+       bh->b_state = (1 << BH_Delay);
+       atomic_inc(&pb_delalloc_pages);
+       bh->b_blocknr = -8; 
+       __mark_buffer_dirty(bh);
+       balance_dirty(bh->b_dev);
+}
+
+STATIC void
 hook_buffers_to_page(struct inode *inode,
        struct page *page, page_buf_bmap_t *mp, ulong bshift)
 {
        struct buffer_head      *bh;
        page_buf_daddr_t        bn;
 
+       if (mp->pbm_bn < 0) {
+               printk("hook_buffers_to_page: bad bn page 0x%p mp 0x%p\n",
+                       page, mp);
+               BUG();
+       }
        if (!page->buffers)
                create_empty_buffers(page, inode->i_dev, PAGE_CACHE_SIZE);
 
@@ -1160,21 +1191,13 @@
        bh->b_end_io = end_pb_buffer_io_async;
        bh->b_private = (void *) 0;
 
-       if (mp->pbm_flags & (PBMF_HOLE|PBMF_DELAY)) {
-               bh->b_blocknr = 0; 
-               bh->b_state = (1 << BH_Req) | (1 << BH_End_io);
-               return;
-       }
-       if (mp->pbm_bn < 0) {
-               printk("hook_buffers_to_page: bad bn page 0x%p mp 0x%p\n",
-                       page, mp);
-               BUG();
-       }
        bn = mp->pbm_bn >>
                (bshift - inode->i_sb->s_blocksize_bits);
        bn += (mp->pbm_delta >> bshift);
        bh->b_blocknr = bn; 
-       bh->b_state = (1 << BH_Mapped) | (1 << BH_Req) | (1 << BH_End_io);
+       if (buffer_locked(bh) || buffer_req(bh))
+               BUG();
+       bh->b_state |= (1 << BH_Mapped) | (1 << BH_Req) | (1 << BH_End_io);
 }
 
 
@@ -1190,6 +1213,7 @@
        set_bit(BH_Uptodate, &bh->b_state);
        if (!buffer_dirty(bh)) {
                bh->b_end_io = end_pb_buffer_io_async;
+               bh->b_state |= (1 << BH_End_io);
                need_balance_dirty = 1;
        }
        __mark_buffer_dirty(bh);
@@ -1205,7 +1229,7 @@
 {
        struct buffer_head      *bh;
        int                     err = 0;
-       int                     nmaps;
+       int                     nmaps, dp = DelallocPage(page);
        char                    *kaddr = kmap(page);
        page_buf_bmap_t         map;
 
@@ -1218,7 +1242,8 @@
         * go get some space.
         */
        bh = page->buffers;
-       if ((!bh || !buffer_mapped(bh)) && !DelallocPage(page)) {
+       if ((!bh || !buffer_mapped(bh)) && (!dp || (flags & PBF_FILE_ALLOCATE)))
+       {
                if (!mp) {
                        mp = &map;
                        err = inode->i_op->pagebuf_bmap(inode,
@@ -1233,6 +1258,8 @@
                }
                if (mp->pbm_bn > 0) {
                        hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
+                       if (dp)
+                               _unmark_delalloc(page, 0);
                        bh = page->buffers;
                }
        }
@@ -1247,7 +1274,7 @@
        /*
         * Partial write. Is the page valid anyway?
         */
-       if (Page_Uptodate(page) || DelallocPage(page)) {
+       if (Page_Uptodate(page) || dp) {
                goto out;
        }
        /*
@@ -1348,7 +1375,6 @@
                                int             partial)
 {
        struct buffer_head      *bh;
-       unsigned int            np;
 
        /*
         * Prepare write took care of reading/zero-out
@@ -1358,15 +1384,8 @@
        SetPageUptodate(page);
        if ((bh = page->buffers) && buffer_mapped(bh)) {
                set_buffer_dirty_uptodate(page->buffers, partial);
-       } else if (test_and_set_bit(PG_delalloc, &page->flags) == 0) {
-               atomic_inc(&pb_delalloc_pages);
-               if (!pcd_active) {
-                       np = atomic_read(&pb_delalloc_pages);
-                       if (np > pb_params.p_un.max_dirty_pages)
-                               wake_up_interruptible(&pcd_waitq);
-               }
-               if (!partial)
-                       balance_dirty(inode->i_rdev);
+       } else if (!DelallocPage(page)) {
+               hook_buffers_to_page_delay(inode, page);
        }
 
        /* Advance though extent no matter what */
@@ -1737,21 +1756,11 @@
                page_cache_release(page);
                return NULL;
        }
-       /* In the case where we probe a page - push it back down the LRU
-        * so we do not hit it on the next pass.
-        */
-
-       spin_lock(&pagemap_lru_lock);
-       if (PageInactiveDirty(page)) {
-               list_del(&page->lru);
-               list_add(&page->lru, &inactive_dirty_list);
-       }
-       spin_unlock(&pagemap_lru_lock);
-       _unmark_delalloc(page);
        return page;
 }
 
 
+#if 0
 /*
  * Convert & write out a cluster of pages in the same extent as defined
  * by mp and surrounding "startpage". startpage is locked & has an extra
@@ -1822,16 +1831,35 @@
 
        return count;
 }
+#endif
+
 /*
  * Allocate & map buffers for page given the extent map.
  */
 STATIC void
 convert_page(struct inode *inode, struct page *page, page_buf_bmap_t *mp)
 {
-       mp->pbm_delta = (page->index << PAGE_CACHE_SHIFT) - mp->pbm_offset;
-       hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
-       set_buffer_dirty_uptodate(page->buffers, 0);
+       struct buffer_head *bh = page->buffers;
+       int dp = DelallocPage(page);
+
+       if (!bh || dp) {
+               mp->pbm_delta = (page->index << PAGE_CACHE_SHIFT) - mp->pbm_offset;
+               hook_buffers_to_page(inode, page, mp, PAGE_CACHE_SHIFT);
+               if (dp)
+                       _unmark_delalloc(page, 0);
+       }
+       bh = page->buffers;
+       /*
+        * 1 == don't balance dirty, we are doing I/O just below here.
+        * otherwise causes nasty recursions.
+        */
+       set_buffer_dirty_uptodate(bh, 1);
        UnlockPage(page);
+
+       atomic_inc(&bh->b_count);
+       ll_rw_block(WRITE, 1, &bh);
+       atomic_dec(&bh->b_count);
+
        page_cache_release(page);
 }
 
@@ -1879,7 +1907,7 @@
        if (!PageLocked(page))
                BUG();
 
-       _unmark_delalloc(page);
+       _unmark_delalloc(page, 1);
 }
 
 
@@ -1891,7 +1919,7 @@
 {
        page_buf_bmap_t maps[PBF_MAX_MAPS];
        struct inode *inode;
-       int maps_returned, error, count;
+       int maps_returned, error;
        u_long pb_flags;
        loff_t rounded_offset;
 
@@ -1901,7 +1929,8 @@
         * anything.
         */
        if (!inode->i_nlink && (inode->i_state & I_FREEING)) {
-               _unmark_delalloc(page);
+               BUG();
+               _unmark_delalloc(page, 1);
                UnlockPage(page);
                return 0;
        }
@@ -1941,18 +1970,12 @@
        }
 
        page_cache_get(page);
-       _unmark_delalloc(page);
        /*
         * page needs to be setup as though find_page(...) returned it,
         * which is a locked page with an extra reference.
         */
-       if (cpages) {
-               count = kio_cluster_write(inode, page, &maps[0], cpages);
-       } else {
-               cluster_write(inode, page, &maps[0]);
-               count = 1;
-       }
-       return count;
+       cluster_write(inode, page, &maps[0]);
+       return 1;
 }
 
 /*
@@ -2002,6 +2025,7 @@
 STATIC int
 page_cleaner_daemon(void *data)
 {
+#if 0
        struct page *page;
        u_long flags;
        struct buffer_head *bh;
@@ -2072,7 +2096,7 @@
                                 */
 
                                spin_unlock(&pagemap_lru_lock);
-                               _unmark_delalloc(page);
+                               _unmark_delalloc(page, 0);
                                set_buffer_dirty_uptodate(bh, 0);
                                UnlockPage(page);
                                spin_lock(&pagemap_lru_lock);
@@ -2123,6 +2147,7 @@
                pcd_active = 1;
        }
        kfree(cpages);
+#endif
        return 0;
 }
 
diff -Naur ../../xfs-orig/linux/fs/xfs/xfs_log.c fs/xfs/xfs_log.c
--- ../../xfs-orig/linux/fs/xfs/xfs_log.c       Tue Jan  9 18:20:23 2001
+++ fs/xfs/xfs_log.c    Fri Feb 16 16:58:54 2001
@@ -1342,6 +1342,7 @@
        uint            count;          /* byte count of bwrite */
        int             split = 0;      /* split write into two regions */
        int             error;
+       unsigned long save_flags = current->flags;
 
        XFS_STATS_INC(xs_log_writes);
        ASSERT(iclog->ic_refcnt == 0);
@@ -1351,6 +1352,8 @@
                xlog_panic("xlog_sync: illegal flag");
 #endif
        
+       current->flags |= PF_MEMALLOC;
+
        xlog_pack_data(log, iclog);       /* put cycle number in every block */
        INT_SET(iclog->ic_header.h_len, ARCH_CONVERT, iclog->ic_offset);       /* real byte length */
 
@@ -1409,6 +1412,7 @@
        if (error = XFS_bwrite(bp)) {
                xfs_ioerror_alert("xlog_sync", log->l_mp, XFS_BUF_TARGET(bp), 
                                  XFS_BUF_ADDR(bp));
+               current->flags = save_flags;
                return (error);
        }
        if (split) {
@@ -1445,9 +1449,11 @@
                if (error = XFS_bwrite(bp)) {
                        xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 
                                          XFS_BUF_TARGET(bp), XFS_BUF_ADDR(bp));
+                       current->flags = save_flags;
                        return (error);
                }
        }
+       current->flags = save_flags;
        return (0);
 }      /* xlog_sync */
 
diff -Naur ../../xfs-orig/linux/include/linux/fs.h include/linux/fs.h
--- ../../xfs-orig/linux/include/linux/fs.h     Mon Feb 12 14:21:30 2001
+++ include/linux/fs.h  Mon Feb 12 17:38:59 2001
@@ -212,6 +212,8 @@
 #define BH_New         5       /* 1 if the buffer is new and not yet written out */
 #define BH_Protected   6       /* 1 if the buffer is protected */
 #define BH_End_io      7       /* 1 End io function defined don't remap it */
+#define BH_Delay       8       /* disk mapping is delayed */
+
 
 /*
  * Try to keep the most commonly used fields in single cache lines (16
diff -Naur ../../xfs-orig/linux/include/linux/mm.h include/linux/mm.h
--- ../../xfs-orig/linux/include/linux/mm.h     Sat Feb 17 18:31:59 2001
+++ include/linux/mm.h  Fri Feb 16 19:01:31 2001
@@ -182,7 +182,7 @@
 #define PageLocked(page)       test_bit(PG_locked, &(page)->flags)
 #define LockPage(page)         set_bit(PG_locked, &(page)->flags)
 #define TryLockPage(page)      test_and_set_bit(PG_locked, &(page)->flags)
-#define DelallocPage(page)     test_bit(PG_delalloc, &(page)->flags)
+#define DelallocPage(page)     (page->buffers && test_bit(BH_Delay, &(page)->buffers->b_state))
 
 extern void __set_page_dirty(struct page *);
 
diff -Naur ../../xfs-orig/linux/kdb/modules/kdbm_pg.c kdb/modules/kdbm_pg.c
--- ../../xfs-orig/linux/kdb/modules/kdbm_pg.c  Sat Feb 17 18:31:59 2001
+++ kdb/modules/kdbm_pg.c       Fri Feb 16 19:01:31 2001
@@ -28,7 +28,7 @@
 
 static char    *bh_state_vals[] = {
        "Uptodate", "Dirty", "Lock", "Req", "Mapped", "New",
-       "Protected", NULL };
+       "Protected", "End_io", "Delay", NULL };
 
 static char    *map_flags(unsigned long flags, char *mapping[])
 {
@@ -88,9 +88,9 @@
        kdb_printf("  next 0x%p bno %ld rsec %ld size %d dev 0x%x rdev 0x%x\n",
                bh.b_next, bh.b_blocknr, bh.b_rsector,
                bh.b_size, bh.b_dev, bh.b_rdev);
-       kdb_printf("  count %d state 0x%lx [%s] ftime 0x%lx\n",
+       kdb_printf("  count %d state 0x%lx [%s] ftime 0x%lx b_list %d b_reqnext 0x%p b_data 0x%p\n",
                bh.b_count.counter, bh.b_state, map_flags(bh.b_state, bh_state_vals),
-               bh.b_flushtime);
+               bh.b_flushtime, bh.b_list, bh.b_reqnext, bh.b_data);
        kdb_printf("  b_page 0x%p b_this_page 0x%p b_private 0x%p\n",
                bh.b_page, bh.b_this_page, bh.b_private);
 
diff -Naur ../../xfs-orig/linux/mm/swap.c mm/swap.c
--- ../../xfs-orig/linux/mm/swap.c      Mon Feb 12 14:20:46 2001
+++ mm/swap.c   Tue Feb 13 11:55:49 2001
@@ -255,7 +255,7 @@
        } else if (PageInactiveClean(page)) {
                del_page_from_inactive_clean_list(page);
        } else {
-               printk("VM: __lru_cache_del, found unknown page ?!\n");
+               printk("VM: __lru_cache_del, found unknown page 0x%p?!\n", page);
        }
        DEBUG_ADD_PAGE
 }
diff -Naur ../../xfs-orig/linux/mm/vmscan.c mm/vmscan.c
--- ../../xfs-orig/linux/mm/vmscan.c    Mon Feb 12 14:20:46 2001
+++ mm/vmscan.c Sat Feb 10 08:56:13 2001
@@ -364,10 +364,8 @@
                        continue;
                }
                if (DelallocPage(page)) {
-                       del_page_from_inactive_clean_list(page);
-                       add_page_to_inactive_dirty_list(page);
-                       UnlockPage(page);
-                       continue;
+                       printk("delalloc page 0x%p in clean list\n", page);
+                       BUG();
                }
 
                /* OK, remove the page from the caches. */
@@ -481,7 +479,7 @@
                 * Dirty swap-cache page or delayed allocate page?
                 * Write it out if last copy..
                 */
-               if (PageDirty(page) || DelallocPage(page)) {
+               if (PageDirty(page)) {
                        int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
 
                        if (!writepage)