xfs
[Top] [All Lists]

Re: data corruption on nfs+xfs

To: Masanori TSUDA <tsuda@xxxxxxxxxxxxxx>
Subject: Re: data corruption on nfs+xfs
From: Nathan Scott <nathans@xxxxxxx>
Date: Thu, 10 Jun 2004 18:48:03 +1000
Cc: linux-xfs@xxxxxxxxxxx, Kazuyuki Goto <kazuyuki@xxxxxxxxxxxxxxxxxxx>
In-reply-to: <200406100130.AA00198@xxxxxxxxxxxxxxxxxxxxxxxx>
References: <200405271558.EJG73779.VJBLYZVL@xxxxxxxxxxxxxxxxxxx> <200406100130.AA00198@xxxxxxxxxxxxxxxxxxxxxxxx>
Sender: linux-xfs-bounce@xxxxxxxxxxx
User-agent: Mutt/1.5.3i
On Thu, Jun 10, 2004 at 10:30:31AM +0900, Masanori TSUDA wrote:
> Hi,

Hi there,

> I have reproduced a similar problem on XFS 1.3.1 (based on 2.4.21);
> my environment is as follows.
> 
> I have investigated the issue using the kernel's embedded local trace.
> I think that the issue is caused by the delayed allocation mechanism.
> I will explain an example of the corruption scenario that I suspect.

Thank you, and an excellent explanation too.  I think you may
well have figured it out.

> I think that one solution for the issue is to flush only the buffers up to
> the end of the file before allocating disk space for delayed allocation
> blocks, and not to flush any buffers beyond that.

I'm sure Russell will have more to add tomorrow when he's
awake.  I've ported your patch forward to current CVS (for
2.4 and 2.6 kernels), for my own testing purposes and have
included it below for anyone else who is interested.

thanks!

-- 
Nathan


Index: xfs-linux/linux-2.4/xfs_aops.c
===================================================================
--- xfs-linux.orig/linux-2.4/xfs_aops.c 2004-06-04 16:52:51.919925776 +1000
+++ xfs-linux/linux-2.4/xfs_aops.c      2004-06-10 17:05:48.183806544 +1000
@@ -626,13 +626,12 @@
        pgoff_t                 tindex,
        xfs_iomap_t             *iomapp,
        int                     startio,
-       int                     all_bh)
+       int                     all_bh,
+       pgoff_t                 tlast)
 {
-       pgoff_t                 tlast;
        struct page             *page;
 
-       tlast = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-       for (; tindex < tlast; tindex++) {
+       for (; tindex <= tlast; tindex++) {
                page = xfs_probe_delalloc_page(inode, tindex);
                if (!page)
                        break;
@@ -668,17 +667,20 @@
 {
        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
        xfs_iomap_t             *iomp, iomap;
-       unsigned long           p_offset = 0, end_index;
+       unsigned long           p_offset = 0;
+       __uint64_t              end_offset;
+       pgoff_t                 end_index, last_index, tlast;
        loff_t                  offset;
-       unsigned long long      end_offset;
        int                     len, err, i, cnt = 0, uptodate = 1;
        int                     flags = startio ? 0 : BMAPI_TRYLOCK;
        int                     page_dirty = 1;
+       int                     delalloc = 0;
 
 
        /* Are we off the end of the file ? */
        offset = i_size_read(inode);
        end_index = offset >> PAGE_CACHE_SHIFT;
+       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
        if (page->index >= end_index) {
                if ((page->index >= end_index + 1) ||
                    !(offset & (PAGE_CACHE_SIZE - 1))) {
@@ -751,6 +753,7 @@
                 */
                } else if (buffer_delay(bh)) {
                        if (!iomp) {
+                               delalloc = 1;
                                err = xfs_map_blocks(inode, offset, len, &iomap,
                                                BMAPI_ALLOCATE | flags);
                                if (err) {
@@ -822,8 +825,14 @@
        if (startio)
                xfs_submit_page(page, bh_arr, cnt);
 
-       if (iomp)
-               xfs_cluster_write(inode, page->index + 1, iomp, startio, unmapped);
+       if (iomp) {
+               tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+                                       PAGE_CACHE_SHIFT;
+               if (delalloc && (tlast > last_index))
+                       tlast = last_index;
+               xfs_cluster_write(inode, page->index + 1, iomp,
+                                       startio, unmapped, tlast);
+       }
 
        return page_dirty;
 
Index: xfs-linux/linux-2.6/xfs_aops.c
===================================================================
--- xfs-linux.orig/linux-2.6/xfs_aops.c 2004-06-04 16:52:51.926924712 +1000
+++ xfs-linux/linux-2.6/xfs_aops.c      2004-06-10 17:05:54.924781760 +1000
@@ -668,13 +668,12 @@
        xfs_iomap_t             *iomapp,
        struct writeback_control *wbc,
        int                     startio,
-       int                     all_bh)
+       int                     all_bh,
+       pgoff_t                 tlast)
 {
-       pgoff_t                 tlast;
        struct page             *page;
 
-       tlast = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
-       for (; tindex < tlast; tindex++) {
+       for (; tindex <= tlast; tindex++) {
                page = xfs_probe_delalloc_page(inode, tindex);
                if (!page)
                        break;
@@ -713,17 +712,19 @@
        struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
        xfs_iomap_t             *iomp, iomap;
        unsigned long           p_offset = 0;
-       pgoff_t                 end_index;
+       __uint64_t              end_offset;
+       pgoff_t                 end_index, last_index, tlast;
        loff_t                  offset;
-       unsigned long long      end_offset;
        int                     len, err, i, cnt = 0, uptodate = 1;
        int                     flags = startio ? 0 : BMAPI_TRYLOCK;
        int                     page_dirty = 1;
+       int                     delalloc = 1;
 
 
        /* Are we off the end of the file ? */
        offset = i_size_read(inode);
        end_index = offset >> PAGE_CACHE_SHIFT;
+       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
        if (page->index >= end_index) {
                if ((page->index >= end_index + 1) ||
                    !(offset & (PAGE_CACHE_SIZE - 1))) {
@@ -796,6 +797,7 @@
                 */
                } else if (buffer_delay(bh)) {
                        if (!iomp) {
+                               delalloc = 1;
                                err = xfs_map_blocks(inode, offset, len, &iomap,
                                                BMAPI_ALLOCATE | flags);
                                if (err) {
@@ -870,8 +872,12 @@
                xfs_submit_page(page, bh_arr, cnt);
 
        if (iomp) {
+               tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+                                       PAGE_CACHE_SHIFT;
+               if (delalloc && (tlast > last_index))
+                       tlast = last_index;
                xfs_cluster_write(inode, page->index + 1, iomp, wbc,
-                               startio, unmapped);
+                                       startio, unmapped, tlast);
        }
 
        return page_dirty;


<Prev in Thread] Current Thread [Next in Thread>