[Top] [All Lists]

[Bug] XFS: DIO random write + BufferIO read

To: xfstests <xfs@xxxxxxxxxxx>
Subject: [Bug] XFS: DIO random write + BufferIO read
From: Zhi Yong Wu <zwu.kernel@xxxxxxxxx>
Date: Fri, 4 Apr 2014 23:09:10 +0800
Cc: Dave Chinner <david@xxxxxxxxxxxxx>, jack@xxxxxxx, "linux-fsdevel@xxxxxxxxxxxxxxx" <linux-fsdevel@xxxxxxxxxxxxxxx>, "Theodore Ts'o" <tytso@xxxxxxx>
Delivered-to: xfs@xxxxxxxxxxx
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:date:message-id:subject:from:to:cc:content-type; bh=CY7Y7eqIhI7lpH82+M3UL/gNzw++EpAQZwNuBt+r+AM=; b=Oc5RiAiuTX65WBHvhHfCuNCZjzKXVZ8dngtLZgz3+w0vO064VxkGBcsly2et/96U2s wdOVtxRLbc36nP98xRMOdWFu61LD/ijJ+guzlCBbyFLH13kZwYuIfd+YiBkTWMFLhtTs LbfW3pHwK6qdjW30410L0OsgphF0hFIzPptjmT8X0jBW4Y+buw8319PljBO6QW0qzvc6 F9Gkw7u7nb64U4OlCZxMqHjFl2luPR0onqCqePWlj2Ang3JV/vGZ2Q45uJ3AjKTOEoye ZlSRuVkyxhddI3II3H1XJ6EzHpgd6gVqJOgHVYMSQ4HQQJHtH3EOaM7AHlgjwgmiCSW/ liMw==

When I tried a test on an XFS filesystem, I hit the issue described below:

A main task first creates multiple threads. It then issues direct-I/O (DIO)
random writes to a set of files, with a random offset and length for each
request. When a write completes, the corresponding thread reads the same range
back with buffered I/O and checks whether the data matches the buffer that was
written. In theory they should be identical, but the actual result isn't what
we expect.

By the way, I ran the same test on an ext3 filesystem and did not hit any
such issue.

Below is the test code:

#include <errno.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <libaio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <syscall.h>
#include <unistd.h>
#include <fcntl.h>
#include <libaio.h>
#include <string>
#include <vector>

using namespace std;

#define AIO_BLKSIZE  4096
#define AIO_MAXIO 64

// Returns an identifier for the calling thread, used only to tag log lines.
// The value is pthread_self() converted to long, so it is a pthread handle,
// not a kernel thread id; the cast only compiles where pthread_t is an
// integer type.
// NOTE(review): newer glibc versions declare their own gettid() in <unistd.h>;
// this same-named helper may clash there -- confirm on the target toolchain.
static long gettid()
{
    return static_cast<long>(pthread_self());
}

static void* aioThreadWrapper(void* arg);

class DioTest;
// Per-request context attached to an iocb's `data` field so the completion
// thread can find the owning DioTest and the file sequence number.
struct cbParam {
    cbParam(DioTest* p, int fileseq) :p_(p), fileseq_(fileseq) {}

    DioTest* p_;     // DioTest instance that submitted the request
    int fileseq_;    // numeric suffix of the file the request targets
};

class DioTest
        DioTest(const char* name) {
            filename = name;
            memset(&myctx, 0, sizeof(myctx));
            io_queue_init(AIO_MAXIO, &myctx);
            pthread_t tid;
            pthread_create(&tid, NULL, aioThreadWrapper, this);

        void wr_done(int fileseq, struct iocb *iocb, long res, long res2) {

            if (res2 != 0) {
                printf("aio write error n");
            if (res != iocb->u.c.nbytes) {
                printf("write missed bytes expect %ld got %ld n",
                        iocb->u.c.nbytes, res);
            size_t length = iocb->u.c.nbytes;
            size_t offset = iocb->u.c.offset;
            char path[1024];
            snprintf(path, sizeof(path), "%s%d", filename, fileseq);
            int fd = open(path, O_RDONLY);
            assert(fd >= 0);
            char* readbuf = (char*)malloc(length);

            memset(readbuf, 0, length);
            ssize_t ret = pread(fd, readbuf, length, offset);
            assert (ret == length);

            int cmp = memcmp(readbuf, iocb->u.c.buf, length);
            if (cmp != 0)
                printf("tid=%ld data dismatch.cmp=%d file=%s
offset=%lu length=%lu!\n",
                        gettid(), cmp, path, offset, length);
            printf("tid=%ld check=success file=%s offset=%lu length=%lu\n",
                    gettid(), path, offset, length);


        bool writeRequest(int fileseq, size_t offset, size_t length) {
            struct iocb *io = (struct iocb *)malloc(sizeof(struct iocb));
            assert (io);
            char path[1024];
            snprintf(path, sizeof(path), "%s%d", filename, fileseq);
            int fd  = open(path, O_RDWR|O_DIRECT|O_CREAT, S_IWUSR | S_IRUSR);
            assert (fd >= 0);
            void* buf=NULL;
            int ret = posix_memalign(&buf, getpagesize(), length);
            assert(ret == 0);
            memset(buf, 'a', length);
            io_prep_pwrite(io, fd, buf, length, offset);
            io->data = new cbParam(this, fileseq);

            int rc = io_submit(myctx, 1, &io);
            if (rc < 0){
                printf("tid=%ld io_submit fail.file=%s offset=%lu
length=%lu ret=%d errno=%s\n",
                        gettid(), path, offset, length, ret, strerror(errno));
                delete (cbParam*)(io->data);
                return false;
            assert (rc != 0);
            printf("tid=%ld file=%s offset=%lu length=%lu\n",gettid(),
path, offset, length);
            return true;

        void aioThread() {
            while (true)
                struct io_event events[AIO_MAXIO];
                io_callback_t cb;
                int ret = io_getevents(myctx, 1, AIO_MAXIO, events, NULL);
                printf("tid=%ld %d io_request completed \n", gettid(), ret);

                for (int i = 0; i < ret; i++) {
                    struct iocb *io = events[i].obj;
                    printf("tid=%ld events[%d]res = %ld, res2 = %ld\n",
                            gettid(), i, events[i].res, events[i].res2);

                    cbParam* param = (cbParam*)io->data;
                    DioTest* p = param->p_;
                    p->wr_done(param->fileseq_, io, events[i].res,
                    delete param;
        io_context_t myctx;
        const char* filename;

static void* aioThreadWrapper(void* arg)
    DioTest* p = (DioTest*)arg;
    return NULL;

int main(int args, char *argv[])
    if (args < 2) {
        printf("./%s filename", argv[0]);
    const char* filename = argv[1];
    vector<DioTest*> dioTests;
    const int threadNumber = 9;

    for (int i = 0; i < threadNumber; ++i) {
        dioTests.push_back(new DioTest(filename));

    while (true) {
        size_t offset = (rand() % (64*1024*1024/AIO_BLKSIZE)) * AIO_BLKSIZE;
        size_t length = 0;
        while (length == 0) {
            length = abs(static_cast<int>(rand()*1.0/RAND_MAX*16))*AIO_BLKSIZE;

        int seq = rand() % 100;
        DioTest* p = dioTests[rand() % threadNumber];
        for (int i = 0; i < 4; ++i){
            p->writeRequest(seq, offset, length);
            offset += (length + 4096);
        usleep(rand() % 10000);
    return 0;


Zhi Yong Wu

<Prev in Thread] Current Thread [Next in Thread>