[BACK]Return to rdwr.c CVS log [TXT][DIR] Up to [Development] / xfs-cmds / xfsprogs / libxfs

File: [Development] / xfs-cmds / xfsprogs / libxfs / rdwr.c (download)

Revision 1.32, Tue Aug 8 15:32:10 2006 UTC (11 years, 2 months ago) by nathans.longdrop.melbourne.sgi.com
Branch: MAIN
Changes since 1.31: +3 -12 lines

Allow tools to use direct IO on Linux when reading from the device, if teh device supports it, and if the tools is OK with that (most are).  Mainly for xfs_repair speedups, now that libxfs caches metadata buffers internally.
Merge of master-melb:xfs-cmds:26728a by kenmcd.

/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <xfs/libxfs.h>
#include <xfs/xfs_log.h>
#include <xfs/xfs_log_priv.h>
#include "init.h"

#define BDSTRAT_SIZE	(256 * 1024)
#define min(x, y)	((x) < (y) ? (x) : (y))

void
libxfs_device_zero(dev_t dev, xfs_daddr_t start, uint len)
{
	xfs_off_t	start_offset, end_offset, offset;
	ssize_t		zsize, bytes;
	char		*z;
	int		fd;

	zsize = min(BDSTRAT_SIZE, BBTOB(len));
	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
		fprintf(stderr,
			_("%s: %s can't memalign %d bytes: %s\n"),
			progname, __FUNCTION__, (int)zsize, strerror(errno));
		exit(1);
	}
	memset(z, 0, zsize);

	fd = libxfs_device_to_fd(dev);
	start_offset = LIBXFS_BBTOOFF64(start);

	if ((lseek64(fd, start_offset, SEEK_SET)) < 0) {
		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
			progname, __FUNCTION__,
			(unsigned long long)start_offset, strerror(errno));
		exit(1);
	}

	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
	for (offset = 0; offset < end_offset; ) {
		bytes = min((ssize_t)(end_offset - offset), zsize);
		if ((bytes = write(fd, z, bytes)) < 0) {
			fprintf(stderr, _("%s: %s write failed: %s\n"),
				progname, __FUNCTION__, strerror(errno));
			exit(1);
		} else if (bytes == 0) {
			fprintf(stderr, _("%s: %s not progressing?\n"),
				progname, __FUNCTION__);
			exit(1);
		}
		offset += bytes;
	}
	free(z);
}

static void unmount_record(void *p)
{
	xlog_op_header_t	*op = (xlog_op_header_t *)p;
	/* the data section must be 32 bit size aligned */
	struct {
	    __uint16_t magic;
	    __uint16_t pad1;
	    __uint32_t pad2; /* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	memset(p, 0, BBSIZE);
	INT_SET(op->oh_tid,		ARCH_CONVERT, 1);
	INT_SET(op->oh_len,		ARCH_CONVERT, sizeof(magic));
	INT_SET(op->oh_clientid,	ARCH_CONVERT, XFS_LOG);
	INT_SET(op->oh_flags,		ARCH_CONVERT, XLOG_UNMOUNT_TRANS);
	INT_SET(op->oh_res2,		ARCH_CONVERT, 0);

	/* and the data for this op */
	memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic));
}

static xfs_caddr_t next(xfs_caddr_t ptr, int offset, void *private)
{
	xfs_buf_t	*buf = (xfs_buf_t *)private;

	if (XFS_BUF_COUNT(buf) < (int)(ptr - XFS_BUF_PTR(buf)) + offset)
		abort();
	return ptr + offset;
}

int
libxfs_log_clear(
	dev_t			device,
	xfs_daddr_t		start,
	uint			length,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt)
{
	xfs_buf_t		*buf;
	int			len;

	if (!device || !fs_uuid)
		return -EINVAL;

	/* first zero the log */
	libxfs_device_zero(device, start, length);

	/* then write a log record header */
	len = ((version == 2) && sunit) ? BTOBB(sunit) : 2;
	len = MAX(len, 2);
	buf = libxfs_getbuf(device, start, len);
	libxfs_log_header(XFS_BUF_PTR(buf),
			  fs_uuid, version, sunit, fmt, next, buf);
	libxfs_writebufr(buf);
	libxfs_putbuf(buf);
	return 0;
}

int
libxfs_log_header(
	xfs_caddr_t		caddr,
	uuid_t			*fs_uuid,
	int			version,
	int			sunit,
	int			fmt,
	libxfs_get_block_t	*nextfunc,
	void			*private)
{
	xlog_rec_header_t	*head = (xlog_rec_header_t *)caddr;
	xfs_caddr_t		p = caddr;
	uint			cycle_lsn;
	int			i, len;

	len = ((version == 2) && sunit) ? BTOBB(sunit) : 1;

	/* note that oh_tid actually contains the cycle number
	 * and the tid is stored in h_cycle_data[0] - that's the
	 * way things end up on disk.
	 */
	memset(p, 0, BBSIZE);
	INT_SET(head->h_magicno,	ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
	INT_SET(head->h_cycle,		ARCH_CONVERT, 1);
	INT_SET(head->h_version,	ARCH_CONVERT, version);
	if (len != 1)
		INT_SET(head->h_len,		ARCH_CONVERT, sunit - BBSIZE);
	else
		INT_SET(head->h_len,		ARCH_CONVERT, 20);
	INT_SET(head->h_chksum,		ARCH_CONVERT, 0);
	INT_SET(head->h_prev_block,	ARCH_CONVERT, -1);
	INT_SET(head->h_num_logops,	ARCH_CONVERT, 1);
	INT_SET(head->h_cycle_data[0],	ARCH_CONVERT, 0xb0c0d0d0);
	INT_SET(head->h_fmt,		ARCH_CONVERT, fmt);
	INT_SET(head->h_size,		ARCH_CONVERT, XLOG_HEADER_CYCLE_SIZE);

	ASSIGN_ANY_LSN_DISK(head->h_lsn, 1, 0);
	ASSIGN_ANY_LSN_DISK(head->h_tail_lsn, 1, 0);

	memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t));

	len = MAX(len, 2);
	p = nextfunc(p, BBSIZE, private);
	unmount_record(p);

	cycle_lsn = CYCLE_LSN_DISK(head->h_lsn);
	for (i = 2; i < len; i++) {
		p = nextfunc(p, BBSIZE, private);
		memset(p, 0, BBSIZE);
		*(uint *)p = cycle_lsn;
	}

	return BBTOB(len);
}

xfs_buf_t *
libxfs_getsb(xfs_mount_t *mp, int flags)
{
	return libxfs_readbuf(mp->m_dev, XFS_SB_DADDR,
				XFS_FSS_TO_BB(mp, 1), flags);
}


/*
 * Simple I/O (buffer cache) interface
 */

xfs_zone_t	*xfs_buf_zone;

typedef struct {
	dev_t		device;
	xfs_daddr_t	blkno;
	unsigned int	count;
} xfs_bufkey_t;

static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize)
{
	return ((unsigned int)((xfs_bufkey_t *)key)->blkno) % hashsize;
}

static int
libxfs_bcompare(struct cache_node *node, cache_key_t key)
{
	xfs_buf_t	*bp = (xfs_buf_t *)node;
	xfs_bufkey_t	*bkey = (xfs_bufkey_t *)key;

#ifdef IO_BCOMPARE_CHECK
	if (bp->b_dev == bkey->device &&
	    bp->b_blkno == bkey->blkno &&
	    bp->b_bcount != bkey->count)
		fprintf(stderr, "Badness in key lookup (length)\n"
			"bp=(bno %llu, len %u bb) key=(bno %llu, len %u bbs)\n",
			(unsigned long long)bp->b_blkno, (int)bp->b_bcount,
			(unsigned long long)bkey->blkno, (int)bkey->count);
#endif

	return (bp->b_dev == bkey->device &&
		bp->b_blkno == bkey->blkno &&
		bp->b_bcount == bkey->count);
}

void
libxfs_bprint(xfs_buf_t *bp)
{
	fprintf(stderr, "Buffer 0x%p blkno=%llu bytes=%u flags=0x%x count=%u\n",
		bp, (unsigned long long)bp->b_blkno, (unsigned)bp->b_bcount,
		bp->b_flags, bp->b_node.cn_count);
}

xfs_buf_t *
libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
{
	xfs_buf_t	*bp;
	xfs_bufkey_t	key;
	unsigned int	bytes = BBTOB(len);

	key.device = device;
	key.blkno = blkno;
	key.count = bytes;

	if (cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp)) {
#ifdef IO_DEBUG
		fprintf(stderr, "%s: allocated buffer, key=%llu(%llu), %p\n",
			__FUNCTION__, BBTOB(len), LIBXFS_BBTOOFF64(blkno), blkno, buf);
#endif
		bp->b_flags = 0;
		bp->b_blkno = blkno;
		bp->b_bcount = bytes;
		bp->b_dev = device;
		bp->b_addr = memalign(libxfs_device_alignment(), bytes);
		if (!bp->b_addr) {
			fprintf(stderr,
				_("%s: %s can't memalign %d bytes: %s\n"),
				progname, __FUNCTION__, (int)bytes,
				strerror(errno));
			exit(1);
		}
	}
	return bp;
}

void
libxfs_putbuf(xfs_buf_t *bp)
{
	cache_node_put((struct cache_node *)bp);
}

void
libxfs_purgebuf(xfs_buf_t *bp)
{
	xfs_bufkey_t	key;

	key.device = bp->b_dev;
	key.blkno = bp->b_blkno;
	key.count = bp->b_bcount;

	cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
}

static struct cache_node *
libxfs_balloc(void)
{
	return libxfs_zone_zalloc(xfs_buf_zone);
}

int
libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags)
{
	int	fd = libxfs_device_to_fd(dev);
	int	bytes = BBTOB(len);

	ASSERT(BBTOB(len) <= bp->b_bcount);

	if (pread64(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno)) < 0) {
		fprintf(stderr, _("%s: read failed: %s\n"),
			progname, strerror(errno));
		if (flags & LIBXFS_EXIT_ON_FAILURE)
			exit(1);
		return errno;
	}
#ifdef IO_DEBUG
	fprintf(stderr, "readbufr read %ubytes, blkno=%llu(%llu), %p\n",
		bytes, LIBXFS_BBTOOFF64(blkno), blkno, bp);
#endif
	if (bp->b_dev == dev &&
	    bp->b_blkno == blkno &&
	    bp->b_bcount == bytes)
		bp->b_flags |= LIBXFS_B_UPTODATE;
	return 0;
}

xfs_buf_t *
libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int flags)
{
	xfs_buf_t	*bp;
	int		error;

	bp = libxfs_getbuf(dev, blkno, len);
	if (!(bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
		error = libxfs_readbufr(dev, blkno, bp, len, flags);
		if (error) {
			libxfs_putbuf(bp);
			return NULL;
		}
	}
	return bp;
}

int
libxfs_writebufr(xfs_buf_t *bp)
{
	int	sts;
	int	fd = libxfs_device_to_fd(bp->b_dev);

	sts = pwrite64(fd, bp->b_addr, bp->b_bcount, LIBXFS_BBTOOFF64(bp->b_blkno));
	if (sts < 0) {
		fprintf(stderr, _("%s: pwrite64 failed: %s\n"),
			progname, strerror(errno));
		if (bp->b_flags & LIBXFS_B_EXIT)
			exit(1);
		return errno;
	}
	else if (sts != bp->b_bcount) {
		fprintf(stderr, _("%s: error - wrote only %d of %d bytes\n"),
			progname, sts, bp->b_bcount);
		if (bp->b_flags & LIBXFS_B_EXIT)
			exit(1);
		return EIO;
	}
#ifdef IO_DEBUG
	fprintf(stderr, "writebufr wrote %ubytes, blkno=%llu(%llu), %p\n",
		bp->b_bcount, LIBXFS_BBTOOFF64(bp->b_blkno), bp->b_blkno, bp);
#endif
	bp->b_flags |= LIBXFS_B_UPTODATE;
	bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
	return 0;
}

int
libxfs_writebuf_int(xfs_buf_t *bp, int flags)
{
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	return 0;
}

int
libxfs_writebuf(xfs_buf_t *bp, int flags)
{
	bp->b_flags |= (LIBXFS_B_DIRTY | flags);
	libxfs_putbuf(bp);
	return 0;
}

void
libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
{
#ifdef IO_DEBUG
	if (boff + len > bp->b_bcount) {
		fprintf(stderr, "Badness, iomove out of range!\n"
			"bp=(bno %llu, bytes %u) range=(boff %u, bytes %u)\n",
			bp->b_blkno, bp->b_bcount, boff, len);
		abort();
	}
#endif
	switch (flags) {
	case LIBXFS_BZERO:
		memset(bp->b_addr + boff, 0, len);
		break;
	case LIBXFS_BREAD:
		memcpy(data, bp->b_addr + boff, len);
		break;
	case LIBXFS_BWRITE:
		memcpy(bp->b_addr + boff, data, len);
		break;
	}
}

static void
libxfs_bflush(struct cache_node *node)
{
	xfs_buf_t		*bp = (xfs_buf_t *)node;

	if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY))
		libxfs_writebufr(bp);
}

static void
libxfs_brelse(struct cache_node *node)
{
	xfs_buf_t		*bp = (xfs_buf_t *)node;
	xfs_buf_log_item_t	*bip;
	extern xfs_zone_t	*xfs_buf_item_zone;

	if (bp != NULL) {
		if (bp->b_flags & LIBXFS_B_DIRTY)
			libxfs_writebufr(bp);
		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
		if (bip)
		    libxfs_zone_free(xfs_buf_item_zone, bip);
		free(bp->b_addr);
		bp->b_addr = NULL;
		bp->b_flags = 0;
		free(bp);
		bp = NULL;
	}
}

void
libxfs_bcache_purge(void)
{
	cache_purge(libxfs_bcache);
}

void 
libxfs_bcache_flush(void)
{
	cache_flush(libxfs_bcache);
}

struct cache_operations libxfs_bcache_operations = {
	/* .hash */	libxfs_bhash,
	/* .alloc */	libxfs_balloc,
	/* .flush */	libxfs_bflush,
	/* .relse */	libxfs_brelse,
	/* .compare */	libxfs_bcompare,
	/* .bulkrelse */ NULL	/* TODO: lio_listio64 interface? */
};


/*
 * Simple memory interface
 */

xfs_zone_t *
libxfs_zone_init(int size, char *name)
{
	xfs_zone_t	*ptr;

	if ((ptr = malloc(sizeof(xfs_zone_t))) == NULL) {
		fprintf(stderr, _("%s: zone init failed (%s, %d bytes): %s\n"),
			progname, name, (int)sizeof(xfs_zone_t), strerror(errno));
		exit(1);
	}
	ptr->zone_unitsize = size;
	ptr->zone_name = name;
#ifdef MEM_DEBUG
	ptr->allocated = 0;
	fprintf(stderr, "new zone %p for \"%s\", size=%d\n", ptr, name, size);
#endif
	return ptr;
}

void *
libxfs_zone_zalloc(xfs_zone_t *z)
{
	void	*ptr;

	if ((ptr = calloc(z->zone_unitsize, 1)) == NULL) {
		fprintf(stderr, _("%s: zone calloc failed (%s, %d bytes): %s\n"),
			progname, z->zone_name, z->zone_unitsize,
			strerror(errno));
		exit(1);
	}
#ifdef MEM_DEBUG
	z->allocated++;
	fprintf(stderr, "## zone alloc'd item %p from %s (%d bytes) (%d active)\n",
		ptr, z->zone_name,  z->zone_unitsize,
		z->allocated);
#endif
	return ptr;
}

void
libxfs_zone_free(xfs_zone_t *z, void *ptr)
{
#ifdef MEM_DEBUG
	z->allocated--;
	fprintf(stderr, "## zone freed item %p from %s (%d bytes) (%d active)\n",
		ptr, z->zone_name, z->zone_unitsize,
		z->allocated);
#endif
	if (ptr != NULL) {
		free(ptr);
		ptr = NULL;
	}
}

void *
libxfs_malloc(size_t size)
{
	void	*ptr;

	if ((ptr = calloc(1, size)) == NULL) {
		fprintf(stderr, _("%s: calloc failed (%d bytes): %s\n"),
			progname, (int)size, strerror(errno));
		exit(1);
	}
#ifdef MEM_DEBUG
	fprintf(stderr, "## calloc'd item %p size %d bytes\n", ptr, size);
#endif
	return ptr;
}

void
libxfs_free(void *ptr)
{
#ifdef MEM_DEBUG
	fprintf(stderr, "## freed item %p\n", ptr);
#endif
	if (ptr != NULL) {
		free(ptr);
		ptr = NULL;
	}
}

void *
libxfs_realloc(void *ptr, size_t size)
{
#ifdef MEM_DEBUG
	void *optr=ptr;
#endif
	if ((ptr = realloc(ptr, size)) == NULL) {
		fprintf(stderr, _("%s: realloc failed (%d bytes): %s\n"),
			progname, (int)size, strerror(errno));
		exit(1);
	}
#ifdef MEM_DEBUG
	fprintf(stderr, "## realloc'd item %p now %p size %d bytes\n",
		optr, ptr, size);
#endif
	return ptr;
}


/*
 * Inode cache interfaces
 */

extern xfs_zone_t	*xfs_ili_zone;
extern xfs_zone_t	*xfs_inode_zone;

static unsigned int
libxfs_ihash(cache_key_t key, unsigned int hashsize)
{
	return ((unsigned int)*(xfs_ino_t *)key) % hashsize;
}

static int
libxfs_icompare(struct cache_node *node, cache_key_t key)
{
	xfs_inode_t	*ip = (xfs_inode_t *)node;

	return (ip->i_ino == *(xfs_ino_t *)key);
}

int
libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
		xfs_inode_t **ipp, xfs_daddr_t bno)
{
	xfs_inode_t	*ip;
	int		error = 0;

	if (cache_node_get(libxfs_icache, &ino, (struct cache_node **)&ip)) {
#ifdef INO_DEBUG
		fprintf(stderr, "%s: allocated inode, ino=%llu(%llu), %p\n",
			__FUNCTION__, (unsigned long long)ino, bno, ip);
#endif
		if ((error = libxfs_iread(mp, tp, ino, ip, bno))) {
			cache_node_purge(libxfs_icache, &ino,
					(struct cache_node *)ip);
			ip = NULL;
		}
	}
	*ipp = ip;
	return error;
}

void
libxfs_iput(xfs_inode_t *ip, uint lock_flags)
{
	cache_node_put((struct cache_node *)ip);
}

static struct cache_node *
libxfs_ialloc(void)
{
	return libxfs_zone_zalloc(xfs_inode_zone);
}

static void
libxfs_idestroy(xfs_inode_t *ip)
{
	switch (ip->i_d.di_mode & S_IFMT) {
		case S_IFREG:
		case S_IFDIR:
		case S_IFLNK:
			libxfs_idestroy_fork(ip, XFS_DATA_FORK);
			break;
	}
	if (ip->i_afp)
		libxfs_idestroy_fork(ip, XFS_ATTR_FORK);
}

static void
libxfs_irelse(struct cache_node *node)
{
	xfs_inode_t	*ip = (xfs_inode_t *)node;

	if (ip != NULL) {
		if (ip->i_itemp)
			libxfs_zone_free(xfs_ili_zone, ip->i_itemp);
		ip->i_itemp = NULL;
		libxfs_idestroy(ip);
		libxfs_zone_free(xfs_inode_zone, ip);
		ip = NULL;
	}
}

void
libxfs_icache_purge(void)
{
	cache_purge(libxfs_icache);
}

struct cache_operations libxfs_icache_operations = {
	/* .hash */	libxfs_ihash,
	/* .alloc */	libxfs_ialloc,
	/* .flush */	NULL,
	/* .relse */	libxfs_irelse,
	/* .compare */	libxfs_icompare,
	/* .bulkrelse */ NULL
};