diff --exclude=dmapi -rNu ORIG/fs/inode.c HACK/fs/inode.c --- ORIG/fs/inode.c 2003-07-23 09:17:11.000000000 -0500 +++ HACK/fs/inode.c 2003-07-23 10:09:39.000000000 -0500 @@ -141,6 +141,11 @@ void inode_init_once(struct inode *inode) { memset(inode, 0, sizeof(*inode)); + _inode_init_once(inode); +} + +void _inode_init_once(struct inode *inode) +{ init_waitqueue_head(&inode->i_wait); INIT_LIST_HEAD(&inode->i_hash); INIT_LIST_HEAD(&inode->i_data.clean_pages); diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/Makefile HACK/fs/xfs/linux/Makefile --- ORIG/fs/xfs/linux/Makefile 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/Makefile 2003-06-02 11:53:19.000000000 -0500 @@ -55,7 +55,6 @@ xfs_iomap.o \ xfs_iops.o \ xfs_lrw.o \ - xfs_syncd.o \ xfs_super.o \ xfs_vfs.o \ xfs_vnode.o diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_aops.c HACK/fs/xfs/linux/xfs_aops.c --- ORIG/fs/xfs/linux/xfs_aops.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_aops.c 2003-07-23 12:41:40.000000000 -0500 @@ -93,6 +93,7 @@ XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_UNDATAIO(bp); + iput(LINVFS_GET_IP(vp)); pagebuf_iodone(bp, 0, 0); } @@ -375,7 +376,16 @@ pb = pagebuf_lookup(mp->pbm_target, mp->pbm_offset, mp->pbm_bsize, 0); if (!pb) - return -ENOMEM; + return -EAGAIN; + + /* Take a reference to the inode to prevent it from + * being reclaimed while we have outstanding unwritten + * extent IO on it. 
+ */ + if ((igrab(inode)) != inode) { + pagebuf_free(pb); + return -EAGAIN; + } /* Set the count to 1 initially, this will stop an I/O * completion callout which happens before we have started @@ -433,8 +443,7 @@ if (page) { nblocks += bs; atomic_add(bs, &pb->pb_io_remaining); - convert_page(inode, page, - mp, pb, 1, all_bh); + convert_page(inode, page, mp, pb, 1, all_bh); } } } @@ -598,11 +607,11 @@ STATIC int page_state_convert( + struct inode *inode, struct page *page, int startio, int unmapped) /* also implies page uptodate */ { - struct inode *inode = page->mapping->host; struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; page_buf_bmap_t *mp, map; unsigned long p_offset = 0, end_index; @@ -888,10 +897,10 @@ create, 1, BMAP_WRITE|BMAP_DIRECT); } -STATIC int +STATIC sector_t linvfs_bmap( struct address_space *mapping, - long block) + sector_t block) { struct inode *inode = (struct inode *)mapping->host; vnode_t *vp = LINVFS_GET_VP(inode); @@ -959,7 +968,6 @@ * the page, we have to check the process flags first, if we * are already in a transaction or disk I/O during allocations * is off, we need to fail the writepage and redirty the page. - * We also need to set PF_NOIO ourselves. */ STATIC int @@ -970,6 +978,7 @@ int need_trans; int delalloc, unmapped, unwritten; struct inode *inode = page->mapping->host; + xfs_pflags_t pflags; /* * We need a transaction if: @@ -996,7 +1005,7 @@ * as is. */ - if ((current->flags & (PF_FSTRANS|PF_NOIO)) && need_trans) + if ((PFLAGS_TEST_FSTRANS() || PFLAGS_TEST_NOIO()) && need_trans) goto out_fail; /* @@ -1011,10 +1020,10 @@ * to real space and flush out to disk. 
*/ if (need_trans) - current->flags |= PF_NOIO; - error = page_state_convert(page, 1, unmapped); + PFLAGS_SET_NOIO(&pflags); + error = page_state_convert(inode, page, 1, unmapped); if (need_trans) - current->flags &= ~PF_NOIO; + PFLAGS_RESTORE(&pflags); if (error == -EAGAIN) goto out_fail; @@ -1055,6 +1064,7 @@ struct page *page, int gfp_mask) { + struct inode *inode = page->mapping->host; int delalloc, unmapped, unwritten; count_page_state(page, &delalloc, &unmapped, &unwritten); @@ -1070,7 +1080,7 @@ * Never need to allocate space here - we will always * come back to writepage in that case. */ - return (page_state_convert(page, 0, 0) == 0) ? 1 : 0; + return (page_state_convert(inode, page, 0, 0) == 0) ? 1 : 0; } STATIC int @@ -1092,14 +1102,17 @@ /* * Initiate I/O on a kiobuf of user memory */ + STATIC int linvfs_direct_IO( int rw, - struct file *file, + struct inode *inode, struct kiobuf *iobuf, - unsigned long blocknr, + sector_t blocknr, int blocksize) { + struct page **maplist; + size_t page_offset; page_buf_t *pb; page_buf_bmap_t map; int error = 0; @@ -1107,15 +1120,15 @@ size_t length, total; loff_t offset; size_t map_size, size; - struct inode *inode = file->f_dentry->d_inode; vnode_t *vp = LINVFS_GET_VP(inode); - struct page **maplist = iobuf->maplist; - size_t page_offset = iobuf->offset; total = length = iobuf->length; offset = blocknr; offset <<= inode->i_blkbits; + maplist = iobuf->maplist; + page_offset = iobuf->offset; + map_flags = (rw ? BMAP_WRITE : BMAP_READ) | BMAP_DIRECT; pb_flags = (rw ? PBF_WRITE : PBF_READ) | PBF_FORCEIO; while (length) { @@ -1176,6 +1189,8 @@ XFS_BUF_DATAIO(pb); if (map.pbm_flags & PBMF_UNWRITTEN) { + if ((igrab(inode)) != inode) + BUG(); XFS_BUF_SET_FSPRIVATE(pb, vp); XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_conv); } @@ -1184,6 +1199,8 @@ pagebuf_rele(pb); if (error) { + if (map.pbm_flags & PBMF_UNWRITTEN) + iput(inode); if (error > 0) error = -error; break; @@ -1202,6 +1219,38 @@ return (error ? 
error : (int)(total - length)); } +STATIC int +linvfs_direct_IO_filp(int rw, + struct file * filp, + struct kiobuf * iobuf, + sector_t blocknr, + int blocksize) { + struct inode * inode = filp->f_dentry->d_inode->i_mapping->host; + return linvfs_direct_IO(rw, inode, iobuf, blocknr, blocksize); /* hand the byte count or negative errno back to the caller */ +} + + +/* since the address_space_operations are not consistent with the type used + * for block indexes we must cast the functions into what is expected.. + * thus the following 2 lines. + * If running on a kernel with LBD support and hence bmap and direct_IO + * correctly defined with sector_t params. use the second set of typedefs + * or casts from the address_space_operations + * RMC + */ + +#define RHBETA +#ifndef HAVE_SECTOR_T +typedef int (bmap_proc)(struct address_space *, long); +typedef int (direct_IO_proc)(int, struct inode *, struct kiobuf *, unsigned long, int); +#endif + +#if defined(RHBETA) +typedef int (direct_IO_filp_proc)(int, struct file *, struct kiobuf *, unsigned long, int); +#endif + + + struct address_space_operations linvfs_aops = { .readpage = linvfs_readpage, .writepage = linvfs_writepage, @@ -1209,6 +1258,14 @@ .releasepage = linvfs_release_page, .prepare_write = linvfs_prepare_write, .commit_write = generic_commit_write, +#if defined(HAVE_SECTOR_T) && !defined(RHBETA) + .bmap = (bmap_proc *)linvfs_bmap, + .direct_IO = (direct_IO_proc *)linvfs_direct_IO, +#elif defined(RHBETA) + .bmap = (bmap_proc *)linvfs_bmap, + .direct_IO = (direct_IO_filp_proc *)linvfs_direct_IO_filp, +#else .bmap = linvfs_bmap, .direct_IO = linvfs_direct_IO, +#endif }; diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_globals.c HACK/fs/xfs/linux/xfs_globals.c --- ORIG/fs/xfs/linux/xfs_globals.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_globals.c 2003-07-08 11:02:08.000000000 -0500 @@ -36,8 +36,11 @@ */ #include "xfs.h" +#include "xfs_macros.h" +#include "xfs_types.h" #include "xfs_bmap_btree.h" #include "xfs_bit.h" +#include "xfs_rw.h" /* * System memory size - used
to scale certain data structures in XFS. @@ -48,7 +51,19 @@ * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, * other XFS code uses these values. */ -xfs_param_t xfs_params = { 128, 32, 0, 1, 0, 0, 0, 3, 30 * HZ }; + +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ + .refcache_size = { 0, 128, XFS_REFCACHE_SIZE_MAX }, + .refcache_purge = { 0, 32, XFS_REFCACHE_SIZE_MAX }, + .restrict_chown = { 0, 1, 1 }, + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 127 }, + .error_level = { 0, 3, 11 }, + .sync_interval = { HZ, 30*HZ, 60*HZ }, + .stats_clear = { 0, 0, 1 }, +}; /* * Global system credential structure. @@ -62,3 +77,83 @@ #if ARCH_CONVERT != ARCH_NOCONVERT EXPORT_SYMBOL(xfs_bmbt_disk_get_all); #endif + +#if defined(CONFIG_XFS_DEBUG) +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dir2_data.h" +#include "xfs_dinode.h" +#include "xfs_inode_item.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_buf_item.h" +#include "xfs_rw.h" +#include "xfs_error.h" +#include "xfs_utils.h" +#include "xfs_dir2_trace.h" +#include "xfs_quota.h" +#include "xfs_mac.h" +#include "xfs_acl.h" +#include "xfs_da_btree.h" +#include "xfs_dir_leaf.h" +#include "xfs_dir2_data.h" +#include "xfs_dir2_leaf.h" +#include "xfs_dir2_block.h" +#include "xfs_dir2_node.h" +#include "xfs_dir2_sf.h" +#include "xfs_dir2_trace.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" + +extern ktrace_t *xfs_alloc_trace_buf; + +EXPORT_SYMBOL(xfs_fsb_to_agbno); +EXPORT_SYMBOL(xfs_dir2_data_unused_tag_p_arch); 
+EXPORT_SYMBOL(xfs_attr_leaf_name_remote); +EXPORT_SYMBOL(xfs_lic_slot); +EXPORT_SYMBOL(xfs_dir2_sf_firstentry); +EXPORT_SYMBOL(xfs_ino_to_agno); +EXPORT_SYMBOL(xfs_dir2_sf_get_inumber_arch); +EXPORT_SYMBOL(xfs_ifork_q); +EXPORT_SYMBOL(xfs_dir2_data_entry_tag_p); +EXPORT_SYMBOL(xfs_dir2_sf_inumberp); +EXPORT_SYMBOL(xfs_dir2_data_entsize); +EXPORT_SYMBOL(xfs_lic_isfree); +EXPORT_SYMBOL(xfs_attr_leaf_name_local); +EXPORT_SYMBOL(xfs_bmap_broot_ptr_addr); +EXPORT_SYMBOL(xfs_dir_sf_get_dirino_arch); +EXPORT_SYMBOL(xfs_alloc_trace_buf); +EXPORT_SYMBOL(xfs_ino_to_agbno); +EXPORT_SYMBOL(xfs_fsb_to_agno); +EXPORT_SYMBOL(xfs_dir2_leaf_bests_p_arch); +EXPORT_SYMBOL(xfs_dir2_sf_get_offset_arch); +EXPORT_SYMBOL(startblockval); +EXPORT_SYMBOL(xfs_attr_sf_nextentry); +EXPORT_SYMBOL(xfs_bmap_broot_key_addr); +EXPORT_SYMBOL(xfs_dir2_block_leaf_p_arch); +EXPORT_SYMBOL(xfs_mtovfs); +EXPORT_SYMBOL(xfs_dir_leaf_namestruct); +EXPORT_SYMBOL(xfs_ino_to_offset); +EXPORT_SYMBOL(xfs_itobhv); +EXPORT_SYMBOL(xfs_ifork_ptr); +EXPORT_SYMBOL(isnullstartblock); +EXPORT_SYMBOL(xfs_lic_are_all_free); +EXPORT_SYMBOL(xfs_dir_sf_nextentry); +EXPORT_SYMBOL(xfs_dir2_sf_nextentry); +#endif diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_iomap.c HACK/fs/xfs/linux/xfs_iomap.c --- ORIG/fs/xfs/linux/xfs_iomap.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_iomap.c 2003-07-15 22:08:36.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -172,8 +172,11 @@ BUG(); } + ASSERT(offset <= mp->m_maxioffset); + if ((xfs_fsize_t)offset + count > mp->m_maxioffset) + count = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); error = XFS_BMAPI(mp, NULL, io, offset_fsb, (xfs_filblks_t)(end_fsb - offset_fsb) , diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_iops.c HACK/fs/xfs/linux/xfs_iops.c --- ORIG/fs/xfs/linux/xfs_iops.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_iops.c 2003-06-10 12:32:21.000000000 -0500 @@ -157,8 +157,10 @@ if (S_ISCHR(mode) || S_ISBLK(mode)) ip->i_rdev = to_kdev_t(rdev); - validate_fields(dir); + else if (S_ISDIR(mode)) + validate_fields(ip); d_instantiate(dentry, ip); + validate_fields(dir); } if (!error && have_default_acl) { diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_linux.h HACK/fs/xfs/linux/xfs_linux.h --- ORIG/fs/xfs/linux/xfs_linux.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_linux.h 2003-07-23 12:12:12.000000000 -0500 @@ -101,11 +101,15 @@ bh->b_end_io = linvfs_unwritten_done; } -#define restricted_chown xfs_params.restrict_chown -#define irix_sgid_inherit xfs_params.sgid_inherit -#define irix_symlink_mode xfs_params.symlink_mode -#define xfs_panic_mask xfs_params.panic_mask -#define xfs_error_level xfs_params.error_level +#define xfs_refcache_size xfs_params.refcache_size.val +#define xfs_refcache_purge_count xfs_params.refcache_purge.val +#define restricted_chown xfs_params.restrict_chown.val +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_interval 
xfs_params.sync_interval.val +#define xfs_stats_clear xfs_params.stats_clear.val #define NBPP PAGE_SIZE #define DPPSHFT (PAGE_SHIFT - 9) diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_lrw.c HACK/fs/xfs/linux/xfs_lrw.c --- ORIG/fs/xfs/linux/xfs_lrw.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_lrw.c 2003-07-15 22:25:33.000000000 -0500 @@ -174,7 +174,7 @@ } - n = XFS_MAX_FILE_OFFSET - *offset; + n = XFS_MAXIOFFSET(mp) - *offset; if ((n <= 0) || (size == 0)) return 0; @@ -381,7 +381,8 @@ } ASSERT(nimaps > 0); - if (imap.br_startblock == HOLESTARTBLOCK) { + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { /* * This loop handles initializing pages that were * partially initialized by the code below this @@ -454,7 +455,7 @@ ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; - xfs_fsize_t n, limit = XFS_MAX_FILE_OFFSET; + xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; int iolock; @@ -500,6 +501,7 @@ xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; + limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; @@ -589,7 +591,7 @@ xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1); } - ret = do_generic_file_write(file, buf, size, offset); + ret = generic_file_write_nolock(file, buf, size, offset); if (unlikely(file->f_mode & FINVIS)) { /* generic_file_write updates the mtime/ctime but we need @@ -809,29 +811,23 @@ return (xfs_bioerror_relse(bp)); } - -void -XFS_bflush(xfs_buftarg_t *target) -{ - pagebuf_delwri_flush(target, PBDF_WAIT, NULL); -} - /* - * If the underlying (log or data) device is readonly, there are some + * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. 
*/ int -xfs_dev_is_read_only(xfs_mount_t *mp, char *message) +xfs_dev_is_read_only( + xfs_mount_t *mp, + char *message) { - if (is_read_only(mp->m_ddev_targp->pbr_kdev) || - is_read_only(mp->m_logdev_targp->pbr_kdev) || - (mp->m_rtdev_targp && is_read_only(mp->m_rtdev_targp->pbr_kdev))) { + if (xfs_readonly_buftarg(mp->m_ddev_targp) || + xfs_readonly_buftarg(mp->m_logdev_targp) || + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { cmn_err(CE_NOTE, "XFS: %s required on read-only device.", message); cmn_err(CE_NOTE, "XFS: write access unavailable, cannot proceed."); return EROFS; } - return 0; } diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_super.c HACK/fs/xfs/linux/xfs_super.c --- ORIG/fs/xfs/linux/xfs_super.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_super.c 2003-07-23 10:06:13.000000000 -0500 @@ -75,7 +75,7 @@ STATIC kmem_cache_t * linvfs_inode_cachep; STATIC struct xfs_mount_args * -args_allocate( +xfs_args_allocate( struct super_block *sb) { struct xfs_mount_args *args; @@ -94,6 +94,40 @@ return args; } +__uint64_t +xfs_max_file_offset( + unsigned int blockshift) +{ + unsigned int pagefactor = 1; + unsigned int bitshift = BITS_PER_LONG - 1; + + /* Figure out maximum filesize, on Linux this can depend on + * the filesystem blocksize (on 32 bit platforms). + * __block_prepare_write does this in an [unsigned] long... + * page->index << (PAGE_CACHE_SHIFT - bbits) + * So, for page sized blocks (4K on 32 bit platforms), + * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is + * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * but for smaller blocksizes it is less (bbits = log2 bsize). + * Note1: get_block_t takes a long (implicit cast from above) + * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch + * can optionally convert the [unsigned] long from above into + * an [unsigned] long long. 
+ */ + +#if BITS_PER_LONG == 32 +# if defined(HAVE_SECTOR_T) + ASSERT(sizeof(sector_t) == 8); + pagefactor = PAGE_CACHE_SIZE; + bitshift = BITS_PER_LONG; +# else + pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); +# endif +#endif + + return (((__uint64_t)pagefactor) << bitshift) - 1; +} + STATIC __inline__ void xfs_set_inodeops( struct inode *inode) @@ -233,13 +267,27 @@ } void -xfs_free_buftarg( +xfs_flush_buftarg( xfs_buftarg_t *btp) { pagebuf_delwri_flush(btp, PBDF_WAIT, NULL); +} + +void +xfs_free_buftarg( + xfs_buftarg_t *btp) +{ + xfs_flush_buftarg(btp); kmem_free(btp, sizeof(*btp)); } +int +xfs_readonly_buftarg( + xfs_buftarg_t *btp) +{ + return is_read_only(btp->pbr_kdev); +} + void xfs_relse_buftarg( xfs_buftarg_t *btp) @@ -300,20 +348,14 @@ return btp; } -STATIC __inline__ unsigned int gfp_mask(void) -{ - /* If we're not in a transaction, FS activity is ok */ - if (current->flags & PF_FSTRANS) return GFP_NOFS; - return GFP_KERNEL; -} - STATIC struct inode * linvfs_alloc_inode( struct super_block *sb) { vnode_t *vp; - vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep, gfp_mask()); + vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep, + kmem_flags_convert(KM_SLEEP)); if (!vp) return NULL; return LINVFS_GET_IP(vp); @@ -340,20 +382,8 @@ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { struct inode *inode = LINVFS_GET_IP(vp); - memset(vp, 0, VNODE_SIZE); - init_waitqueue_head(&inode->i_wait); - INIT_LIST_HEAD(&inode->i_hash); - INIT_LIST_HEAD(&inode->i_data.clean_pages); - INIT_LIST_HEAD(&inode->i_data.dirty_pages); - INIT_LIST_HEAD(&inode->i_data.locked_pages); - INIT_LIST_HEAD(&inode->i_dentry); - INIT_LIST_HEAD(&inode->i_dirty_buffers); - INIT_LIST_HEAD(&inode->i_dirty_data_buffers); - INIT_LIST_HEAD(&inode->i_devices); - sema_init(&inode->i_sem, 1); - sema_init(&inode->i_zombie, 1); - spin_lock_init(&inode->i_data.i_shared_lock); + _inode_init_once(inode); } } @@ -414,6 +444,68 @@ } } + +#define 
SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE) + +STATIC int +syncd(void *arg) +{ + vfs_t *vfsp = (vfs_t *) arg; + int error; + + daemonize(); + reparent_to_init(); + sigmask_lock(); + sigfillset(&current->blocked); /* block every signal for this thread */ + recalc_sigpending_(current); + sigmask_unlock(); + + sprintf(current->comm, "xfssyncd"); + + vfsp->vfs_sync_task = current; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(xfs_syncd_interval); + if (vfsp->vfs_flag & VFS_UMOUNT) + break; + if (vfsp->vfs_flag & VFS_RDONLY) + continue; + VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); + } + + vfsp->vfs_sync_task = NULL; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + return 0; +} + +STATIC int +linvfs_start_syncd(vfs_t *vfsp) +{ + int pid; + + pid = kernel_thread(syncd, (void *) vfsp, + CLONE_VM | CLONE_FS | CLONE_FILES); + if (pid < 0) + return pid; + wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + return 0; +} + +STATIC void +linvfs_stop_syncd(vfs_t *vfsp) +{ + vfsp->vfs_flag |= VFS_UMOUNT; + wmb(); + + wake_up_process(vfsp->vfs_sync_task); + wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); +} + STATIC void linvfs_put_super( struct super_block *sb) @@ -423,9 +515,8 @@ linvfs_stop_syncd(vfsp); VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); - if (error == 0) { + if (!error) VFS_UNMOUNT(vfsp, 0, NULL, error); - } if (error) { printk("XFS unmount got error %d\n", error); printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); @@ -460,7 +551,7 @@ int error; VFS_STATVFS(vfsp, statp, NULL, error); - return error; + return -error; } STATIC int @@ -470,29 +561,24 @@ char *options) { vfs_t *vfsp = LINVFS_GET_VFS(sb); - struct xfs_mount_args *args = args_allocate(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb); int error; VFS_PARSEARGS(vfsp, options, args, 1, error); - if (error) - goto out; - - VFS_MNTUPDATE(vfsp, flags, args, error); - -out: + if (!error) + VFS_MNTUPDATE(vfsp, flags,
args, error); kmem_free(args, sizeof(*args)); - return error; + return -error; } STATIC void linvfs_freeze_fs( struct super_block *sb) { - vfs_t *vfsp; + vfs_t *vfsp = LINVFS_GET_VFS(sb); vnode_t *vp; int error; - vfsp = LINVFS_GET_VFS(sb); if (sb->s_flags & MS_RDONLY) return; VFS_ROOT(vfsp, &vp, error); @@ -504,11 +590,10 @@ linvfs_unfreeze_fs( struct super_block *sb) { - vfs_t *vfsp; + vfs_t *vfsp = LINVFS_GET_VFS(sb); vnode_t *vp; int error; - vfsp = LINVFS_GET_VFS(sb); VFS_ROOT(vfsp, &vp, error); VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, XFS_IOC_THAW, 0, error); VN_RELE(vp); @@ -682,7 +767,7 @@ { vnode_t *rootvp; struct vfs *vfsp = vfs_allocate(); - struct xfs_mount_args *args = args_allocate(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb); struct statfs statvfs; int error; @@ -699,7 +784,6 @@ } sb_min_blocksize(sb, BBSIZE); - sb->s_maxbytes = XFS_MAX_FILE_OFFSET; sb->s_qcop = &linvfs_qops; sb->s_op = &linvfs_sops; @@ -714,9 +798,10 @@ goto fail_unmount; sb->s_dirt = 1; - sb->s_magic = XFS_SB_MAGIC; + sb->s_magic = statvfs.f_type; sb->s_blocksize = statvfs.f_bsize; sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; + sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); set_posix_acl_flag(sb); VFS_ROOT(vfsp, &rootvp, error); diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_super.h HACK/fs/xfs/linux/xfs_super.h --- ORIG/fs/xfs/linux/xfs_super.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_super.h 2003-07-23 12:12:11.000000000 -0500 @@ -66,6 +66,12 @@ # define XFS_REALTIME_STRING #endif +#if XFS_BIG_FILESYSTEMS +# define XFS_BIGFS_STRING "big filesystems, " +#else +# define XFS_BIGFS_STRING +#endif + #ifdef CONFIG_XFS_VNODE_TRACING # define XFS_VNTRACE_STRING "VN-trace, " #else @@ -80,6 +86,7 @@ #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_REALTIME_STRING \ + XFS_BIGFS_STRING \ XFS_VNTRACE_STRING \ XFS_DBG_STRING /* DBG must be last */ @@ -92,6 +99,8 @@ struct pb_target; struct block_device; +extern __uint64_t 
xfs_max_file_offset(unsigned int); + extern struct inode *xfs_get_inode(bhv_desc_t *, xfs_ino_t, int); extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); @@ -102,10 +111,9 @@ extern struct pb_target *xfs_alloc_buftarg(struct block_device *); extern void xfs_relse_buftarg(struct pb_target *); extern void xfs_free_buftarg(struct pb_target *); - +extern void xfs_flush_buftarg(struct pb_target *); +extern int xfs_readonly_buftarg(struct pb_target *); extern void xfs_setsize_buftarg(struct pb_target *, unsigned int, unsigned int); extern unsigned int xfs_getsize_buftarg(struct pb_target *); -extern int linvfs_start_syncd(vfs_t *); -extern void linvfs_stop_syncd(vfs_t *); #endif /* __XFS_SUPER_H__ */ diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_sysctl.c HACK/fs/xfs/linux/xfs_sysctl.c --- ORIG/fs/xfs/linux/xfs_sysctl.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_sysctl.c 2003-07-08 09:00:23.000000000 -0500 @@ -36,12 +36,6 @@ #include -STATIC ulong xfs_min[XFS_PARAM] = { \ - 0, 0, 0, 0, 0, 0, 0, 0, HZ }; -STATIC ulong xfs_max[XFS_PARAM] = { \ - XFS_REFCACHE_SIZE_MAX, XFS_REFCACHE_SIZE_MAX, - 1, 1, 1, 1, 127, 11, HZ * 60 }; - static struct ctl_table_header *xfs_table_header; @@ -65,13 +59,14 @@ if (!ret && write && xfs_refcache_new_size != xfs_refcache_old_size) { xfs_refcache_resize(xfs_refcache_new_size); /* Don't purge more than size of the cache */ - if (xfs_refcache_new_size < xfs_params.refcache_purge) - xfs_params.refcache_purge = xfs_refcache_new_size; + if (xfs_refcache_new_size < xfs_refcache_purge_count) + xfs_refcache_purge_count = xfs_refcache_new_size; } return ret; } +#ifdef CONFIG_PROC_FS STATIC int xfs_stats_clear_proc_handler( ctl_table *ctl, @@ -91,48 +86,62 @@ vn_active = xfsstats.vn_active; memset(&xfsstats, 0, sizeof(xfsstats)); xfsstats.vn_active = vn_active; - xfs_params.stats_clear = 0; + xfs_stats_clear = 0; } return ret; } +#endif /* CONFIG_PROC_FS */ STATIC ctl_table xfs_table[] = { - 
{XFS_REFCACHE_SIZE, "refcache_size", &xfs_params.refcache_size, + {XFS_REFCACHE_SIZE, "refcache_size", &xfs_params.refcache_size.val, sizeof(ulong), 0644, NULL, &xfs_refcache_resize_proc_handler, - &sysctl_intvec, NULL, &xfs_min[0], &xfs_max[0]}, + &sysctl_intvec, NULL, + &xfs_params.refcache_size.min, &xfs_params.refcache_size.max}, - {XFS_REFCACHE_PURGE, "refcache_purge", &xfs_params.refcache_purge, + /* Note, the max here is different, it is the current refcache size */ + {XFS_REFCACHE_PURGE, "refcache_purge", &xfs_params.refcache_purge.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[1], &xfs_params.refcache_size}, - - {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear, - sizeof(ulong), 0644, NULL, &xfs_stats_clear_proc_handler, - &sysctl_intvec, NULL, &xfs_min[2], &xfs_max[2]}, + &sysctl_intvec, NULL, + &xfs_params.refcache_purge.min, &xfs_params.refcache_size.val}, - {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown, + {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[3], &xfs_max[3]}, + &sysctl_intvec, NULL, + &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, - {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit, + {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[4], &xfs_max[4]}, + &sysctl_intvec, NULL, + &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, - {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode, + {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[5], &xfs_max[5]}, + &sysctl_intvec, NULL, + &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, - {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask, + 
{XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[6], &xfs_max[6]}, + &sysctl_intvec, NULL, + &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, - {XFS_ERRLEVEL, "error_level", &xfs_params.error_level, + {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[7], &xfs_max[7]}, + &sysctl_intvec, NULL, + &xfs_params.error_level.min, &xfs_params.error_level.max}, - {XFS_SYNC_INTERVAL, "sync_interval", &xfs_params.sync_interval, + {XFS_SYNC_INTERVAL, "sync_interval", &xfs_params.sync_interval.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &xfs_min[8], &xfs_max[8]}, + &sysctl_intvec, NULL, + &xfs_params.sync_interval.min, &xfs_params.sync_interval.max}, + + /* please keep this the last entry */ +#ifdef CONFIG_PROC_FS + {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, + sizeof(ulong), 0644, NULL, &xfs_stats_clear_proc_handler, + &sysctl_intvec, NULL, + &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, +#endif /* CONFIG_PROC_FS */ {0} }; diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_sysctl.h HACK/fs/xfs/linux/xfs_sysctl.h --- ORIG/fs/xfs/linux/xfs_sysctl.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_sysctl.h 2003-07-23 12:12:12.000000000 -0500 @@ -39,19 +39,24 @@ * Tunable xfs parameters */ -#define XFS_PARAM (sizeof(struct xfs_param) / sizeof(ulong)) +typedef struct xfs_sysctl_val { + ulong min; + ulong val; + ulong max; +} xfs_sysctl_val_t; typedef struct xfs_param { - ulong refcache_size; /* Size of NFS reference cache. */ - ulong refcache_purge; /* # of entries to purge each time. */ - ulong stats_clear; /* Reset all XFS statistics to zero. */ - ulong restrict_chown; /* Root/non-root can give away files. 
*/ - ulong sgid_inherit; /* Inherit ISGID bit if process' GID is */ - /* not a member of the parent dir GID. */ - ulong symlink_mode; /* Symlink creat mode affected by umask. */ - ulong panic_mask; /* bitmask to specify panics on errors. */ - ulong error_level; /* Degree of reporting for internal probs*/ - ulong sync_interval; /* time between sync calls */ + xfs_sysctl_val_t refcache_size; /* Size of NFS reference cache. */ + xfs_sysctl_val_t refcache_purge;/* # of entries to purge each time. */ + xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ + xfs_sysctl_val_t sgid_inherit; /* Inherit ISGID bit if process' GID + * is not a member of the parent dir + * GID */ + xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ + xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ + xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ + xfs_sysctl_val_t sync_interval; /* time between sync calls */ + xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. 
*/ } xfs_param_t; /* @@ -72,13 +77,13 @@ enum { XFS_REFCACHE_SIZE = 1, XFS_REFCACHE_PURGE = 2, - XFS_STATS_CLEAR = 3, - XFS_RESTRICT_CHOWN = 4, - XFS_SGID_INHERIT = 5, - XFS_SYMLINK_MODE = 6, - XFS_PANIC_MASK = 7, - XFS_ERRLEVEL = 8, - XFS_SYNC_INTERVAL = 9, + XFS_RESTRICT_CHOWN = 3, + XFS_SGID_INHERIT = 4, + XFS_SYMLINK_MODE = 5, + XFS_PANIC_MASK = 6, + XFS_ERRLEVEL = 7, + XFS_SYNC_INTERVAL = 8, + XFS_STATS_CLEAR = 9, }; extern xfs_param_t xfs_params; diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_version.h HACK/fs/xfs/linux/xfs_version.h --- ORIG/fs/xfs/linux/xfs_version.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_version.h 2003-06-13 12:18:27.000000000 -0500 @@ -39,6 +39,6 @@ #ifndef __XFS_VERSION_H__ #define __XFS_VERSION_H__ -#define XFS_VERSION_STRING "SGI XFS" +#define XFS_VERSION_STRING "SGI XFS 1.3.0pre2" #endif /* __XFS_VERSION_H__ */ diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_vfs.c HACK/fs/xfs/linux/xfs_vfs.c --- ORIG/fs/xfs/linux/xfs_vfs.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_vfs.c 2003-06-02 11:53:22.000000000 -0500 @@ -252,7 +252,6 @@ vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); init_waitqueue_head(&vfsp->vfs_wait_sync_task); - init_waitqueue_head(&vfsp->vfs_sync); return vfsp; } diff --exclude=dmapi -rNu ORIG/fs/xfs/linux/xfs_vfs.h HACK/fs/xfs/linux/xfs_vfs.h --- ORIG/fs/xfs/linux/xfs_vfs.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/linux/xfs_vfs.h 2003-07-23 12:12:12.000000000 -0500 @@ -49,7 +49,6 @@ bhv_head_t vfs_bh; /* head of vfs behavior chain */ struct super_block *vfs_super; /* Linux superblock structure */ struct task_struct *vfs_sync_task; - wait_queue_head_t vfs_sync; wait_queue_head_t vfs_wait_sync_task; } vfs_t; @@ -88,10 +87,10 @@ #define SYNC_CLOSE 0x0002 /* close file system down */ #define SYNC_DELWRI 0x0004 /* look at delayed writes */ #define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_FSDATA 0x0020 /* flush fs 
data (e.g. superblocks) */ #define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_REFCACHE 0x0020 /* prune some of the nfs ref cache */ - +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ #define IGET_NOALLOC 0x0001 /* vfs_get_inode may return NULL */ diff --exclude=dmapi -rNu ORIG/fs/xfs/Makefile HACK/fs/xfs/Makefile --- ORIG/fs/xfs/Makefile 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/Makefile 2003-06-02 11:53:18.000000000 -0500 @@ -114,6 +114,11 @@ subdir-$(CONFIG_XFS_FS) += pagebuf linux support +ifeq ($(CONFIG_XFS_DMAPI),y) + subdir-$(CONFIG_XFS_FS) += dmapi + obj-y += dmapi/xfs_dmapi.o +endif + ifeq ($(CONFIG_XFS_QUOTA),y) subdir-$(CONFIG_XFS_FS) += quota obj-y += quota/xfs_quota.o diff --exclude=dmapi -rNu ORIG/fs/xfs/pagebuf/page_buf.c HACK/fs/xfs/pagebuf/page_buf.c --- ORIG/fs/xfs/pagebuf/page_buf.c 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/pagebuf/page_buf.c 2003-07-23 10:06:13.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -64,6 +64,7 @@ #define NBBY 8 #define BBSHIFT 9 +#define BBMASK ((1 << BBSHIFT) - 1) #define BN_ALIGN_MASK ((1 << (PAGE_CACHE_SHIFT - BBSHIFT)) - 1) #ifndef GFP_READAHEAD @@ -104,11 +105,11 @@ */ #ifdef PAGEBUF_TRACE -static spinlock_t pb_trace_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t pb_trace_lock = SPIN_LOCK_UNLOCKED; struct pagebuf_trace_buf pb_trace; EXPORT_SYMBOL(pb_trace); EXPORT_SYMBOL(pb_trace_func); -#define CIRC_INC(i) (((i) + 1) & (PB_TRACE_BUFSIZE - 1)) +#define CIRC_INC(i) (((i) + 1) & (PB_TRACE_BUFSIZE - 1)) void pb_trace_func( @@ -120,7 +121,7 @@ int j; unsigned long flags; - if (!pb_params.p_un.debug) return; + if (!pb_params.debug.val) return; if (ra == NULL) ra = (void *)__builtin_return_address(0); @@ -176,10 +177,13 @@ * /proc/sys/vm/pagebuf */ -unsigned long pagebuf_min[P_PARAM] = { HZ/2, 1*HZ, 0, 0 }; -unsigned long pagebuf_max[P_PARAM] = { HZ*30, HZ*300, 1, 1 }; - -pagebuf_param_t pb_params = {{ HZ, 15 * HZ, 0, 0 }}; +pagebuf_param_t pb_params = { + /* MIN DFLT MAX */ + .flush_interval = { HZ/2, HZ, 30*HZ }, + .age_buffer = { 1*HZ, 15*HZ, 300*HZ }, + .stats_clear = { 0, 0, 1 }, + .debug = { 0, 0, 1 }, +}; /* * Pagebuf statistics variables @@ -228,7 +232,7 @@ * dev_t is 16 bits, loff_t is always 64 bits */ base ^= dev; - for (bit = hval = 0; base != 0 && bit < sizeof(base) * 8; bit += NBITS) { + for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) { hval ^= (int)base & (NHASH-1); base >>= NBITS; } @@ -236,18 +240,18 @@ } /* - * Mapping of multi-page buffers into contingous virtual space + * Mapping of multi-page buffers into contiguous virtual space */ STATIC void *pagebuf_mapout_locked(page_buf_t *); -STATIC spinlock_t as_lock = SPIN_LOCK_UNLOCKED; typedef struct a_list { - void *vm_addr; + void *vm_addr; struct a_list *next; } a_list_t; -STATIC a_list_t *as_free_head; -STATIC 
int as_list_len; +STATIC a_list_t *as_free_head; +STATIC int as_list_len; +STATIC spinlock_t as_lock = SPIN_LOCK_UNLOCKED; /* @@ -930,7 +934,7 @@ { page_buf_t *pb; - flags |= _PBF_PRIVATE_BH; + flags |= _PBF_PRIVATE_BH | _PBF_LOCKABLE; pb = pagebuf_allocate(flags); if (pb) { _pagebuf_initialize(pb, target, ioff, isize, flags); @@ -1502,27 +1506,29 @@ cache_ok = !((pb->pb_flags & PBF_FORCEIO) || (rw == WRITE)); public_bh = multi_ok = 1; + sector = 1 << sector_shift; if (!page_has_buffers(page)) { if (!locking) { lock_page(page); if (!page_has_buffers(page)) create_empty_buffers(page, - pbr->pbr_kdev, - 1 << sector_shift); + pbr->pbr_kdev, sector); unlock_page(page); } else { - create_empty_buffers(page, pbr->pbr_kdev, - 1 << sector_shift); + create_empty_buffers(page, + pbr->pbr_kdev, sector); } } + i = sector >> BBSHIFT; + bn -= (pg_offset >> BBSHIFT); + /* Find buffer_heads belonging to just this pagebuf */ bh = head = page_buffers(page); do { if (buffer_uptodate(bh) && cache_ok) continue; - blk_length = i << sector_shift; if (blk_length < pg_offset) continue; if (blk_length >= pg_offset + pg_length) @@ -1530,10 +1536,13 @@ lock_buffer(bh); get_bh(bh); - bh->b_size = 1 << sector_shift; - bh->b_blocknr = bn + (i - (pg_offset >> sector_shift)); + bh->b_size = sector; + bh->b_blocknr = bn; bufferlist[cnt++] = bh; - } while (i++, (bh = bh->b_this_page) != head); + + } while ((bn += i), + (blk_length += sector), + (bh = bh->b_this_page) != head); goto request; } @@ -1580,14 +1589,15 @@ } multi_ok = (blk_length != 1); + i = sector >> BBSHIFT; - for (; blk_length > 0; blk_length--, pg_offset += sector) { + for (; blk_length > 0; bn += i, blk_length--, pg_offset += sector) { bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS); if (!bh) bh = _pagebuf_get_prealloc_bh(); memset(bh, 0, sizeof(*bh)); + bh->b_blocknr = bn; bh->b_size = sector; - bh->b_blocknr = bn++; bh->b_dev = pbr->pbr_kdev; set_bit(BH_Lock, &bh->b_state); set_bh_page(bh, page, pg_offset); @@ -1650,13 +1660,13 @@ if 
((pbr->pbr_bsize == PAGE_CACHE_SIZE) && (pb->pb_buffer_length < PAGE_CACHE_SIZE) && (pb->pb_flags & PBF_READ) && pb->pb_locked) { - bn -= (pb->pb_offset >> pbr->pbr_sshift); + bn -= (pb->pb_offset >> BBSHIFT); pg_offset = 0; pg_length = PAGE_CACHE_SIZE; } else { pb_offset = offset - pb->pb_file_offset; if (pb_offset) { - bn += (pb_offset + pbr->pbr_smask) >> pbr->pbr_sshift; + bn += (pb_offset + BBMASK) >> BBSHIFT; } } @@ -1878,7 +1888,7 @@ } list_add_tail(&pb->pb_list, &pbd_delwrite_queue); - pb->pb_flushtime = jiffies + pb_params.p_un.age_buffer; + pb->pb_flushtime = jiffies + pb_params.age_buffer.val; spin_unlock(&pbd_delwrite_lock); if (unlock && (pb->pb_flags & _PBF_LOCKABLE)) { @@ -1920,10 +1930,10 @@ daemonize(); /* Avoid signals */ - spin_lock_irq(¤t->sigmask_lock); + sigmask_lock(); sigfillset(¤t->blocked); - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); + recalc_sigpending_(current); + sigmask_unlock(); /* Migrate to the right CPU */ migrate_to_cpu(cpu); @@ -2021,10 +2031,10 @@ daemonize(); /* Avoid signals */ - spin_lock_irq(¤t->sigmask_lock); + sigmask_lock(); sigfillset(¤t->blocked); - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); + recalc_sigpending_(current); + sigmask_unlock(); strcpy(current->comm, "pagebufd"); current->flags |= PF_MEMALLOC; @@ -2033,7 +2043,7 @@ do { if (pbd_active == 1) { mod_timer(&pb_daemon_timer, - jiffies + pb_params.p_un.flush_interval); + jiffies + pb_params.flush_interval.val); interruptible_sleep_on(&pbd_waitq); } @@ -2101,11 +2111,11 @@ int pincount = 0; int flush_cnt = 0; + pagebuf_runall_queues(pagebuf_dataiodone_tq); + spin_lock(&pbd_delwrite_lock); INIT_LIST_HEAD(&tmp); - pagebuf_runall_queues(pagebuf_dataiodone_tq); - list_for_each_safe(curr, next, &pbd_delwrite_queue) { pb = list_entry(curr, page_buf_t, pb_list); @@ -2261,7 +2271,7 @@ if (!ret && write && *valp) { printk("XFS Clearing pbstats\n"); memset(&pbstats, 0, sizeof(pbstats)); - pb_params.p_un.stats_clear = 0; + 
pb_params.stats_clear.val = 0; } return ret; @@ -2270,22 +2280,26 @@ STATIC struct ctl_table_header *pagebuf_table_header; STATIC ctl_table pagebuf_table[] = { - {PB_FLUSH_INT, "flush_int", &pb_params.data[0], + {PB_FLUSH_INT, "flush_int", &pb_params.flush_interval.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax, - &sysctl_intvec, NULL, &pagebuf_min[0], &pagebuf_max[0]}, + &sysctl_intvec, NULL, + &pb_params.flush_interval.min, &pb_params.flush_interval.max}, - {PB_FLUSH_AGE, "flush_age", &pb_params.data[1], + {PB_FLUSH_AGE, "flush_age", &pb_params.age_buffer.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_ms_jiffies_minmax, - &sysctl_intvec, NULL, &pagebuf_min[1], &pagebuf_max[1]}, + &sysctl_intvec, NULL, + &pb_params.age_buffer.min, &pb_params.age_buffer.max}, - {PB_STATS_CLEAR, "stats_clear", &pb_params.data[2], + {PB_STATS_CLEAR, "stats_clear", &pb_params.stats_clear.val, sizeof(ulong), 0644, NULL, &pb_stats_clear_handler, - &sysctl_intvec, NULL, &pagebuf_min[2], &pagebuf_max[2]}, + &sysctl_intvec, NULL, + &pb_params.stats_clear.min, &pb_params.stats_clear.max}, #ifdef PAGEBUF_TRACE - {PB_DEBUG, "debug", &pb_params.data[3], + {PB_DEBUG, "debug", &pb_params.debug.val, sizeof(ulong), 0644, NULL, &proc_doulongvec_minmax, - &sysctl_intvec, NULL, &pagebuf_min[3], &pagebuf_max[3]}, + &sysctl_intvec, NULL, + &pb_params.debug.min, &pb_params.debug.max}, #endif {0} }; diff --exclude=dmapi -rNu ORIG/fs/xfs/pagebuf/page_buf.h HACK/fs/xfs/pagebuf/page_buf.h --- ORIG/fs/xfs/pagebuf/page_buf.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/pagebuf/page_buf.h 2003-07-23 12:12:12.000000000 -0500 @@ -48,6 +48,17 @@ #include +/* RH 9 changes where the sigmask_lock is defined */ +#ifdef CLONE_SIGNAL +#define sigmask_lock() spin_lock_irq(¤t->sigmask_lock); +#define sigmask_unlock() spin_unlock_irq(¤t->sigmask_lock); +#define recalc_sigpending_(x) recalc_sigpending(x) +#else +/* RH9.0 */ +#define sigmask_lock() spin_lock_irq(¤t->sighand->siglock); +#define 
sigmask_unlock() spin_unlock_irq(¤t->sighand->siglock); +#define recalc_sigpending_(x) recalc_sigpending() +#endif /* * Turn this on to get pagebuf lock ownership #define PAGEBUF_LOCK_TRACKING diff --exclude=dmapi -rNu ORIG/fs/xfs/pagebuf/page_buf_internal.h HACK/fs/xfs/pagebuf/page_buf_internal.h --- ORIG/fs/xfs/pagebuf/page_buf_internal.h 2003-07-23 09:16:37.000000000 -0500 +++ HACK/fs/xfs/pagebuf/page_buf_internal.h 2003-07-23 12:12:12.000000000 -0500 @@ -85,18 +85,19 @@ * Tunable pagebuf parameters */ -#define P_PARAM 4 +typedef struct pb_sysctl_val { + ulong min; + ulong val; + ulong max; +} pb_sysctl_val_t; -typedef union pagebuf_param { - struct { - ulong flush_interval; /* interval between runs of the +typedef struct pagebuf_param { + pb_sysctl_val_t flush_interval; /* interval between runs of the * delwri flush daemon. */ - ulong age_buffer; /* time for buffer to age before + pb_sysctl_val_t age_buffer; /* time for buffer to age before * we flush it. */ - ulong debug; /* debug tracing on or off */ - ulong stats_clear; /* clear the pagebuf stats */ - } p_un; - ulong data[P_PARAM]; + pb_sysctl_val_t stats_clear; /* clear the pagebuf stats */ + pb_sysctl_val_t debug; /* debug tracing on or off */ } pagebuf_param_t; enum { diff --exclude=dmapi -rNu ORIG/fs/xfs/quota/xfs_qm.c HACK/fs/xfs/quota/xfs_qm.c --- ORIG/fs/xfs/quota/xfs_qm.c 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/quota/xfs_qm.c 2003-07-15 22:06:55.000000000 -0500 @@ -1609,7 +1609,7 @@ map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); lblkno = 0; - maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAX_FILE_OFFSET); + maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); do { nmaps = XFS_DQITER_MAP_SIZE; /* diff --exclude=dmapi -rNu ORIG/fs/xfs/sgiReleaseNumber HACK/fs/xfs/sgiReleaseNumber --- ORIG/fs/xfs/sgiReleaseNumber 1969-12-31 18:00:00.000000000 -0600 +++ HACK/fs/xfs/sgiReleaseNumber 2003-06-13 13:21:46.000000000 -0500 @@ -0,0 +1 @@ +1 diff --exclude=dmapi -rNu 
ORIG/fs/xfs/support/kmem.c HACK/fs/xfs/support/kmem.c --- ORIG/fs/xfs/support/kmem.c 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/support/kmem.c 2003-07-15 22:10:11.000000000 -0500 @@ -41,25 +41,6 @@ #define DEF_PRIORITY (6) #define MAX_SLAB_SIZE 0x10000 -static __inline unsigned int flag_convert(int flags) -{ -#if DEBUG - if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) { - printk(KERN_WARNING - "XFS: memory allocation with wrong flags (%x)\n", flags); - BUG(); - } -#endif - - if (flags & KM_NOSLEEP) - return GFP_ATOMIC; - /* If we're in a transaction, FS activity is not ok */ - else if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - return GFP_NOFS; - else - return GFP_KERNEL; -} - #define MAX_SHAKE 8 static kmem_shake_func_t shake_list[MAX_SHAKE]; @@ -113,18 +94,20 @@ void * kmem_alloc(size_t size, int flags) { - int shrink = DEF_PRIORITY; /* # times to try to shrink cache */ + int shrink = DEF_PRIORITY; /* # times to try to shrink cache */ + int lflags = kmem_flags_convert(flags); + int nosleep = flags & KM_NOSLEEP; void *rval; repeat: if (MAX_SLAB_SIZE < size) { /* Avoid doing filesystem sensitive stuff to get this */ - rval = __vmalloc(size, flag_convert(flags), PAGE_KERNEL); + rval = __vmalloc(size, lflags, PAGE_KERNEL); } else { - rval = kmalloc(size, flag_convert(flags)); + rval = kmalloc(size, lflags); } - if (rval || (flags & KM_NOSLEEP)) + if (rval || nosleep) return rval; /* @@ -137,8 +120,8 @@ goto repeat; } - rval = __vmalloc(size, flag_convert(flags), PAGE_KERNEL); - if (!rval && (flags & KM_SLEEP)) + rval = __vmalloc(size, lflags, PAGE_KERNEL); + if (!rval && !nosleep) panic("kmem_alloc: NULL memory on KM_SLEEP request!"); return rval; @@ -197,7 +180,7 @@ void *ptr = NULL; repeat: - ptr = kmem_cache_alloc(zone, flag_convert(flags)); + ptr = kmem_cache_alloc(zone, kmem_flags_convert(flags)); if (ptr || (flags & KM_NOSLEEP)) return ptr; @@ -225,7 +208,7 @@ void *ptr = NULL; repeat: - ptr = kmem_cache_alloc(zone, 
flag_convert(flags)); + ptr = kmem_cache_alloc(zone, kmem_flags_convert(flags)); if (ptr) { memset(ptr, 0, kmem_cache_size(zone)); diff --exclude=dmapi -rNu ORIG/fs/xfs/support/kmem.h HACK/fs/xfs/support/kmem.h --- ORIG/fs/xfs/support/kmem.h 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/support/kmem.h 2003-07-23 12:12:12.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -44,6 +44,50 @@ #define kmem_zone kmem_cache_s #define kmem_zone_t kmem_cache_t +typedef unsigned long xfs_pflags_t; + +#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO) +#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) + +#define PFLAGS_SET_NOIO(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags |= PF_NOIO; \ +} while (0) + +#define PFLAGS_SET_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags |= PF_FSTRANS; \ +} while (0) + +#define PFLAGS_RESTORE(STATEP) do { \ + current->flags = *(STATEP); \ +} while (0) + +#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ + *(NSTATEP) = *(OSTATEP); \ +} while (0); + +static __inline unsigned int kmem_flags_convert(int flags) +{ + int lflags; + +#if DEBUG + if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) { + printk(KERN_WARNING + "XFS: memory allocation with wrong flags (%x)\n", flags); + BUG(); + } +#endif + + lflags = (flags & KM_NOSLEEP) ? 
GFP_ATOMIC : GFP_KERNEL; + + /* avoid recusive callbacks to filesystem during transactions */ + if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + + return lflags; +} + extern kmem_zone_t *kmem_zone_init(int, char *); extern void *kmem_zone_zalloc(kmem_zone_t *, int); extern void *kmem_zone_alloc(kmem_zone_t *, int); diff --exclude=dmapi -rNu ORIG/fs/xfs/support/spin.h HACK/fs/xfs/support/spin.h --- ORIG/fs/xfs/support/spin.h 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/support/spin.h 2003-07-23 12:12:12.000000000 -0500 @@ -43,6 +43,8 @@ * We don't need to worry about SMP or not here. */ +#define SPLDECL(s) unsigned long s + typedef spinlock_t lock_t; #define spinlock_init(lock, name) spin_lock_init(lock) diff --exclude=dmapi -rNu ORIG/fs/xfs/VERSION HACK/fs/xfs/VERSION --- ORIG/fs/xfs/VERSION 1969-12-31 18:00:00.000000000 -0600 +++ HACK/fs/xfs/VERSION 2003-06-13 13:22:00.000000000 -0500 @@ -0,0 +1 @@ +1.3.0pre2 diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_ag.h HACK/fs/xfs/xfs_ag.h --- ORIG/fs/xfs/xfs_ag.h 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/xfs_ag.h 2003-06-02 11:53:23.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -185,9 +185,8 @@ #endif #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) -/* -- nathans TODO ... 
use of BBSIZE here - should be sector size -- */ typedef struct xfs_agfl { - xfs_agblock_t agfl_bno[BBSIZE/sizeof(xfs_agblock_t)]; + xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ } xfs_agfl_t; /* diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_attr_leaf.c HACK/fs/xfs/xfs_attr_leaf.c --- ORIG/fs/xfs/xfs_attr_leaf.c 2003-07-23 09:16:38.000000000 -0500 +++ HACK/fs/xfs/xfs_attr_leaf.c 2003-06-02 11:53:24.000000000 -0500 @@ -486,8 +486,7 @@ i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { if (unlikely( ((char *)sfe < (char *)sf) || - ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)) || - (sfe->namelen >= MAXNAMELEN))) { + ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) { XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_bmap.c HACK/fs/xfs/xfs_bmap.c --- ORIG/fs/xfs/xfs_bmap.c 2003-07-23 09:16:39.000000000 -0500 +++ HACK/fs/xfs/xfs_bmap.c 2003-07-15 22:06:57.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -5579,7 +5579,7 @@ if (whichfork == XFS_DATA_FORK) { if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) { prealloced = 1; - fixlen = XFS_MAX_FILE_OFFSET; + fixlen = XFS_MAXIOFFSET(mp); } else { prealloced = 0; fixlen = ip->i_d.di_size; diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_buf.h HACK/fs/xfs/xfs_buf.h --- ORIG/fs/xfs/xfs_buf.h 2003-07-23 09:16:39.000000000 -0500 +++ HACK/fs/xfs/xfs_buf.h 2003-06-02 11:53:24.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -283,7 +283,6 @@ return error; } - #define XFS_bdwrite(pb) \ pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) @@ -307,15 +306,15 @@ * of its metadata. */ -extern void XFS_bflush(xfs_buftarg_t *); -#define xfs_binval(buftarg) XFS_bflush(buftarg) +#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg) + +#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg) #define xfs_incore_relse(buftarg,delwri_only,wait) \ xfs_relse_buftarg(buftarg) #define xfs_baread(target, rablkno, ralen) \ - pagebuf_readahead((target), (rablkno), \ - (ralen), PBF_DONT_BLOCK) + pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) #define XFS_getrbuf(sleep,mp) \ pagebuf_get_empty((mp)->m_ddev_targp) diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_dir_leaf.c HACK/fs/xfs/xfs_dir_leaf.c --- ORIG/fs/xfs/xfs_dir_leaf.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_dir_leaf.c 2003-06-02 11:53:25.000000000 -0500 @@ -483,8 +483,7 @@ if (unlikely( ((char *)sfe < (char *)sf) || - ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)) || - (sfe->namelen >= MAXNAMELEN))) { + ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) { xfs_dir_trace_g_du("sf: corrupted", dp, uio); XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents", XFS_ERRLEVEL_LOW, mp, sfe); @@ -2001,8 +2000,7 @@ if (unlikely( ((char *)namest < (char *)leaf) || - ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)) || - (entry->namelen >= MAXNAMELEN))) { + ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) { XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)", XFS_ERRLEVEL_LOW, mp, leaf); xfs_dir_trace_g_du("leaf: corrupted", dp, uio); @@ -2065,8 +2063,7 @@ if (unlikely( ((char *)namest < (char *)leaf) || - ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)) || - (entry->namelen >= MAXNAMELEN))) { + ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) { 
XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)", XFS_ERRLEVEL_LOW, mp, leaf); xfs_dir_trace_g_du("leaf: corrupted", dp, uio); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_error.c HACK/fs/xfs/xfs_error.c --- ORIG/fs/xfs/xfs_error.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_error.c 2003-07-15 22:22:50.000000000 -0500 @@ -323,6 +323,7 @@ int linenum, inst_t *ra) { - xfs_hex_dump(p, 16); + if (level <= xfs_error_level) + xfs_hex_dump(p, 16); xfs_error_report(tag, level, mp, fname, linenum, ra); } diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_ialloc.c HACK/fs/xfs/xfs_ialloc.c --- ORIG/fs/xfs/xfs_ialloc.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_ialloc.c 2003-06-02 11:53:25.000000000 -0500 @@ -151,7 +151,6 @@ int ninodes; /* num inodes per buf */ xfs_agino_t thisino; /* current inode number, for loop */ int version; /* inode version number to use */ - static xfs_timestamp_t ztime; /* zero xfs timestamp */ int isaligned; /* inode allocation at stripe unit */ /* boundary */ xfs_dinode_core_t dic; /* a dinode_core to copy to new */ @@ -265,6 +264,11 @@ version = XFS_DINODE_VERSION_2; else version = XFS_DINODE_VERSION_1; + + memset(&dic, 0, sizeof(xfs_dinode_core_t)); + INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); + INT_SET(dic.di_version, ARCH_CONVERT, version); + for (j = 0; j < nbufs; j++) { /* * Get the block. @@ -279,36 +283,6 @@ /* * Loop over the inodes in this buffer. 
*/ - INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); - INT_ZERO(dic.di_mode, ARCH_CONVERT); - INT_SET(dic.di_version, ARCH_CONVERT, version); - INT_ZERO(dic.di_format, ARCH_CONVERT); - INT_ZERO(dic.di_onlink, ARCH_CONVERT); - INT_ZERO(dic.di_uid, ARCH_CONVERT); - INT_ZERO(dic.di_gid, ARCH_CONVERT); - INT_ZERO(dic.di_nlink, ARCH_CONVERT); - INT_ZERO(dic.di_projid, ARCH_CONVERT); - memset(&(dic.di_pad[0]), 0, sizeof(dic.di_pad)); - INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec); - INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec); - - INT_SET(dic.di_mtime.t_sec, ARCH_CONVERT, ztime.t_sec); - INT_SET(dic.di_mtime.t_nsec, ARCH_CONVERT, ztime.t_nsec); - - INT_SET(dic.di_ctime.t_sec, ARCH_CONVERT, ztime.t_sec); - INT_SET(dic.di_ctime.t_nsec, ARCH_CONVERT, ztime.t_nsec); - - INT_ZERO(dic.di_size, ARCH_CONVERT); - INT_ZERO(dic.di_nblocks, ARCH_CONVERT); - INT_ZERO(dic.di_extsize, ARCH_CONVERT); - INT_ZERO(dic.di_nextents, ARCH_CONVERT); - INT_ZERO(dic.di_anextents, ARCH_CONVERT); - INT_ZERO(dic.di_forkoff, ARCH_CONVERT); - INT_ZERO(dic.di_aformat, ARCH_CONVERT); - INT_ZERO(dic.di_dmevmask, ARCH_CONVERT); - INT_ZERO(dic.di_dmstate, ARCH_CONVERT); - INT_ZERO(dic.di_flags, ARCH_CONVERT); - INT_ZERO(dic.di_gen, ARCH_CONVERT); for (i = 0; i < ninodes; i++) { free = XFS_MAKE_IPTR(args.mp, fbuf, i); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfsidbg.c HACK/fs/xfs/xfsidbg.c --- ORIG/fs/xfs/xfsidbg.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfsidbg.c 2003-06-26 11:54:05.000000000 -0500 @@ -1406,33 +1406,40 @@ } -static void printvnode(vnode_t *vp, unsigned long addr) +static void printbhv(bhv_desc_t *bdp) { - bhv_desc_t *bh; kdb_symtab_t symtab; + if (bdp == NULL) { + kdb_printf("NULL bhv\n"); + return; + } + + kdb_printf("bhv at 0x%p\n", bdp); + while (bdp) { + if (kdbnearsym((unsigned long)bdp->bd_ops, &symtab)) + kdb_printf(" ops %s", symtab.sym_name); + else + kdb_printf(" ops %s/0x%p", "???", (void *)bdp->bd_ops); + + kdb_printf(" vobj 0x%p pdata 
0x%p next 0x%p\n", + bdp->bd_vobj, bdp->bd_pdata, bdp->bd_next); + + bdp = bdp->bd_next; + } +} + +static void printvnode(vnode_t *vp, unsigned long addr) +{ kdb_printf("vnode: 0x%lx type ", addr); if ((size_t)vp->v_type >= sizeof(vnode_type)/sizeof(vnode_type[0])) kdb_printf("out of range 0x%x", vp->v_type); else kdb_printf("%s", vnode_type[vp->v_type]); - kdb_printf(" v_bh %p\n", &vp->v_bh); - - if ((bh = vp->v_bh.bh_first)) { - kdb_printf(" v_inode 0x%p v_bh->bh_first 0x%p pobj 0x%p\n", - LINVFS_GET_IP((struct vnode *) addr), - bh, bh->bd_pdata); + kdb_printf(" v_bh 0x%p\n", &vp->v_bh); - if (kdbnearsym((unsigned long)bh->bd_ops, &symtab)) - kdb_printf(" ops %s ", symtab.sym_name); - else - kdb_printf(" ops %s/0x%p ", - "???", (void *)bh->bd_ops); - } else { - kdb_printf(" v_inode 0x%p v_bh->bh_first = NULLBHV ", - LINVFS_GET_IP((struct vnode *) addr)); - } + printbhv(vp->v_fbhv); printflags((__psunsigned_t)vp->v_flag, tab_vflags, "flag ="); kdb_printf("\n"); @@ -1477,7 +1484,38 @@ print_vfs(vfs_t *vfs, unsigned long addr) { kdb_printf("vfsp at 0x%lx", addr); - kdb_printf(" vfs_fbhv 0x%p sb 0x%p\n", vfs->vfs_fbhv, vfs->vfs_super); + kdb_printf(" vfs_flag 0x%x\n", vfs->vfs_flag); + kdb_printf(" vfs_super 0x%p", vfs->vfs_super); + kdb_printf(" vfs_bh 0x%p\n", &vfs->vfs_bh); + + printbhv(vfs->vfs_fbhv); +} + +static int kdbm_bhv( + int argc, + const char **argv, + const char **envp, + struct pt_regs *regs) +{ + unsigned long addr; + int nextarg = 1; + long offset = 0; + int diag; + bhv_desc_t *bh; + + if (argc != 1) + return KDB_ARGCOUNT; + + diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL, regs); + + if (diag) + return diag; + + bh = (bhv_desc_t *)addr; + + printbhv(bh); + + return 0; } static int kdbm_vfs( @@ -2172,6 +2210,7 @@ char *args; char *help; } xfsidbg_funcs[] = { + { "bhv", kdbm_bhv, "", "Dump bhv chain"}, { "vn", kdbm_vn, "", "Dump inode/vnode/trace"}, { "vnode", kdbm_vnode, "", "Dump vnode"}, { "vfs", kdbm_vfs, "", "Dump vfs"}, @@ 
-4305,8 +4344,10 @@ kdb_printf("iclog_bak: 0x%p iclog_size: 0x%x (%d) num iclogs: %d\n", log->l_iclog_bak, log->l_iclog_size, log->l_iclog_size, log->l_iclog_bufs); - kdb_printf("l_iclog_hsize %d l_iclog_heads %d\n", - log->l_iclog_hsize, log->l_iclog_heads); + kdb_printf("l_stripemask %d l_iclog_hsize %d l_iclog_heads %d\n", + log->l_stripemask, log->l_iclog_hsize, log->l_iclog_heads); + kdb_printf("l_sectbb_log %u l_sectbb_mask %u\n", + log->l_sectbb_log, log->l_sectbb_mask); kdb_printf("&grant_lock: 0x%p resHeadQ: 0x%p wrHeadQ: 0x%p\n", &log->l_grant_lock, log->l_reserve_headq, log->l_write_headq); kdb_printf("GResCycle: %d GResBytes: %d GWrCycle: %d GWrBytes: %d\n", @@ -4748,7 +4789,6 @@ (xfs_dfiloff_t)mp->m_dirfreeblk); kdb_printf("chsize %d chash 0x%p\n", mp->m_chsize, mp->m_chash); - kdb_printf("m_lstripemask %d\n", mp->m_lstripemask); kdb_printf("m_frozen %d m_active_trans %d\n", mp->m_frozen, mp->m_active_trans.counter); if (mp->m_fsname != NULL) @@ -4918,13 +4958,8 @@ xfs_inode_t *ip; while (chl != NULL) { -#ifdef DEBUG - kdb_printf("hashlist inode 0x%p blkno %Ld buf 0x%p", - chl->chl_ip, chl->chl_blkno, chl->chl_buf); -#else - kdb_printf("hashlist inode 0x%p blkno %lld", - chl->chl_ip, (long long) chl->chl_blkno); -#endif + kdb_printf("hashlist inode 0x%p blkno %lld buf 0x%p", + chl->chl_ip, (long long) chl->chl_blkno, chl->chl_buf); kdb_printf("\n"); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_iget.c HACK/fs/xfs/xfs_iget.c --- ORIG/fs/xfs/xfs_iget.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_iget.c 2003-06-13 10:46:53.000000000 -0500 @@ -214,7 +214,13 @@ XFS_STATS_INC(xfsstats.xs_ig_found); + ip->i_flags &= ~XFS_IRECLAIMABLE; read_unlock(&ih->ih_lock); + + XFS_MOUNT_ILOCK(mp); + list_del_init(&ip->i_reclaim); + XFS_MOUNT_IUNLOCK(mp); + goto finish_inode; } else if (vp != inode_vp) { @@ -253,10 +259,6 @@ xfs_iocore_inode_reinit(ip); } - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); - vn_trace_exit(vp, 
"xfs_iget.found", (inst_t *)__return_address); goto return_ip; diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_inode.c HACK/fs/xfs/xfs_inode.c --- ORIG/fs/xfs/xfs_inode.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_inode.c 2003-07-15 22:06:56.000000000 -0500 @@ -412,7 +412,7 @@ mp->m_dev, (unsigned long long)imap.im_blkno, i, INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); #endif - XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_LOW, + XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, mp, dip); xfs_trans_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); @@ -1265,7 +1265,7 @@ */ if (xfs_bmapi(NULL, ip, map_first, (XFS_B_TO_FSB(mp, - (xfs_ufsize_t)XFS_MAX_FILE_OFFSET) - + (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, NULL)) @@ -1319,11 +1319,11 @@ last_byte = XFS_FSB_TO_B(mp, last_block); if (last_byte < 0) { - return XFS_MAX_FILE_OFFSET; + return XFS_MAXIOFFSET(mp); } last_byte += (1 << mp->m_writeio_log); if (last_byte < 0) { - return XFS_MAX_FILE_OFFSET; + return XFS_MAXIOFFSET(mp); } return last_byte; } @@ -1613,7 +1613,7 @@ * beyond the maximum file size (ie it is the same as last_block), * then there is nothing to do. 
*/ - last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAX_FILE_OFFSET); + last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); ASSERT(first_unmap_block <= last_block); done = 0; if (last_block == first_unmap_block) { @@ -2629,7 +2629,8 @@ if (vp) { struct inode *inode = LINVFS_GET_IP(vp); - mark_inode_dirty_sync(inode); + if (!(inode->i_state & I_NEW)) + mark_inode_dirty_sync(inode); } wake_up(&ip->i_ipin_wait); @@ -2993,9 +2994,7 @@ * see if other inodes can be gathered into this write */ -#ifdef DEBUG - ip->i_chash->chl_buf = bp; /* inode clustering debug */ -#endif + ip->i_chash->chl_buf = bp; ch = XFS_CHASH(mp, ip->i_blkno); s = mutex_spinlock(&ch->ch_lock); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_inode.h HACK/fs/xfs/xfs_inode.h --- ORIG/fs/xfs/xfs_inode.h 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_inode.h 2003-07-15 22:06:56.000000000 -0500 @@ -192,9 +192,7 @@ struct xfs_inode *chl_ip; xfs_daddr_t chl_blkno; /* starting block number of * the cluster */ -#ifdef DEBUG - struct xfs_buf *chl_buf; /* debug: the inode buffer */ -#endif + struct xfs_buf *chl_buf; /* the inode buffer */ } xfs_chashlist_t; typedef struct xfs_chash { @@ -366,6 +364,7 @@ #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ +#define XFS_IRECLAIMABLE 0x0010 /* inode can be reclaimed */ /* * Flags for inode locking. @@ -409,14 +408,6 @@ #define XFS_ITRUNC_DEFINITE 0x1 #define XFS_ITRUNC_MAYBE 0x2 -/* - * max file offset is 2^(31+PAGE_SHIFT) - 1 (due to linux page cache) - * - * NOTE: XFS itself can handle 2^63 - 1 (largest positive value of xfs_fsize_t) - * but this is the Linux limit. 
- */ -#define XFS_MAX_FILE_OFFSET MAX_LFS_FILESIZE - #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOV) struct vnode *xfs_itov(xfs_inode_t *ip); #define XFS_ITOV(ip) xfs_itov(ip) diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_inode_item.c HACK/fs/xfs/xfs_inode_item.c --- ORIG/fs/xfs/xfs_inode_item.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_inode_item.c 2003-06-02 11:53:25.000000000 -0500 @@ -879,7 +879,7 @@ * Write out the inode. The completion routine ('iflush_done') will * pull it from the AIL, mark it clean, unlock the flush lock. */ - (void) xfs_iflush(ip, XFS_IFLUSH_DELWRI); + (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); xfs_iunlock(ip, XFS_ILOCK_SHARED); return; diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_iocore.c HACK/fs/xfs/xfs_iocore.c --- ORIG/fs/xfs/xfs_iocore.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_iocore.c 2003-06-16 10:40:36.000000000 -0500 @@ -77,8 +77,7 @@ struct xfs_mount_args *mntargs, int flags) { - return xfs_mountfs(vfsp, XFS_VFSTOM(vfsp), - vfsp->vfs_super->s_bdev->bd_dev, flags); + return xfs_mountfs(vfsp, XFS_VFSTOM(vfsp), flags); } xfs_ioops_t xfs_iocore_xfs = { diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_log.c HACK/fs/xfs/xfs_log.c --- ORIG/fs/xfs/xfs_log.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_log.c 2003-07-15 22:24:11.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -414,19 +414,6 @@ } /* - * Initialize log manager data. This routine is intended to be called when - * a system boots up. It is not a per filesystem initialization. - * - * As you can see, we currently do nothing. - */ -int -xfs_log_init(void) -{ - return( 0 ); -} - - -/* * 1. 
Reserve an amount of on-disk log space and return a ticket corresponding * to the reservation. * 2. Potentially, push buffers at tail of log to disk. @@ -497,8 +484,6 @@ xfs_daddr_t blk_offset, int num_bblks) { - xlog_t *log; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); else { @@ -508,7 +493,7 @@ ASSERT(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY); } - mp->m_log = log = xlog_alloc_log(mp, log_dev, blk_offset, num_bblks); + mp->m_log = xlog_alloc_log(mp, log_dev, blk_offset, num_bblks); #if defined(DEBUG) || defined(XLOG_NOLOG) if (! xlog_debug) { @@ -528,19 +513,19 @@ if (readonly) vfsp->vfs_flag &= ~VFS_RDONLY; - error = xlog_recover(log, readonly); + error = xlog_recover(mp->m_log, readonly); if (readonly) vfsp->vfs_flag |= VFS_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed"); - xlog_unalloc_log(log); + xlog_unalloc_log(mp->m_log); return error; } } /* Normal transactions can now occur */ - log->l_flags &= ~XLOG_ACTIVE_RECOVERY; + mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; /* End mounting message in xfs_log_mount_finish */ return 0; @@ -809,8 +794,9 @@ do { ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); - if (free_bytes < tic->t_unit_res) + if (free_bytes < tic->t_unit_res && tail_lsn != 1) break; + tail_lsn = 0; free_bytes -= tic->t_unit_res; sv_signal(&tic->t_sema); tic = tic->t_next; @@ -829,8 +815,9 @@ need_bytes = tic->t_unit_res*tic->t_cnt; else need_bytes = tic->t_unit_res; - if (free_bytes < need_bytes) + if (free_bytes < need_bytes && tail_lsn != 1) break; + tail_lsn = 0; free_bytes -= need_bytes; sv_signal(&tic->t_sema); tic = tic->t_next; @@ -851,8 +838,10 @@ SPLDECL(s); int needed = 0, gen; xlog_t *log = mp->m_log; + vfs_t *vfsp = XFS_MTOVFS(mp); - if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp)) + if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp) || + (vfsp->vfs_flag & VFS_RDONLY)) return 0; s = LOG_LOCK(log); @@ -1077,7 +1066,7 @@ if (mp->m_logbufs == 0) { xlog_debug = 0; xlog_devt 
= log->l_dev; - log->l_iclog_bufs = XLOG_NUM_ICLOGS; + log->l_iclog_bufs = XLOG_MIN_ICLOGS; } else #endif { @@ -1085,9 +1074,16 @@ * This is the normal path. If m_logbufs == -1, then the * admin has chosen to use the system defaults for logbuffers. */ - if (mp->m_logbufs == -1) - log->l_iclog_bufs = XLOG_NUM_ICLOGS; - else + if (mp->m_logbufs == -1) { + if (xfs_physmem <= btoc(128*1024*1024)) { + log->l_iclog_bufs = XLOG_MIN_ICLOGS; + } else if (xfs_physmem <= btoc(400*1024*1024)) { + log->l_iclog_bufs = XLOG_MED_ICLOGS;; + } else { + /* 256K with 32K bufs */ + log->l_iclog_bufs = XLOG_MAX_ICLOGS; + } + } else log->l_iclog_bufs = mp->m_logbufs; #if defined(DEBUG) || defined(XLOG_NOLOG) @@ -1191,28 +1187,42 @@ int i; int iclogsize; - log = (void *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); log->l_mp = mp; log->l_dev = log_dev; log->l_logsize = BBTOB(num_bblks); log->l_logBBstart = blk_offset; log->l_logBBsize = num_bblks; - log->l_roundoff = 0; log->l_covered_state = XLOG_STATE_COVER_IDLE; log->l_flags |= XLOG_ACTIVE_RECOVERY; log->l_prev_block = -1; ASSIGN_ANY_LSN(log->l_tail_lsn, 1, 0, ARCH_NOCONVERT); - /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ + /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ log->l_last_sync_lsn = log->l_tail_lsn; log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - log->l_curr_block = 0; /* filled in by xlog_recover */ - log->l_grant_reserve_bytes = 0; log->l_grant_reserve_cycle = 1; - log->l_grant_write_bytes = 0; log->l_grant_write_cycle = 1; - log->l_quotaoffs_flag = 0; /* XFS_LI_QUOTAOFF logitems */ + + if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { + if (mp->m_sb.sb_logsunit <= 1) { + log->l_stripemask = 1; + } else { + log->l_stripemask = 1 << + xfs_highbit32(mp->m_sb.sb_logsunit >> BBSHIFT); + } + } + if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) { + log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; + ASSERT(log->l_sectbb_log <= 
mp->m_sectbb_log); + /* for larger sector sizes, must have v2 or external log */ + ASSERT(log->l_sectbb_log == 0 || + log->l_logBBstart == 0 || + XFS_SB_VERSION_HASLOGV2(&mp->m_sb)); + ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); + } + log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; xlog_get_iclog_buffer_size(mp, log); @@ -2811,10 +2821,9 @@ /* Round up to next log-sunit */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - if (log->l_curr_block & (log->l_mp->m_lstripemask - 1)) { - roundup = log->l_mp->m_lstripemask - - (log->l_curr_block & - (log->l_mp->m_lstripemask - 1)); + if (log->l_curr_block & (log->l_stripemask - 1)) { + roundup = log->l_stripemask - + (log->l_curr_block & (log->l_stripemask - 1)); } else { roundup = 0; } @@ -3293,15 +3302,17 @@ { xfs_buf_t *bp; uint cycle_no; + xfs_caddr_t ptr; xfs_daddr_t i; if (BLOCK_LSN(iclog->ic_header.h_lsn, ARCH_CONVERT) < 10) { cycle_no = CYCLE_LSN(iclog->ic_header.h_lsn, ARCH_CONVERT); - bp = xlog_get_bp(1, log->l_mp); + bp = xlog_get_bp(log, 1); ASSERT(bp); for (i = 0; i < BLOCK_LSN(iclog->ic_header.h_lsn, ARCH_CONVERT); i++) { xlog_bread(log, i, 1, bp); - if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) != cycle_no) + ptr = xlog_align(log, i, 1, bp); + if (GET_CYCLE(ptr, ARCH_CONVERT) != cycle_no) xlog_warn("XFS: xlog_verify_disk_cycle_no: bad cycle no"); } xlog_put_bp(bp); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_log.h HACK/fs/xfs/xfs_log.h --- ORIG/fs/xfs/xfs_log.h 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_log.h 2003-06-02 11:53:25.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -153,7 +153,6 @@ int xfs_log_force(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags); -int xfs_log_init(void); int xfs_log_mount(struct xfs_mount *mp, dev_t log_dev, xfs_daddr_t start_block, diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_log_priv.h HACK/fs/xfs/xfs_log_priv.h --- ORIG/fs/xfs/xfs_log_priv.h 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_log_priv.h 2003-07-15 22:24:11.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -50,7 +50,8 @@ * Macros, structures, prototypes for internal log manager use. */ -#define XLOG_NUM_ICLOGS 2 +#define XLOG_MIN_ICLOGS 2 +#define XLOG_MED_ICLOGS 4 #define XLOG_MAX_ICLOGS 8 #define XLOG_CALLBACK_SIZE 10 #define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Illegal cycle number */ @@ -73,6 +74,9 @@ #define XLOG_HEADER_SIZE 512 +#define XLOG_REC_SHIFT(log) \ + BTOBB(1 << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ + XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) #define XLOG_TOTAL_REC_SHIFT(log) \ BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) @@ -202,9 +206,9 @@ #define LOG_LOCK(log) mutex_spinlock(&(log)->l_icloglock) #define LOG_UNLOCK(log, s) mutex_spinunlock(&(log)->l_icloglock, s) -#define xlog_panic(s) {cmn_err(CE_PANIC, s); } -#define xlog_exit(s) {cmn_err(CE_PANIC, s); } -#define xlog_warn(s) {cmn_err(CE_WARN, s); } +#define xlog_panic(args...) cmn_err(CE_PANIC, ## args) +#define xlog_exit(args...) cmn_err(CE_PANIC, ## args) +#define xlog_warn(args...) 
cmn_err(CE_WARN, ## args) /* * In core log state @@ -403,6 +407,7 @@ uint xh_cycle; /* write cycle of log : 4 */ uint xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ } xlog_rec_ext_header_t; + #ifdef __KERNEL__ /* * - A log record header is 512 bytes. There is plenty of room to grow the @@ -441,12 +446,10 @@ char *ic_datap; /* pointer to iclog data */ } xlog_iclog_fields_t; -typedef struct xlog_in_core2 { - union { - xlog_rec_header_t hic_header; - xlog_rec_ext_header_t hic_xheader; - char hic_sector[XLOG_HEADER_SIZE]; - } ic_h; +typedef union xlog_in_core2 { + xlog_rec_header_t hic_header; + xlog_rec_ext_header_t hic_xheader; + char hic_sector[XLOG_HEADER_SIZE]; } xlog_in_core_2_t; typedef struct xlog_in_core { @@ -473,7 +476,7 @@ #define ic_bwritecnt hic_fields.ic_bwritecnt #define ic_state hic_fields.ic_state #define ic_datap hic_fields.ic_datap -#define ic_header hic_data->ic_h.hic_header +#define ic_header hic_data->hic_header /* * The reservation head lsn is not made up of a cycle number and block number. 
@@ -530,8 +533,11 @@ uint l_flags; uint l_quotaoffs_flag;/* XFS_DQ_*, if QUOTAOFFs found */ struct xfs_buf_cancel **l_buf_cancel_table; + int l_stripemask; /* log stripe mask */ int l_iclog_hsize; /* size of iclog header */ int l_iclog_heads; /* number of iclog header sectors */ + uint l_sectbb_log; /* log2 of sector size in bbs */ + uint l_sectbb_mask; /* sector size in bbs alignment mask */ } xlog_t; @@ -546,11 +552,13 @@ extern int xlog_recover(xlog_t *log, int readonly); extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog); -extern struct xfs_buf *xlog_get_bp(int,xfs_mount_t *); -extern void xlog_put_bp(struct xfs_buf *); -extern int xlog_bread(xlog_t *, xfs_daddr_t blkno, int bblks, struct xfs_buf *bp); extern void xlog_recover_process_iunlinks(xlog_t *log); +extern struct xfs_buf *xlog_get_bp(xlog_t *, int); +extern void xlog_put_bp(struct xfs_buf *); +extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); +extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); + #define XLOG_TRACE_GRAB_FLUSH 1 #define XLOG_TRACE_REL_FLUSH 2 #define XLOG_TRACE_SLEEP_FLUSH 3 diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_log_recover.c HACK/fs/xfs/xfs_log_recover.c --- ORIG/fs/xfs/xfs_log_recover.c 2003-07-23 09:16:40.000000000 -0500 +++ HACK/fs/xfs/xfs_log_recover.c 2003-06-26 11:49:36.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -65,53 +65,68 @@ #include "xfs_quota.h" #include "xfs_rw.h" -STATIC int xlog_find_zeroed(struct log *log, xfs_daddr_t *blk_no); - -STATIC int xlog_clear_stale_blocks(xlog_t *log, xfs_lsn_t tail_lsn); +STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); +STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, xlog_recover_item_t *item); - #if defined(DEBUG) -STATIC void xlog_recover_check_summary(xlog_t *log); -STATIC void xlog_recover_check_ail(xfs_mount_t *mp, xfs_log_item_t *lip, - int gen); +STATIC void xlog_recover_check_summary(xlog_t *); +STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); #else #define xlog_recover_check_summary(log) #define xlog_recover_check_ail(mp, lip, gen) -#endif /* DEBUG */ +#endif +/* + * Sector aligned buffer routines for buffer create/read/write/access + */ + +#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ + ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ + ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) +#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) + xfs_buf_t * -xlog_get_bp(int num_bblks, xfs_mount_t *mp) +xlog_get_bp( + xlog_t *log, + int num_bblks) { - xfs_buf_t *bp; - ASSERT(num_bblks > 0); - bp = XFS_ngetrbuf(BBTOB(num_bblks),mp); - return bp; -} /* xlog_get_bp */ - + if (log->l_sectbb_log) { + if (num_bblks > 1) + num_bblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + num_bblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, num_bblks); + } + return XFS_ngetrbuf(BBTOB(num_bblks), log->l_mp); +} void -xlog_put_bp(xfs_buf_t *bp) +xlog_put_bp( + xfs_buf_t *bp) { XFS_nfreerbuf(bp); -} /* xlog_put_bp */ +} /* * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 
*/ int -xlog_bread(xlog_t *log, - xfs_daddr_t blk_no, - int nbblks, - xfs_buf_t *bp) +xlog_bread( + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, + xfs_buf_t *bp) { - int error; + int error; + + if (log->l_sectbb_log) { + blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); + nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); + } - ASSERT(log); ASSERT(nbblks > 0); ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); ASSERT(bp); @@ -123,14 +138,11 @@ XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); xfsbdstrat(log->l_mp, bp); - if ((error = xfs_iowait(bp))) { + if ((error = xfs_iowait(bp))) xfs_ioerror_alert("xlog_bread", log->l_mp, bp, XFS_BUF_ADDR(bp)); - return (error); - } return error; -} /* xlog_bread */ - +} /* * Write out the buffer at the given block for the given number of blocks. @@ -139,12 +151,17 @@ */ int xlog_bwrite( - xlog_t *log, - int blk_no, - int nbblks, + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, xfs_buf_t *bp) { - int error; + int error; + + if (log->l_sectbb_log) { + blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); + nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); + } ASSERT(nbblks > 0); ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); @@ -160,94 +177,109 @@ if ((error = xfs_bwrite(log->l_mp, bp))) xfs_ioerror_alert("xlog_bwrite", log->l_mp, bp, XFS_BUF_ADDR(bp)); + return error; +} - return (error); -} /* xlog_bwrite */ +xfs_caddr_t +xlog_align( + xlog_t *log, + xfs_daddr_t blk_no, + int nbblks, + xfs_buf_t *bp) +{ + xfs_caddr_t ptr; + + if (!log->l_sectbb_log) + return XFS_BUF_PTR(bp); + + ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask); + ASSERT(XFS_BUF_SIZE(bp) >= + BBTOB(nbblks + (blk_no & log->l_sectbb_mask))); + return ptr; +} #ifdef DEBUG /* - * check log record header for recovery + * dump debug superblock and log record information */ -static void -xlog_header_check_dump(xfs_mount_t *mp, xlog_rec_header_t *head) +STATIC void +xlog_header_check_dump( + xfs_mount_t *mp, + xlog_rec_header_t *head) { - int b; + int b; - 
printk("%s: SB : uuid = ", __FUNCTION__); - for (b=0;b<16;b++) printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]); - printk(", fmt = %d\n",XLOG_FMT); - printk(" log : uuid = "); - for (b=0;b<16;b++) printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]); - printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); + printk("%s: SB : uuid = ", __FUNCTION__); + for (b = 0; b < 16; b++) + printk("%02x",((unsigned char *)&mp->m_sb.sb_uuid)[b]); + printk(", fmt = %d\n", XLOG_FMT); + printk(" log : uuid = "); + for (b = 0; b < 16; b++) + printk("%02x",((unsigned char *)&head->h_fs_uuid)[b]); + printk(", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT)); } +#else +#define xlog_header_check_dump(mp, head) #endif /* * check log record header for recovery */ - STATIC int -xlog_header_check_recover(xfs_mount_t *mp, xlog_rec_header_t *head) +xlog_header_check_recover( + xfs_mount_t *mp, + xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - - /* - * IRIX doesn't write the h_fmt field and leaves it zeroed - * (XLOG_FMT_UNKNOWN). This stops us from trying to recover - * a dirty log created in IRIX. - */ + ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) { - xlog_warn("XFS: dirty log written in incompatible format - can't recover"); -#ifdef DEBUG - xlog_header_check_dump(mp, head); -#endif - XFS_ERROR_REPORT("xlog_header_check_recover(1)", - XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { - xlog_warn("XFS: dirty log entry has mismatched uuid - can't recover"); -#ifdef DEBUG - xlog_header_check_dump(mp, head); -#endif - XFS_ERROR_REPORT("xlog_header_check_recover(2)", - XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); - } - - return 0; + /* + * IRIX doesn't write the h_fmt field and leaves it zeroed + * (XLOG_FMT_UNKNOWN). 
This stops us from trying to recover + * a dirty log created in IRIX. + */ + if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) { + xlog_warn( + "XFS: dirty log written in incompatible format - can't recover"); + xlog_header_check_dump(mp, head); + XFS_ERROR_REPORT("xlog_header_check_recover(1)", + XFS_ERRLEVEL_HIGH, mp); + return XFS_ERROR(EFSCORRUPTED); + } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { + xlog_warn( + "XFS: dirty log entry has mismatched uuid - can't recover"); + xlog_header_check_dump(mp, head); + XFS_ERROR_REPORT("xlog_header_check_recover(2)", + XFS_ERRLEVEL_HIGH, mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; } /* * read the head block of the log and check the header */ - STATIC int -xlog_header_check_mount(xfs_mount_t *mp, xlog_rec_header_t *head) +xlog_header_check_mount( + xfs_mount_t *mp, + xlog_rec_header_t *head) { - ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - - if (uuid_is_nil(&head->h_fs_uuid)) { + ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - /* - * IRIX doesn't write the h_fs_uuid or h_fmt fields. If - * h_fs_uuid is nil, we assume this log was last mounted - * by IRIX and continue. - */ - - xlog_warn("XFS: nil uuid in log - IRIX style log"); - - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { - xlog_warn("XFS: log has mismatched uuid - can't recover"); -#ifdef DEBUG - xlog_header_check_dump(mp, head); -#endif - XFS_ERROR_REPORT("xlog_header_check_mount", - XFS_ERRLEVEL_HIGH, mp); - return XFS_ERROR(EFSCORRUPTED); - } - - return 0; + if (uuid_is_nil(&head->h_fs_uuid)) { + /* + * IRIX doesn't write the h_fs_uuid or h_fmt fields. If + * h_fs_uuid is nil, we assume this log was last mounted + * by IRIX and continue. 
+ */ + xlog_warn("XFS: nil uuid in log - IRIX style log"); + } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { + xlog_warn("XFS: log has mismatched uuid - can't recover"); + xlog_header_check_dump(mp, head); + XFS_ERROR_REPORT("xlog_header_check_mount", + XFS_ERRLEVEL_HIGH, mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; } STATIC void @@ -255,6 +287,7 @@ struct xfs_buf *bp) { xfs_mount_t *mp; + ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); if (XFS_BUF_GETERROR(bp)) { @@ -279,12 +312,14 @@ * necessarily be perfect. */ int -xlog_find_cycle_start(xlog_t *log, - xfs_buf_t *bp, - xfs_daddr_t first_blk, - xfs_daddr_t *last_blk, - uint cycle) +xlog_find_cycle_start( + xlog_t *log, + xfs_buf_t *bp, + xfs_daddr_t first_blk, + xfs_daddr_t *last_blk, + uint cycle) { + xfs_caddr_t offset; xfs_daddr_t mid_blk; uint mid_cycle; int error; @@ -293,7 +328,8 @@ while (mid_blk != first_blk && mid_blk != *last_blk) { if ((error = xlog_bread(log, mid_blk, 1, bp))) return error; - mid_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); + offset = xlog_align(log, mid_blk, 1, bp); + mid_cycle = GET_CYCLE(offset, ARCH_CONVERT); if (mid_cycle == cycle) { *last_blk = mid_blk; /* last_half_cycle == mid_cycle */ @@ -307,8 +343,7 @@ (mid_blk == *last_blk && mid_blk-1 == first_blk)); return 0; -} /* xlog_find_cycle_start */ - +} /* * Check that the range of blocks does not contain the cycle number @@ -320,27 +355,27 @@ * Set blkno to -1 if we encounter no errors. This is an invalid block number * since we don't ever expect logs to get this large. 
*/ - STATIC int -xlog_find_verify_cycle( xlog_t *log, - xfs_daddr_t start_blk, - int nbblks, - uint stop_on_cycle_no, - xfs_daddr_t *new_blk) +xlog_find_verify_cycle( + xlog_t *log, + xfs_daddr_t start_blk, + int nbblks, + uint stop_on_cycle_no, + xfs_daddr_t *new_blk) { - xfs_daddr_t i, j; - uint cycle; - xfs_buf_t *bp; - char *buf = NULL; - int error = 0; - xfs_daddr_t bufblks; + xfs_daddr_t i, j; + uint cycle; + xfs_buf_t *bp; + xfs_daddr_t bufblks; + xfs_caddr_t buf = NULL; + int error = 0; bufblks = 1 << ffs(nbblks); - while (!(bp = xlog_get_bp(bufblks, log->l_mp))) { + while (!(bp = xlog_get_bp(log, bufblks))) { /* can't get enough memory to do everything in one big buffer */ bufblks >>= 1; - if (!bufblks) + if (bufblks <= log->l_sectbb_log) return ENOMEM; } @@ -352,7 +387,7 @@ if ((error = xlog_bread(log, i, bcount, bp))) goto out; - buf = XFS_BUF_PTR(bp); + buf = xlog_align(log, i, bcount, bp); for (j = 0; j < bcount; j++) { cycle = GET_CYCLE(buf, ARCH_CONVERT); if (cycle == stop_on_cycle_no) { @@ -368,10 +403,8 @@ out: xlog_put_bp(bp); - return error; -} /* xlog_find_verify_cycle */ - +} /* * Potentially backup over partial log record write. @@ -385,98 +418,103 @@ * extra_bblks is the number of blocks potentially verified on a previous * call to this routine. 
*/ - STATIC int -xlog_find_verify_log_record(xlog_t *log, - xfs_daddr_t start_blk, - xfs_daddr_t *last_blk, - int extra_bblks) -{ - xfs_daddr_t i; - xfs_buf_t *bp; - char *buf = NULL; - xlog_rec_header_t *head = NULL; - int error = 0; - int smallmem = 0; - int num_blks = *last_blk - start_blk; - int xhdrs; - - ASSERT(start_blk != 0 || *last_blk != start_blk); - - if (!(bp = xlog_get_bp(num_blks, log->l_mp))) { - if (!(bp = xlog_get_bp(1, log->l_mp))) - return ENOMEM; - smallmem = 1; - buf = XFS_BUF_PTR(bp); - } else { - if ((error = xlog_bread(log, start_blk, num_blks, bp))) - goto out; - buf = XFS_BUF_PTR(bp) + ((num_blks - 1) << BBSHIFT); - } - - for (i = (*last_blk) - 1; i >= 0; i--) { - if (i < start_blk) { - /* legal log record not found */ - xlog_warn("XFS: Log inconsistent (didn't find previous header)"); - ASSERT(0); - error = XFS_ERROR(EIO); - goto out; +xlog_find_verify_log_record( + xlog_t *log, + xfs_daddr_t start_blk, + xfs_daddr_t *last_blk, + int extra_bblks) +{ + xfs_daddr_t i; + xfs_buf_t *bp; + xfs_caddr_t offset = NULL; + xlog_rec_header_t *head = NULL; + int error = 0; + int smallmem = 0; + int num_blks = *last_blk - start_blk; + int xhdrs; + + ASSERT(start_blk != 0 || *last_blk != start_blk); + + if (!(bp = xlog_get_bp(log, num_blks))) { + if (!(bp = xlog_get_bp(log, 1))) + return ENOMEM; + smallmem = 1; + } else { + if ((error = xlog_bread(log, start_blk, num_blks, bp))) + goto out; + offset = xlog_align(log, start_blk, num_blks, bp); + offset += ((num_blks - 1) << BBSHIFT); } - if (smallmem && (error = xlog_bread(log, i, 1, bp))) - goto out; - head = (xlog_rec_header_t*)buf; - - if (INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) - break; - - if (!smallmem) - buf -= BBSIZE; - } - - /* - * We hit the beginning of the physical log & still no header. Return - * to caller. If caller can handle a return of -1, then this routine - * will be called again for the end of the physical log. 
- */ - if (i == -1) { - error = -1; - goto out; - } - - /* we have the final block of the good log (the first block - * of the log record _before_ the head. So we check the uuid. - */ - - if ((error = xlog_header_check_mount(log->l_mp, head))) - goto out; - - /* - * We may have found a log record header before we expected one. - * last_blk will be the 1st block # with a given cycle #. We may end - * up reading an entire log record. In this case, we don't want to - * reset last_blk. Only when last_blk points in the middle of a log - * record do we update last_blk. - */ - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - uint h_size = INT_GET(head->h_size, ARCH_CONVERT); - - xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - xhdrs++; - } else { - xhdrs = 1; - } - - if (*last_blk - i + extra_bblks - != BTOBB(INT_GET(head->h_len, ARCH_CONVERT))+xhdrs) - *last_blk = i; + for (i = (*last_blk) - 1; i >= 0; i--) { + if (i < start_blk) { + /* legal log record not found */ + xlog_warn( + "XFS: Log inconsistent (didn't find previous header)"); + ASSERT(0); + error = XFS_ERROR(EIO); + goto out; + } -out: - xlog_put_bp(bp); + if (smallmem) { + if ((error = xlog_bread(log, i, 1, bp))) + goto out; + offset = xlog_align(log, i, 1, bp); + } - return error; -} /* xlog_find_verify_log_record */ + head = (xlog_rec_header_t *)offset; + + if (XLOG_HEADER_MAGIC_NUM == + INT_GET(head->h_magicno, ARCH_CONVERT)) + break; + + if (!smallmem) + offset -= BBSIZE; + } + + /* + * We hit the beginning of the physical log & still no header. Return + * to caller. If caller can handle a return of -1, then this routine + * will be called again for the end of the physical log. + */ + if (i == -1) { + error = -1; + goto out; + } + + /* + * We have the final block of the good log (the first block + * of the log record _before_ the head. So we check the uuid. 
+ */ + if ((error = xlog_header_check_mount(log->l_mp, head))) + goto out; + + /* + * We may have found a log record header before we expected one. + * last_blk will be the 1st block # with a given cycle #. We may end + * up reading an entire log record. In this case, we don't want to + * reset last_blk. Only when last_blk points in the middle of a log + * record do we update last_blk. + */ + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { + uint h_size = INT_GET(head->h_size, ARCH_CONVERT); + + xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + xhdrs++; + } else { + xhdrs = 1; + } + + if (*last_blk - i + extra_bblks + != BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs) + *last_blk = i; + +out: + xlog_put_bp(bp); + return error; +} /* * Head is defined to be the point of the log where the next log write @@ -489,252 +527,257 @@ * last_blk contains the block number of the first block with a given * cycle number. * - * Also called from xfs_log_print.c - * * Return: zero if normal, non-zero if error. */ int -xlog_find_head(xlog_t *log, - xfs_daddr_t *return_head_blk) +xlog_find_head( + xlog_t *log, + xfs_daddr_t *return_head_blk) { - xfs_buf_t *bp; - xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; - int num_scan_bblks; - uint first_half_cycle, last_half_cycle; - uint stop_on_cycle; - int error, log_bbnum = log->l_logBBsize; - - /* Is the end of the log device zeroed? */ - if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { - *return_head_blk = first_blk; - - /* is the whole lot zeroed? 
*/ - if (!first_blk) { - /* Linux XFS shouldn't generate totally zeroed logs - - * mkfs etc write a dummy unmount record to a fresh - * log so we can store the uuid in there - */ - xlog_warn("XFS: totally zeroed log"); - } + xfs_buf_t *bp; + xfs_caddr_t offset; + xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; + int num_scan_bblks; + uint first_half_cycle, last_half_cycle; + uint stop_on_cycle; + int error, log_bbnum = log->l_logBBsize; + + /* Is the end of the log device zeroed? */ + if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { + *return_head_blk = first_blk; + + /* Is the whole lot zeroed? */ + if (!first_blk) { + /* Linux XFS shouldn't generate totally zeroed logs - + * mkfs etc write a dummy unmount record to a fresh + * log so we can store the uuid in there + */ + xlog_warn("XFS: totally zeroed log"); + } - return 0; - } else if (error) { - xlog_warn("XFS: empty log check failed"); - return error; - } + return 0; + } else if (error) { + xlog_warn("XFS: empty log check failed"); + return error; + } - first_blk = 0; /* get cycle # of 1st block */ - bp = xlog_get_bp(1,log->l_mp); - if (!bp) - return ENOMEM; - if ((error = xlog_bread(log, 0, 1, bp))) - goto bp_err; - first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); - - last_blk = head_blk = log_bbnum-1; /* get cycle # of last block */ - if ((error = xlog_bread(log, last_blk, 1, bp))) - goto bp_err; - last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); - ASSERT(last_half_cycle != 0); - - /* - * If the 1st half cycle number is equal to the last half cycle number, - * then the entire log is stamped with the same cycle number. In this - * case, head_blk can't be set to zero (which makes sense). The below - * math doesn't work out properly with head_blk equal to zero. Instead, - * we set it to log_bbnum which is an illegal block number, but this - * value makes the math correct. 
If head_blk doesn't changed through - * all the tests below, *head_blk is set to zero at the very end rather - * than log_bbnum. In a sense, log_bbnum and zero are the same block - * in a circular file. - */ - if (first_half_cycle == last_half_cycle) { - /* - * In this case we believe that the entire log should have cycle - * number last_half_cycle. We need to scan backwards from the - * end verifying that there are no holes still containing - * last_half_cycle - 1. If we find such a hole, then the start - * of that hole will be the new head. The simple case looks like - * x | x ... | x - 1 | x - * Another case that fits this picture would be - * x | x + 1 | x ... | x - * In this case the head really is somwhere at the end of the - * log, as one of the latest writes at the beginning was incomplete. - * One more case is - * x | x + 1 | x ... | x - 1 | x - * This is really the combination of the above two cases, and the - * head has to end up at the start of the x-1 hole at the end of - * the log. - * - * In the 256k log case, we will read from the beginning to the - * end of the log and search for cycle numbers equal to x-1. We - * don't worry about the x+1 blocks that we encounter, because - * we know that they cannot be the head since the log started with - * x. - */ - head_blk = log_bbnum; - stop_on_cycle = last_half_cycle - 1; - } else { - /* - * In this case we want to find the first block with cycle number - * matching last_half_cycle. We expect the log to be some - * variation on - * x + 1 ... | x ... - * The first block with cycle number x (last_half_cycle) will be - * where the new head belongs. First we do a binary search for - * the first occurrence of last_half_cycle. The binary search - * may not be totally accurate, so then we scan back from there - * looking for occurrences of last_half_cycle before us. If - * that backwards scan wraps around the beginning of the log, - * then we look for occurrences of last_half_cycle - 1 at the - * end of the log. 
The cases we're looking for look like - * x + 1 ... | x | x + 1 | x ... - * ^ binary search stopped here - * or - * x + 1 ... | x ... | x - 1 | x - * <---------> less than scan distance - */ - stop_on_cycle = last_half_cycle; - if ((error = xlog_find_cycle_start(log, bp, first_blk, - &head_blk, last_half_cycle))) - goto bp_err; - } + first_blk = 0; /* get cycle # of 1st block */ + bp = xlog_get_bp(log, 1); + if (!bp) + return ENOMEM; + if ((error = xlog_bread(log, 0, 1, bp))) + goto bp_err; + offset = xlog_align(log, 0, 1, bp); + first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); - /* - * Now validate the answer. Scan back some number of maximum possible - * blocks and make sure each one has the expected cycle number. The - * maximum is determined by the total possible amount of buffering - * in the in-core log. The following number can be made tighter if - * we actually look at the block size of the filesystem. - */ - num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); - if (head_blk >= num_scan_bblks) { - /* - * We are guaranteed that the entire check can be performed - * in one buffer. - */ - start_blk = head_blk - num_scan_bblks; - if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks, - stop_on_cycle, &new_blk))) - goto bp_err; - if (new_blk != -1) - head_blk = new_blk; - } else { /* need to read 2 parts of log */ - /* - * We are going to scan backwards in the log in two parts. First - * we scan the physical end of the log. In this part of the log, - * we are looking for blocks with cycle number last_half_cycle - 1. - * If we find one, then we know that the log starts there, as we've - * found a hole that didn't get written in going around the end - * of the physical log. The simple case for this is - * x + 1 ... | x ... | x - 1 | x - * <---------> less than scan distance - * If all of the blocks at the end of the log have cycle number - * last_half_cycle, then we check the blocks at the start of the - * log looking for occurrences of last_half_cycle. 
If we find one, - * then our current estimate for the location of the first - * occurrence of last_half_cycle is wrong and we move back to the - * hole we've found. This case looks like - * x + 1 ... | x | x + 1 | x ... - * ^ binary search stopped here - * Another case we need to handle that only occurs in 256k logs is - * x + 1 ... | x ... | x+1 | x ... - * ^ binary search stops here - * In a 256k log, the scan at the end of the log will see the x+1 - * blocks. We need to skip past those since that is certainly not - * the head of the log. By searching for last_half_cycle-1 we - * accomplish that. - */ - start_blk = log_bbnum - num_scan_bblks + head_blk; - ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks-head_blk >= 0); - if ((error = xlog_find_verify_cycle(log, start_blk, - num_scan_bblks-(int)head_blk, (stop_on_cycle - 1), - &new_blk))) + last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ + if ((error = xlog_bread(log, last_blk, 1, bp))) goto bp_err; - if (new_blk != -1) { - head_blk = new_blk; - goto bad_blk; + offset = xlog_align(log, last_blk, 1, bp); + last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT); + ASSERT(last_half_cycle != 0); + + /* + * If the 1st half cycle number is equal to the last half cycle number, + * then the entire log is stamped with the same cycle number. In this + * case, head_blk can't be set to zero (which makes sense). The below + * math doesn't work out properly with head_blk equal to zero. Instead, + * we set it to log_bbnum which is an illegal block number, but this + * value makes the math correct. If head_blk doesn't changed through + * all the tests below, *head_blk is set to zero at the very end rather + * than log_bbnum. In a sense, log_bbnum and zero are the same block + * in a circular file. + */ + if (first_half_cycle == last_half_cycle) { + /* + * In this case we believe that the entire log should have + * cycle number last_half_cycle. 
We need to scan backwards + * from the end verifying that there are no holes still + * containing last_half_cycle - 1. If we find such a hole, + * then the start of that hole will be the new head. The + * simple case looks like + * x | x ... | x - 1 | x + * Another case that fits this picture would be + * x | x + 1 | x ... | x + * In this case the head really is somwhere at the end of the + * log, as one of the latest writes at the beginning was + * incomplete. + * One more case is + * x | x + 1 | x ... | x - 1 | x + * This is really the combination of the above two cases, and + * the head has to end up at the start of the x-1 hole at the + * end of the log. + * + * In the 256k log case, we will read from the beginning to the + * end of the log and search for cycle numbers equal to x-1. + * We don't worry about the x+1 blocks that we encounter, + * because we know that they cannot be the head since the log + * started with x. + */ + head_blk = log_bbnum; + stop_on_cycle = last_half_cycle - 1; + } else { + /* + * In this case we want to find the first block with cycle + * number matching last_half_cycle. We expect the log to be + * some variation on + * x + 1 ... | x ... + * The first block with cycle number x (last_half_cycle) will + * be where the new head belongs. First we do a binary search + * for the first occurrence of last_half_cycle. The binary + * search may not be totally accurate, so then we scan back + * from there looking for occurrences of last_half_cycle before + * us. If that backwards scan wraps around the beginning of + * the log, then we look for occurrences of last_half_cycle - 1 + * at the end of the log. The cases we're looking for look + * like + * x + 1 ... | x | x + 1 | x ... + * ^ binary search stopped here + * or + * x + 1 ... | x ... 
| x - 1 | x + * <---------> less than scan distance + */ + stop_on_cycle = last_half_cycle; + if ((error = xlog_find_cycle_start(log, bp, first_blk, + &head_blk, last_half_cycle))) + goto bp_err; } /* - * Scan beginning of log now. The last part of the physical log - * is good. This scan needs to verify that it doesn't find the - * last_half_cycle. + * Now validate the answer. Scan back some number of maximum possible + * blocks and make sure each one has the expected cycle number. The + * maximum is determined by the total possible amount of buffering + * in the in-core log. The following number can be made tighter if + * we actually look at the block size of the filesystem. */ - start_blk = 0; - ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_cycle(log, start_blk, (int) head_blk, - stop_on_cycle, &new_blk))) - goto bp_err; - if (new_blk != -1) - head_blk = new_blk; - } - -bad_blk: - /* - * Now we need to make sure head_blk is not pointing to a block in - * the middle of a log record. 
- */ - num_scan_bblks = BTOBB(XLOG_MAX_RECORD_BSIZE); - if (head_blk >= num_scan_bblks) { - start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ - - /* start ptr at last block ptr before head_blk */ - if ((error = xlog_find_verify_log_record(log, - start_blk, - &head_blk, - 0)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) - goto bp_err; - } else { - start_blk = 0; - ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, - start_blk, - &head_blk, - 0)) == -1) { - /* We hit the beginning of the log during our search */ - start_blk = log_bbnum - num_scan_bblks + head_blk; - new_blk = log_bbnum; - ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0); - ASSERT(head_blk <= INT_MAX); - if ((error = xlog_find_verify_log_record(log, - start_blk, - &new_blk, - (int)head_blk)) == -1) { - error = XFS_ERROR(EIO); - goto bp_err; - } else if (error) - goto bp_err; - if (new_blk != log_bbnum) - head_blk = new_blk; - } else if (error) - goto bp_err; - } + num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); + if (head_blk >= num_scan_bblks) { + /* + * We are guaranteed that the entire check can be performed + * in one buffer. + */ + start_blk = head_blk - num_scan_bblks; + if ((error = xlog_find_verify_cycle(log, + start_blk, num_scan_bblks, + stop_on_cycle, &new_blk))) + goto bp_err; + if (new_blk != -1) + head_blk = new_blk; + } else { /* need to read 2 parts of log */ + /* + * We are going to scan backwards in the log in two parts. + * First we scan the physical end of the log. In this part + * of the log, we are looking for blocks with cycle number + * last_half_cycle - 1. + * If we find one, then we know that the log starts there, as + * we've found a hole that didn't get written in going around + * the end of the physical log. The simple case for this is + * x + 1 ... | x ... 
| x - 1 | x + * <---------> less than scan distance + * If all of the blocks at the end of the log have cycle number + * last_half_cycle, then we check the blocks at the start of + * the log looking for occurrences of last_half_cycle. If we + * find one, then our current estimate for the location of the + * first occurrence of last_half_cycle is wrong and we move + * back to the hole we've found. This case looks like + * x + 1 ... | x | x + 1 | x ... + * ^ binary search stopped here + * Another case we need to handle that only occurs in 256k + * logs is + * x + 1 ... | x ... | x+1 | x ... + * ^ binary search stops here + * In a 256k log, the scan at the end of the log will see the + * x + 1 blocks. We need to skip past those since that is + * certainly not the head of the log. By searching for + * last_half_cycle-1 we accomplish that. + */ + start_blk = log_bbnum - num_scan_bblks + head_blk; + ASSERT(head_blk <= INT_MAX && + (xfs_daddr_t) num_scan_bblks - head_blk >= 0); + if ((error = xlog_find_verify_cycle(log, start_blk, + num_scan_bblks - (int)head_blk, + (stop_on_cycle - 1), &new_blk))) + goto bp_err; + if (new_blk != -1) { + head_blk = new_blk; + goto bad_blk; + } + + /* + * Scan beginning of log now. The last part of the physical + * log is good. This scan needs to verify that it doesn't find + * the last_half_cycle. + */ + start_blk = 0; + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_cycle(log, + start_blk, (int)head_blk, + stop_on_cycle, &new_blk))) + goto bp_err; + if (new_blk != -1) + head_blk = new_blk; + } + + bad_blk: + /* + * Now we need to make sure head_blk is not pointing to a block in + * the middle of a log record. 
+ */ + num_scan_bblks = XLOG_REC_SHIFT(log); + if (head_blk >= num_scan_bblks) { + start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ + + /* start ptr at last block ptr before head_blk */ + if ((error = xlog_find_verify_log_record(log, start_blk, + &head_blk, 0)) == -1) { + error = XFS_ERROR(EIO); + goto bp_err; + } else if (error) + goto bp_err; + } else { + start_blk = 0; + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_log_record(log, start_blk, + &head_blk, 0)) == -1) { + /* We hit the beginning of the log during our search */ + start_blk = log_bbnum - num_scan_bblks + head_blk; + new_blk = log_bbnum; + ASSERT(start_blk <= INT_MAX && + (xfs_daddr_t) log_bbnum-start_blk >= 0); + ASSERT(head_blk <= INT_MAX); + if ((error = xlog_find_verify_log_record(log, + start_blk, &new_blk, + (int)head_blk)) == -1) { + error = XFS_ERROR(EIO); + goto bp_err; + } else if (error) + goto bp_err; + if (new_blk != log_bbnum) + head_blk = new_blk; + } else if (error) + goto bp_err; + } - xlog_put_bp(bp); - if (head_blk == log_bbnum) - *return_head_blk = 0; - else - *return_head_blk = head_blk; - /* - * When returning here, we have a good block number. Bad block - * means that during a previous crash, we didn't have a clean break - * from cycle number N to cycle number N-1. In this case, we need - * to find the first block with cycle number N-1. - */ - return 0; + xlog_put_bp(bp); + if (head_blk == log_bbnum) + *return_head_blk = 0; + else + *return_head_blk = head_blk; + /* + * When returning here, we have a good block number. Bad block + * means that during a previous crash, we didn't have a clean break + * from cycle number N to cycle number N-1. In this case, we need + * to find the first block with cycle number N-1. + */ + return 0; -bp_err: + bp_err: xlog_put_bp(bp); if (error) xlog_warn("XFS: failed to find log head"); - return error; -} /* xlog_find_head */ +} /* * Find the sync block number or the tail of the log. 
@@ -753,13 +796,15 @@ * available. */ int -xlog_find_tail(xlog_t *log, - xfs_daddr_t *head_blk, - xfs_daddr_t *tail_blk, - int readonly) +xlog_find_tail( + xlog_t *log, + xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, + int readonly) { xlog_rec_header_t *rhead; xlog_op_header_t *op_head; + xfs_caddr_t offset = NULL; xfs_buf_t *bp; int error, i, found; xfs_daddr_t umount_data_blk; @@ -775,13 +820,14 @@ if ((error = xlog_find_head(log, head_blk))) return error; - bp = xlog_get_bp(1,log->l_mp); + bp = xlog_get_bp(log, 1); if (!bp) return ENOMEM; if (*head_blk == 0) { /* special case */ if ((error = xlog_bread(log, 0, 1, bp))) goto bread_err; - if (GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT) == 0) { + offset = xlog_align(log, 0, 1, bp); + if (GET_CYCLE(offset, ARCH_CONVERT) == 0) { *tail_blk = 0; /* leave all other log inited values alone */ goto exit; @@ -795,8 +841,9 @@ for (i = (int)(*head_blk) - 1; i >= 0; i--) { if ((error = xlog_bread(log, i, 1, bp))) goto bread_err; + offset = xlog_align(log, i, 1, bp); if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint *)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) { + INT_GET(*(uint *)offset, ARCH_CONVERT)) { found = 1; break; } @@ -811,8 +858,9 @@ for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { if ((error = xlog_bread(log, i, 1, bp))) goto bread_err; + offset = xlog_align(log, i, 1, bp); if (XLOG_HEADER_MAGIC_NUM == - INT_GET(*(uint*)(XFS_BUF_PTR(bp)), ARCH_CONVERT)) { + INT_GET(*(uint*)offset, ARCH_CONVERT)) { found = 2; break; } @@ -825,7 +873,7 @@ } /* find blk_no of tail of log */ - rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp); + rhead = (xlog_rec_header_t *)offset; *tail_blk = BLOCK_LSN(rhead->h_tail_lsn, ARCH_CONVERT); /* @@ -885,7 +933,8 @@ if ((error = xlog_bread(log, umount_data_blk, 1, bp))) { goto bread_err; } - op_head = (xlog_op_header_t *)XFS_BUF_PTR(bp); + offset = xlog_align(log, umount_data_blk, 1, bp); + op_head = (xlog_op_header_t *)offset; if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { /* * Set tail and last 
sync so that newly written @@ -900,7 +949,6 @@ } } -#ifdef __KERNEL__ /* * Make sure that there are no blocks in front of the head * with the same cycle number as the head. This can happen @@ -920,11 +968,9 @@ * But... if the -device- itself is readonly, just skip this. * We can't recover this device anyway, so it won't matter. */ - - if (!is_read_only(log->l_mp->m_logdev_targp->pbr_kdev)) { + if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { error = xlog_clear_stale_blocks(log, tail_lsn); } -#endif bread_err: exit: @@ -932,10 +978,8 @@ if (error) xlog_warn("XFS: failed to locate log tail"); - return error; -} /* xlog_find_tail */ - +} /* * Is the log zeroed at all? @@ -954,22 +998,25 @@ * >0 => error has occurred */ int -xlog_find_zeroed(struct log *log, - xfs_daddr_t *blk_no) +xlog_find_zeroed( + xlog_t *log, + xfs_daddr_t *blk_no) { xfs_buf_t *bp; + xfs_caddr_t offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t num_scan_bblks; int error, log_bbnum = log->l_logBBsize; /* check totally zeroed log */ - bp = xlog_get_bp(1,log->l_mp); + bp = xlog_get_bp(log, 1); if (!bp) return ENOMEM; if ((error = xlog_bread(log, 0, 1, bp))) goto bp_err; - first_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); + offset = xlog_align(log, 0, 1, bp); + first_cycle = GET_CYCLE(offset, ARCH_CONVERT); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; xlog_put_bp(bp); @@ -979,7 +1026,8 @@ /* check partially zeroed log */ if ((error = xlog_bread(log, log_bbnum-1, 1, bp))) goto bp_err; - last_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); + offset = xlog_align(log, log_bbnum-1, 1, bp); + last_cycle = GET_CYCLE(offset, ARCH_CONVERT); if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; @@ -1040,67 +1088,106 @@ if (error) return error; return -1; -} /* xlog_find_zeroed */ +} /* - * This is simply a subroutine used by xlog_clear_stale_blocks() below + * These are simple subroutines used by 
xlog_clear_stale_blocks() below * to initialize a buffer full of empty log record headers and write * them into the log. */ +STATIC void +xlog_add_record( + xlog_t *log, + xfs_caddr_t buf, + int cycle, + int block, + int tail_cycle, + int tail_block) +{ + xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; + + memset(buf, 0, BBSIZE); + INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); + INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); + INT_SET(recp->h_version, ARCH_CONVERT, + XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1); + ASSIGN_ANY_LSN(recp->h_lsn, cycle, block, ARCH_CONVERT); + ASSIGN_ANY_LSN(recp->h_tail_lsn, tail_cycle, tail_block, ARCH_CONVERT); + INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT); + memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); +} + STATIC int xlog_write_log_records( - xlog_t *log, - int cycle, - int start_block, - int blocks, - int tail_cycle, - int tail_block) -{ - xlog_rec_header_t *recp; - int i, j; - int end_block = start_block + blocks; - int error = 0; - xfs_buf_t *bp; - char *buf; - int bufblks; + xlog_t *log, + int cycle, + int start_block, + int blocks, + int tail_cycle, + int tail_block) +{ + xfs_caddr_t offset; + xfs_buf_t *bp; + int balign, ealign; + int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); + int end_block = start_block + blocks; + int bufblks; + int error = 0; + int i, j = 0; bufblks = 1 << ffs(blocks); - while (!(bp = xlog_get_bp(bufblks, log->l_mp))) { + while (!(bp = xlog_get_bp(log, bufblks))) { bufblks >>= 1; - if (!bufblks) + if (bufblks <= log->l_sectbb_log) return ENOMEM; } - buf = XFS_BUF_PTR(bp); - recp = (xlog_rec_header_t*)buf; - - memset(buf, 0, BBSIZE); - INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); - INT_SET(recp->h_cycle, ARCH_CONVERT, cycle); - INT_SET(recp->h_version, ARCH_CONVERT, - XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 
2 : 1); - ASSIGN_ANY_LSN(recp->h_tail_lsn, tail_cycle, tail_block, ARCH_CONVERT); + /* We may need to do a read at the start to fill in part of + * the buffer in the starting sector not covered by the first + * write below. + */ + balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); + if (balign != start_block) { + if ((error = xlog_bread(log, start_block, 1, bp))) { + xlog_put_bp(bp); + return error; + } + j = start_block - balign; + } for (i = start_block; i < end_block; i += bufblks) { - int bcount = min(bufblks, end_block - start_block); - /* with plenty of memory, we duplicate the block - * right through the buffer and modify each entry - */ - ASSIGN_ANY_LSN(recp->h_lsn, cycle, i, ARCH_CONVERT); - for (j = 1; j < bcount; j++) { - buf += BBSIZE; - recp = (xlog_rec_header_t*)buf; - memcpy(buf, XFS_BUF_PTR(bp), BBSIZE); - ASSIGN_ANY_LSN(recp->h_lsn, cycle, i+j, ARCH_CONVERT); - } - /* then write the whole lot out at once */ - error = xlog_bwrite(log, start_block, bcount, bp); - start_block += bcount; - buf = XFS_BUF_PTR(bp); - recp = (xlog_rec_header_t*)buf; + int bcount, endcount; + + bcount = min(bufblks, end_block - start_block); + endcount = bcount - j; + + /* We may need to do a read at the end to fill in part of + * the buffer in the final sector not covered by the write. + * If this is the same sector as the above read, skip it. 
+ */ + ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); + if (j == 0 && (start_block + endcount > ealign)) { + offset = XFS_BUF_PTR(bp); + balign = BBTOB(ealign - start_block); + XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); + if ((error = xlog_bread(log, ealign, sectbb, bp))) + break; + XFS_BUF_SET_PTR(bp, offset, bufblks); + } + + offset = xlog_align(log, start_block, endcount, bp); + for (; j < endcount; j++) { + xlog_add_record(log, offset, cycle, i+j, + tail_cycle, tail_block); + offset += BBSIZE; + } + error = xlog_bwrite(log, start_block, endcount, bp); + if (error) + break; + start_block += endcount; + j = 0; } xlog_put_bp(bp); - return error; } @@ -1244,10 +1331,11 @@ */ STATIC xlog_recover_t * -xlog_recover_find_tid(xlog_recover_t *q, - xlog_tid_t tid) +xlog_recover_find_tid( + xlog_recover_t *q, + xlog_tid_t tid) { - xlog_recover_t *p = q; + xlog_recover_t *p = q; while (p != NULL) { if (p->r_log_tid == tid) @@ -1255,42 +1343,43 @@ p = p->r_next; } return p; -} /* xlog_recover_find_tid */ - +} STATIC void -xlog_recover_put_hashq(xlog_recover_t **q, - xlog_recover_t *trans) +xlog_recover_put_hashq( + xlog_recover_t **q, + xlog_recover_t *trans) { trans->r_next = *q; *q = trans; -} /* xlog_recover_put_hashq */ - +} STATIC void -xlog_recover_add_item(xlog_recover_item_t **itemq) +xlog_recover_add_item( + xlog_recover_item_t **itemq) { - xlog_recover_item_t *item; + xlog_recover_item_t *item; item = kmem_zalloc(sizeof(xlog_recover_item_t), 0); xlog_recover_insert_item_backq(itemq, item); -} /* xlog_recover_add_item */ - +} STATIC int -xlog_recover_add_to_cont_trans(xlog_recover_t *trans, - xfs_caddr_t dp, - int len) +xlog_recover_add_to_cont_trans( + xlog_recover_t *trans, + xfs_caddr_t dp, + int len) { xlog_recover_item_t *item; - xfs_caddr_t ptr, old_ptr; + xfs_caddr_t ptr, old_ptr; int old_len; item = trans->r_itemq; if (item == 0) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); - ptr = 
(xfs_caddr_t)&trans->r_theader+sizeof(xfs_trans_header_t)-len; + ptr = (xfs_caddr_t) &trans->r_theader + + sizeof(xfs_trans_header_t) - len; memcpy(ptr, dp, len); /* d, s, l */ return 0; } @@ -1304,10 +1393,10 @@ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; return 0; -} /* xlog_recover_add_to_cont_trans */ - +} -/* The next region to add is the start of a new region. It could be +/* + * The next region to add is the start of a new region. It could be * a whole region or it could be the first part of a new region. Because * of this, the assumption here is that the type and size fields of all * format structures fit into the first 32 bits of the structure. @@ -1320,13 +1409,14 @@ * will appear in the current log item. */ STATIC int -xlog_recover_add_to_trans(xlog_recover_t *trans, - xfs_caddr_t dp, - int len) -{ - xfs_inode_log_format_t *in_f; /* any will do */ - xlog_recover_item_t *item; - xfs_caddr_t ptr; +xlog_recover_add_to_trans( + xlog_recover_t *trans, + xfs_caddr_t dp, + int len) +{ + xfs_inode_log_format_t *in_f; /* any will do */ + xlog_recover_item_t *item; + xfs_caddr_t ptr; if (!len) return 0; @@ -1339,7 +1429,7 @@ return 0; } - ptr = kmem_alloc(len, 0); + ptr = kmem_alloc(len, KM_SLEEP); memcpy(ptr, dp, len); in_f = (xfs_inode_log_format_t *)ptr; @@ -1362,29 +1452,29 @@ item->ri_buf[item->ri_cnt].i_len = len; item->ri_cnt++; return 0; -} /* xlog_recover_add_to_trans */ - +} STATIC void -xlog_recover_new_tid(xlog_recover_t **q, - xlog_tid_t tid, - xfs_lsn_t lsn) +xlog_recover_new_tid( + xlog_recover_t **q, + xlog_tid_t tid, + xfs_lsn_t lsn) { - xlog_recover_t *trans; + xlog_recover_t *trans; - trans = kmem_zalloc(sizeof(xlog_recover_t), 0); + trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); trans->r_log_tid = tid; trans->r_lsn = lsn; xlog_recover_put_hashq(q, trans); -} /* xlog_recover_new_tid */ - +} STATIC int -xlog_recover_unlink_tid(xlog_recover_t **q, - xlog_recover_t *trans) +xlog_recover_unlink_tid( 
+ xlog_recover_t **q, + xlog_recover_t *trans) { - xlog_recover_t *tp; - int found = 0; + xlog_recover_t *tp; + int found = 0; ASSERT(trans != 0); if (trans == *q) { @@ -1407,11 +1497,12 @@ tp->r_next = tp->r_next->r_next; } return 0; -} /* xlog_recover_unlink_tid */ +} STATIC void -xlog_recover_insert_item_backq(xlog_recover_item_t **q, - xlog_recover_item_t *item) +xlog_recover_insert_item_backq( + xlog_recover_item_t **q, + xlog_recover_item_t *item) { if (*q == 0) { item->ri_prev = item->ri_next = item; @@ -1422,55 +1513,53 @@ (*q)->ri_prev = item; item->ri_prev->ri_next = item; } -} /* xlog_recover_insert_item_backq */ +} STATIC void -xlog_recover_insert_item_frontq(xlog_recover_item_t **q, - xlog_recover_item_t *item) +xlog_recover_insert_item_frontq( + xlog_recover_item_t **q, + xlog_recover_item_t *item) { xlog_recover_insert_item_backq(q, item); *q = item; -} /* xlog_recover_insert_item_frontq */ +} STATIC int -xlog_recover_reorder_trans(xlog_t *log, - xlog_recover_t *trans) +xlog_recover_reorder_trans( + xlog_t *log, + xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *itemq, *itemq_next; + xlog_recover_item_t *first_item, *itemq, *itemq_next; - first_item = itemq = trans->r_itemq; - trans->r_itemq = NULL; - do { - itemq_next = itemq->ri_next; - switch (ITEM_TYPE(itemq)) { - case XFS_LI_BUF: - case XFS_LI_6_1_BUF: - case XFS_LI_5_3_BUF: { - xlog_recover_insert_item_frontq(&trans->r_itemq, itemq); - break; - } - case XFS_LI_INODE: - case XFS_LI_6_1_INODE: - case XFS_LI_5_3_INODE: - case XFS_LI_DQUOT: - case XFS_LI_QUOTAOFF: - case XFS_LI_EFD: - case XFS_LI_EFI: { - xlog_recover_insert_item_backq(&trans->r_itemq, itemq); - break; - } - default: { - xlog_warn( + first_item = itemq = trans->r_itemq; + trans->r_itemq = NULL; + do { + itemq_next = itemq->ri_next; + switch (ITEM_TYPE(itemq)) { + case XFS_LI_BUF: + case XFS_LI_6_1_BUF: + case XFS_LI_5_3_BUF: + xlog_recover_insert_item_frontq(&trans->r_itemq, itemq); + break; + case XFS_LI_INODE: + case 
XFS_LI_6_1_INODE: + case XFS_LI_5_3_INODE: + case XFS_LI_DQUOT: + case XFS_LI_QUOTAOFF: + case XFS_LI_EFD: + case XFS_LI_EFI: + xlog_recover_insert_item_backq(&trans->r_itemq, itemq); + break; + default: + xlog_warn( "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); - ASSERT(0); - return XFS_ERROR(EIO); - } - } - itemq = itemq_next; - } while (first_item != itemq); - return 0; -} /* xlog_recover_reorder_trans */ - + ASSERT(0); + return XFS_ERROR(EIO); + } + itemq = itemq_next; + } while (first_item != itemq); + return 0; +} /* * Build up the table of buf cancel records so that we don't replay @@ -1485,17 +1574,18 @@ * record during the second pass. */ STATIC void -xlog_recover_do_buffer_pass1(xlog_t *log, - xfs_buf_log_format_t *buf_f) +xlog_recover_do_buffer_pass1( + xlog_t *log, + xfs_buf_log_format_t *buf_f) { xfs_buf_cancel_t *bcp; xfs_buf_cancel_t *nextp; xfs_buf_cancel_t *prevp; xfs_buf_cancel_t **bucket; xfs_buf_log_format_v1_t *obuf_f; - xfs_daddr_t blkno=0; - uint len=0; - ushort flags=0; + xfs_daddr_t blkno = 0; + uint len = 0; + ushort flags = 0; switch (buf_f->blf_type) { case XFS_LI_BUF: @@ -1515,9 +1605,8 @@ /* * If this isn't a cancel buffer item, then just return. */ - if (!(flags & XFS_BLI_CANCEL)) { + if (!(flags & XFS_BLI_CANCEL)) return; - } /* * Insert an xfs_buf_cancel record into the hash table of @@ -1531,8 +1620,8 @@ * the bucket. */ if (*bucket == NULL) { - bcp = (xfs_buf_cancel_t*)kmem_alloc(sizeof(xfs_buf_cancel_t), - KM_SLEEP); + bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), + KM_SLEEP); bcp->bc_blkno = blkno; bcp->bc_len = len; bcp->bc_refcount = 1; @@ -1557,8 +1646,8 @@ nextp = nextp->bc_next; } ASSERT(prevp != NULL); - bcp = (xfs_buf_cancel_t*)kmem_alloc(sizeof(xfs_buf_cancel_t), - KM_SLEEP); + bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t), + KM_SLEEP); bcp->bc_blkno = blkno; bcp->bc_len = len; bcp->bc_refcount = 1; @@ -1580,17 +1669,17 @@ * made at that point. 
*/ STATIC int -xlog_recover_do_buffer_pass2(xlog_t *log, - xfs_buf_log_format_t *buf_f) +xlog_recover_do_buffer_pass2( + xlog_t *log, + xfs_buf_log_format_t *buf_f) { xfs_buf_cancel_t *bcp; xfs_buf_cancel_t *prevp; xfs_buf_cancel_t **bucket; xfs_buf_log_format_v1_t *obuf_f; - xfs_daddr_t blkno=0; - ushort flags=0; - uint len=0; - + xfs_daddr_t blkno = 0; + ushort flags = 0; + uint len = 0; switch (buf_f->blf_type) { case XFS_LI_BUF: @@ -1667,7 +1756,6 @@ return 0; } - /* * Perform recovery for a buffer full of inodes. In these buffers, * the only data which should be recovered is that which corresponds @@ -1682,10 +1770,11 @@ * sent to xlog_recover_do_reg_buffer() below during recovery. */ STATIC int -xlog_recover_do_inode_buffer(xfs_mount_t *mp, - xlog_recover_item_t *item, - xfs_buf_t *bp, - xfs_buf_log_format_t *buf_f) +xlog_recover_do_inode_buffer( + xfs_mount_t *mp, + xlog_recover_item_t *item, + xfs_buf_t *bp, + xfs_buf_log_format_t *buf_f) { int i; int item_index; @@ -1698,8 +1787,8 @@ xfs_agino_t *logged_nextp; xfs_agino_t *buffer_nextp; xfs_buf_log_format_v1_t *obuf_f; - unsigned int *data_map=NULL; - unsigned int map_size=0; + unsigned int *data_map = NULL; + unsigned int map_size = 0; switch (buf_f->blf_type) { case XFS_LI_BUF: @@ -1790,7 +1879,7 @@ } return 0; -} /* xlog_recover_do_inode_buffer */ +} /* * Perform a 'normal' buffer recovery. 
Each logged region of the @@ -1800,17 +1889,18 @@ */ /*ARGSUSED*/ STATIC void -xlog_recover_do_reg_buffer(xfs_mount_t *mp, - xlog_recover_item_t *item, - xfs_buf_t *bp, - xfs_buf_log_format_t *buf_f) +xlog_recover_do_reg_buffer( + xfs_mount_t *mp, + xlog_recover_item_t *item, + xfs_buf_t *bp, + xfs_buf_log_format_t *buf_f) { int i; int bit; int nbits; xfs_buf_log_format_v1_t *obuf_f; - unsigned int *data_map=NULL; - unsigned int map_size=0; + unsigned int *data_map = NULL; + unsigned int map_size = 0; int error; switch (buf_f->blf_type) { @@ -1860,7 +1950,7 @@ /* Shouldn't be any more regions */ ASSERT(i == item->ri_total); -} /* xlog_recover_do_reg_buffer */ +} /* * Do some primitive error checking on ondisk dquot data structures. @@ -1991,7 +2081,7 @@ xfs_buf_t *bp, xfs_buf_log_format_t *buf_f) { - uint type; + uint type; /* * Filesystems are required to send in quota flags at mount time. @@ -2038,9 +2128,10 @@ * for more details on the implementation of the table of cancel records. */ STATIC int -xlog_recover_do_buffer_trans(xlog_t *log, - xlog_recover_item_t *item, - int pass) +xlog_recover_do_buffer_trans( + xlog_t *log, + xlog_recover_item_t *item, + int pass) { xfs_buf_log_format_t *buf_f; xfs_buf_log_format_v1_t *obuf_f; @@ -2075,19 +2166,19 @@ } } switch (buf_f->blf_type) { - case XFS_LI_BUF: + case XFS_LI_BUF: blkno = buf_f->blf_blkno; len = buf_f->blf_len; flags = buf_f->blf_flags; break; - case XFS_LI_6_1_BUF: - case XFS_LI_5_3_BUF: + case XFS_LI_6_1_BUF: + case XFS_LI_5_3_BUF: obuf_f = (xfs_buf_log_format_v1_t*)buf_f; blkno = obuf_f->blf_blkno; len = obuf_f->blf_len; flags = obuf_f->blf_flags; break; - default: + default: xfs_fs_cmn_err(CE_ALERT, log->l_mp, "xfs_log_recover: unknown buffer type 0x%x, dev 0x%x", buf_f->blf_type, log->l_dev); @@ -2152,12 +2243,13 @@ } return (error); -} /* xlog_recover_do_buffer_trans */ +} STATIC int -xlog_recover_do_inode_trans(xlog_t *log, - xlog_recover_item_t *item, - int pass) +xlog_recover_do_inode_trans( + xlog_t 
*log, + xlog_recover_item_t *item, + int pass) { xfs_inode_log_format_t *in_f; xfs_mount_t *mp; @@ -2377,7 +2469,6 @@ } } - write_inode_buffer: if (ITEM_TYPE(item) == XFS_LI_INODE) { ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || @@ -2391,8 +2482,7 @@ } return (error); -} /* xlog_recover_do_inode_trans */ - +} /* * Recover QUOTAOFF records. We simply make a note of it in the xlog_t @@ -2400,11 +2490,12 @@ * of that type. */ STATIC int -xlog_recover_do_quotaoff_trans(xlog_t *log, - xlog_recover_item_t *item, - int pass) +xlog_recover_do_quotaoff_trans( + xlog_t *log, + xlog_recover_item_t *item, + int pass) { - xfs_qoff_logformat_t *qoff_f; + xfs_qoff_logformat_t *qoff_f; if (pass == XLOG_RECOVER_PASS2) { return (0); @@ -2425,14 +2516,14 @@ return (0); } - /* * Recover a dquot record */ STATIC int -xlog_recover_do_dquot_trans(xlog_t *log, - xlog_recover_item_t *item, - int pass) +xlog_recover_do_dquot_trans( + xlog_t *log, + xlog_recover_item_t *item, + int pass) { xfs_mount_t *mp; xfs_buf_t *bp; @@ -2516,7 +2607,7 @@ xfs_bdwrite(mp, bp); return (0); -} /* xlog_recover_do_dquot_trans */ +} /* * This routine is called to create an in-core extent free intent @@ -2526,10 +2617,11 @@ * LSN. */ STATIC void -xlog_recover_do_efi_trans(xlog_t *log, - xlog_recover_item_t *item, - xfs_lsn_t lsn, - int pass) +xlog_recover_do_efi_trans( + xlog_t *log, + xlog_recover_item_t *item, + xfs_lsn_t lsn, + int pass) { xfs_mount_t *mp; xfs_efi_log_item_t *efip; @@ -2558,7 +2650,7 @@ * xfs_trans_update_ail() drops the AIL lock. */ xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s); -} /* xlog_recover_do_efi_trans */ +} /* @@ -2570,13 +2662,14 @@ * AIL and free it. 
*/ STATIC void -xlog_recover_do_efd_trans(xlog_t *log, - xlog_recover_item_t *item, - int pass) +xlog_recover_do_efd_trans( + xlog_t *log, + xlog_recover_item_t *item, + int pass) { xfs_mount_t *mp; xfs_efd_log_format_t *efd_formatp; - xfs_efi_log_item_t *efip=NULL; + xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; int gen; int nexts; @@ -2629,9 +2722,9 @@ ((nexts - 1) * sizeof(xfs_extent_t))); } else { kmem_zone_free(xfs_efi_zone, efip); + } } - } -} /* xlog_recover_do_efd_trans */ +} /* * Perform the transaction @@ -2640,12 +2733,13 @@ * EFIs and EFDs get queued up by adding entries into the AIL for them. */ STATIC int -xlog_recover_do_trans(xlog_t *log, - xlog_recover_t *trans, - int pass) +xlog_recover_do_trans( + xlog_t *log, + xlog_recover_t *trans, + int pass) { - int error = 0; - xlog_recover_item_t *item, *first_item; + int error = 0; + xlog_recover_item_t *item, *first_item; if ((error = xlog_recover_reorder_trans(log, trans))) return error; @@ -2695,8 +2789,7 @@ } while (first_item != item); return error; -} /* xlog_recover_do_trans */ - +} /* * Free up any resources allocated by the transaction @@ -2704,10 +2797,11 @@ * Remember that EFIs, EFDs, and IUNLINKs are handled later. 
*/ STATIC void -xlog_recover_free_trans(xlog_recover_t *trans) +xlog_recover_free_trans( + xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *item, *free_item; - int i; + xlog_recover_item_t *first_item, *item, *free_item; + int i; item = first_item = trans->r_itemq; do { @@ -2725,16 +2819,16 @@ } while (first_item != item); /* Free the transaction recover structure */ kmem_free(trans, sizeof(xlog_recover_t)); -} /* xlog_recover_free_trans */ - +} STATIC int -xlog_recover_commit_trans(xlog_t *log, - xlog_recover_t **q, - xlog_recover_t *trans, - int pass) +xlog_recover_commit_trans( + xlog_t *log, + xlog_recover_t **q, + xlog_recover_t *trans, + int pass) { - int error; + int error; if ((error = xlog_recover_unlink_tid(q, trans))) return error; @@ -2742,18 +2836,16 @@ return error; xlog_recover_free_trans(trans); /* no error */ return 0; -} /* xlog_recover_commit_trans */ - +} -/*ARGSUSED*/ STATIC int -xlog_recover_unmount_trans(xlog_recover_t *trans) +xlog_recover_unmount_trans( + xlog_recover_t *trans) { /* Do nothing now */ xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); - return( 0 ); -} /* xlog_recover_unmount_trans */ - + return 0; +} /* * There are two valid states of the r_state field. 0 indicates that the @@ -2765,97 +2857,101 @@ * NOTE: skip LRs with 0 data length. 
*/ STATIC int -xlog_recover_process_data(xlog_t *log, - xlog_recover_t *rhash[], - xlog_rec_header_t *rhead, - xfs_caddr_t dp, - int pass) -{ - xfs_caddr_t lp = dp+INT_GET(rhead->h_len, ARCH_CONVERT); - int num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); - xlog_op_header_t *ohead; - xlog_recover_t *trans; - xlog_tid_t tid; - int error; - unsigned long hash; - uint flags; - - /* check the log format matches our own - else we can't recover */ - if (xlog_header_check_recover(log->l_mp, rhead)) - return (XFS_ERROR(EIO)); - - while ((dp < lp) && num_logops) { - ASSERT(dp + sizeof(xlog_op_header_t) <= lp); - ohead = (xlog_op_header_t *)dp; - dp += sizeof(xlog_op_header_t); - if (ohead->oh_clientid != XFS_TRANSACTION && - ohead->oh_clientid != XFS_LOG) { - xlog_warn("XFS: xlog_recover_process_data: bad clientid"); - ASSERT(0); - return (XFS_ERROR(EIO)); - } - tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); - hash = XLOG_RHASH(tid); - trans = xlog_recover_find_tid(rhash[hash], tid); - if (trans == NULL) { /* not found; add new tid */ - if (ohead->oh_flags & XLOG_START_TRANS) - xlog_recover_new_tid(&rhash[hash], tid, INT_GET(rhead->h_lsn, ARCH_CONVERT)); - } else { - ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); - flags = ohead->oh_flags & ~XLOG_END_TRANS; - if (flags & XLOG_WAS_CONT_TRANS) - flags &= ~XLOG_CONTINUE_TRANS; - switch (flags) { - case XLOG_COMMIT_TRANS: { - error = xlog_recover_commit_trans(log, &rhash[hash], - trans, pass); - break; - } - case XLOG_UNMOUNT_TRANS: { - error = xlog_recover_unmount_trans(trans); - break; - } - case XLOG_WAS_CONT_TRANS: { - error = xlog_recover_add_to_cont_trans(trans, dp, - INT_GET(ohead->oh_len, ARCH_CONVERT)); - break; - } - case XLOG_START_TRANS : { - xlog_warn("XFS: xlog_recover_process_data: bad transaction"); - ASSERT(0); - error = XFS_ERROR(EIO); - break; - } - case 0: - case XLOG_CONTINUE_TRANS: { - error = xlog_recover_add_to_trans(trans, dp, - INT_GET(ohead->oh_len, ARCH_CONVERT)); - break; 
+xlog_recover_process_data( + xlog_t *log, + xlog_recover_t *rhash[], + xlog_rec_header_t *rhead, + xfs_caddr_t dp, + int pass) +{ + xfs_caddr_t lp; + int num_logops; + xlog_op_header_t *ohead; + xlog_recover_t *trans; + xlog_tid_t tid; + int error; + unsigned long hash; + uint flags; + + lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT); + num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT); + + /* check the log format matches our own - else we can't recover */ + if (xlog_header_check_recover(log->l_mp, rhead)) + return (XFS_ERROR(EIO)); + + while ((dp < lp) && num_logops) { + ASSERT(dp + sizeof(xlog_op_header_t) <= lp); + ohead = (xlog_op_header_t *)dp; + dp += sizeof(xlog_op_header_t); + if (ohead->oh_clientid != XFS_TRANSACTION && + ohead->oh_clientid != XFS_LOG) { + xlog_warn( + "XFS: xlog_recover_process_data: bad clientid"); + ASSERT(0); + return (XFS_ERROR(EIO)); } - default: { - xlog_warn("XFS: xlog_recover_process_data: bad flag"); - ASSERT(0); - error = XFS_ERROR(EIO); - break; + tid = INT_GET(ohead->oh_tid, ARCH_CONVERT); + hash = XLOG_RHASH(tid); + trans = xlog_recover_find_tid(rhash[hash], tid); + if (trans == NULL) { /* not found; add new tid */ + if (ohead->oh_flags & XLOG_START_TRANS) + xlog_recover_new_tid(&rhash[hash], tid, + INT_GET(rhead->h_lsn, ARCH_CONVERT)); + } else { + ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp); + flags = ohead->oh_flags & ~XLOG_END_TRANS; + if (flags & XLOG_WAS_CONT_TRANS) + flags &= ~XLOG_CONTINUE_TRANS; + switch (flags) { + case XLOG_COMMIT_TRANS: + error = xlog_recover_commit_trans(log, + &rhash[hash], trans, pass); + break; + case XLOG_UNMOUNT_TRANS: + error = xlog_recover_unmount_trans(trans); + break; + case XLOG_WAS_CONT_TRANS: + error = xlog_recover_add_to_cont_trans(trans, + dp, INT_GET(ohead->oh_len, + ARCH_CONVERT)); + break; + case XLOG_START_TRANS: + xlog_warn( + "XFS: xlog_recover_process_data: bad transaction"); + ASSERT(0); + error = XFS_ERROR(EIO); + break; + case 0: + case XLOG_CONTINUE_TRANS: 
+ error = xlog_recover_add_to_trans(trans, + dp, INT_GET(ohead->oh_len, + ARCH_CONVERT)); + break; + default: + xlog_warn( + "XFS: xlog_recover_process_data: bad flag"); + ASSERT(0); + error = XFS_ERROR(EIO); + break; + } + if (error) + return error; } - } /* switch */ - if (error) - return error; - } /* if */ - dp += INT_GET(ohead->oh_len, ARCH_CONVERT); - num_logops--; - } - return( 0 ); -} /* xlog_recover_process_data */ - + dp += INT_GET(ohead->oh_len, ARCH_CONVERT); + num_logops--; + } + return 0; +} /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. */ STATIC void -xlog_recover_process_efi(xfs_mount_t *mp, - xfs_efi_log_item_t *efip) +xlog_recover_process_efi( + xfs_mount_t *mp, + xfs_efi_log_item_t *efip) { xfs_efd_log_item_t *efdp; xfs_trans_t *tp; @@ -2900,8 +2996,7 @@ efip->efi_flags |= XFS_EFI_RECOVERED; xfs_trans_commit(tp, 0, NULL); -} /* xlog_recover_process_efi */ - +} /* * Verify that once we've encountered something other than an EFI @@ -2909,13 +3004,13 @@ */ #if defined(DEBUG) STATIC void -xlog_recover_check_ail(xfs_mount_t *mp, - xfs_log_item_t *lip, - int gen) +xlog_recover_check_ail( + xfs_mount_t *mp, + xfs_log_item_t *lip, + int gen) { - int orig_gen; + int orig_gen = gen; - orig_gen = gen; do { ASSERT(lip->li_type != XFS_LI_EFI); lip = xfs_trans_next_ail(mp, lip, &gen, NULL); @@ -2930,7 +3025,6 @@ } #endif /* DEBUG */ - /* * When this is called, all of the EFIs which did not have * corresponding EFDs should be in the AIL. What we do now @@ -2950,7 +3044,8 @@ * we see something other than an EFI in the AIL. 
*/ STATIC void -xlog_recover_process_efis(xlog_t *log) +xlog_recover_process_efis( + xlog_t *log) { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; @@ -2986,8 +3081,7 @@ lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } AIL_UNLOCK(mp, s); -} /* xlog_recover_process_efis */ - +} /* * This routine performs a transaction to null out a bad inode pointer @@ -3030,8 +3124,7 @@ (offset + sizeof(xfs_agino_t) - 1)); (void) xfs_trans_commit(tp, 0, NULL); -} /* xlog_recover_clear_agi_bucket */ - +} /* * xlog_iunlink_recover @@ -3046,7 +3139,8 @@ * atomic. */ void -xlog_recover_process_iunlinks(xlog_t *log) +xlog_recover_process_iunlinks( + xlog_t *log) { xfs_mount_t *mp; xfs_agnumber_t agno; @@ -3188,40 +3282,47 @@ } mp->m_dmevmask = mp_dmevmask; +} -} /* xlog_recover_process_iunlinks */ - - -/* - * Stamp cycle number in every block - * - * This routine is also called in xfs_log.c - */ -/*ARGSUSED*/ -void -xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog) -{ - int i, j, k; - int size = iclog->ic_offset + iclog->ic_roundoff; - xfs_caddr_t dp; - union ich { - xlog_rec_ext_header_t hic_xheader; - char hic_sector[XLOG_HEADER_SIZE]; - } *xhdr; - uint cycle_lsn; #ifdef DEBUG - uint *up; - uint chksum = 0; +STATIC void +xlog_pack_data_checksum( + xlog_t *log, + xlog_in_core_t *iclog, + int size) +{ + int i; + uint *up; + uint chksum = 0; up = (uint *)iclog->ic_datap; /* divide length by 4 to get # words */ - for (i=0; i> 2; i++) { + for (i = 0; i < (size >> 2); i++) { chksum ^= INT_GET(*up, ARCH_CONVERT); up++; } INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum); -#endif /* DEBUG */ +} +#else +#define xlog_pack_data_checksum(log, iclog, size) +#endif + +/* + * Stamp cycle number in every block + */ +void +xlog_pack_data( + xlog_t *log, + xlog_in_core_t *iclog) +{ + int i, j, k; + int size = iclog->ic_offset + iclog->ic_roundoff; + uint cycle_lsn; + xfs_caddr_t dp; + xlog_in_core_2_t *xhdr; + + xlog_pack_data_checksum(log, iclog, size); cycle_lsn = 
CYCLE_LSN_NOCONV(iclog->ic_header.h_lsn, ARCH_CONVERT); @@ -3234,7 +3335,7 @@ } if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - xhdr = (union ich*)&iclog->ic_header; + xhdr = (xlog_in_core_2_t *)&iclog->ic_header; for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3247,45 +3348,18 @@ xhdr[i].hic_xheader.xh_cycle = cycle_lsn; } } - -} /* xlog_pack_data */ - - -/*ARGSUSED*/ -STATIC void -xlog_unpack_data(xlog_rec_header_t *rhead, - xfs_caddr_t dp, - xlog_t *log) -{ - int i, j, k; - union ich { - xlog_rec_header_t hic_header; - xlog_rec_ext_header_t hic_xheader; - char hic_sector[XLOG_HEADER_SIZE]; - } *xhdr; +} #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - uint *up = (uint *)dp; - uint chksum = 0; -#endif - - for (i=0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && - i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { - *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; - dp += BBSIZE; - } - - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - xhdr = (union ich*)rhead; - for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { - j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); - *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; - dp += BBSIZE; - } - } +STATIC void +xlog_unpack_data_checksum( + xlog_rec_header_t *rhead, + xfs_caddr_t dp, + xlog_t *log) +{ + uint *up = (uint *)dp; + uint chksum = 0; -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) /* divide length by 4 to get # words */ for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) { chksum ^= INT_GET(*up, ARCH_CONVERT); @@ -3306,9 +3380,77 @@ log->l_flags |= XLOG_CHKSUM_MISMATCH; } } -#endif /* DEBUG && XFS_LOUD_RECOVERY */ -} /* xlog_unpack_data */ +} +#else +#define xlog_unpack_data_checksum(rhead, dp, log) +#endif + +STATIC void +xlog_unpack_data( + xlog_rec_header_t *rhead, + xfs_caddr_t dp, + xlog_t *log) +{ + int i, j, k; + xlog_in_core_2_t *xhdr; + + for (i = 0; i < 
BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) && + i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { + *(uint *)dp = *(uint *)&rhead->h_cycle_data[i]; + dp += BBSIZE; + } + + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { + xhdr = (xlog_in_core_2_t *)rhead; + for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) { + j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); + k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); + *(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; + dp += BBSIZE; + } + } + + xlog_unpack_data_checksum(rhead, dp, log); +} + +STATIC int +xlog_valid_rec_header( + xlog_t *log, + xlog_rec_header_t *rhead, + xfs_daddr_t blkno) +{ + int bblks; + + if (unlikely( + (INT_GET(rhead->h_magicno, ARCH_CONVERT) != + XLOG_HEADER_MAGIC_NUM))) { + XFS_ERROR_REPORT("xlog_valid_rec_header(1)", + XFS_ERRLEVEL_LOW, log->l_mp); + return XFS_ERROR(EFSCORRUPTED); + } + if (unlikely( + (INT_ISZERO(rhead->h_version, ARCH_CONVERT) || + (INT_GET(rhead->h_version, ARCH_CONVERT) & + (~XLOG_VERSION_OKBITS)) != 0))) { + xlog_warn("XFS: %s: unrecognised log version (%d).", + __FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT)); + return XFS_ERROR(EIO); + } + /* LR body must have data or it wouldn't have been written */ + bblks = INT_GET(rhead->h_len, ARCH_CONVERT); + if (unlikely( bblks <= 0 || bblks > INT_MAX )) { + XFS_ERROR_REPORT("xlog_valid_rec_header(2)", + XFS_ERRLEVEL_LOW, log->l_mp); + return XFS_ERROR(EFSCORRUPTED); + } + if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { + XFS_ERROR_REPORT("xlog_valid_rec_header(3)", + XFS_ERRLEVEL_LOW, log->l_mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} /* * Read the log from tail to head and process the log records found. @@ -3319,223 +3461,246 @@ * here. 
*/ STATIC int -xlog_do_recovery_pass(xlog_t *log, - xfs_daddr_t head_blk, - xfs_daddr_t tail_blk, - int pass) -{ - xlog_rec_header_t *rhead; - xfs_daddr_t blk_no; - xfs_caddr_t bufaddr; - xfs_buf_t *hbp, *dbp; - int error, h_size; - int bblks, split_bblks; - int hblks, split_hblks, wrapped_hblks; - xlog_recover_t *rhash[XLOG_RHASH_SIZE]; - - error = 0; - - - /* - * Read the header of the tail block and get the iclog buffer size from - * h_size. Use this to tell how many sectors make up the log header. - */ - if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { - /* - * When using variable length iclogs, read first sector of iclog - * header and extract the header size from it. Get a new hbp that - * is the correct size. - */ - hbp = xlog_get_bp(1, log->l_mp); - if (!hbp) - return ENOMEM; - if ((error = xlog_bread(log, tail_blk, 1, hbp))) - goto bread_err1; - rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); - ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == - XLOG_HEADER_MAGIC_NUM); - if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) { - xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number."); - error = XFS_ERROR(EIO); - goto bread_err1; - } - h_size = INT_GET(rhead->h_size, ARCH_CONVERT); +xlog_do_recovery_pass( + xlog_t *log, + xfs_daddr_t head_blk, + xfs_daddr_t tail_blk, + int pass) +{ + xlog_rec_header_t *rhead; + xfs_daddr_t blk_no; + xfs_caddr_t bufaddr, offset; + xfs_buf_t *hbp, *dbp; + int error = 0, h_size; + int bblks, split_bblks; + int hblks, split_hblks, wrapped_hblks; + xlog_recover_t *rhash[XLOG_RHASH_SIZE]; + + ASSERT(head_blk != tail_blk); - if ((INT_GET(rhead->h_version, ARCH_CONVERT) & XLOG_VERSION_2) && - (h_size > XLOG_HEADER_CYCLE_SIZE)) { - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - hblks++; - xlog_put_bp(hbp); - hbp = xlog_get_bp(hblks, log->l_mp); + /* + * Read the header of the tail block and get the iclog buffer size from + * h_size. 
Use this to tell how many sectors make up the log header. + */ + if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) { + /* + * When using variable length iclogs, read first sector of + * iclog header and extract the header size from it. Get a + * new hbp that is the correct size. + */ + hbp = xlog_get_bp(log, 1); + if (!hbp) + return ENOMEM; + if ((error = xlog_bread(log, tail_blk, 1, hbp))) + goto bread_err1; + offset = xlog_align(log, tail_blk, 1, hbp); + rhead = (xlog_rec_header_t *)offset; + error = xlog_valid_rec_header(log, rhead, tail_blk); + if (error) + goto bread_err1; + h_size = INT_GET(rhead->h_size, ARCH_CONVERT); + if ((INT_GET(rhead->h_version, ARCH_CONVERT) + & XLOG_VERSION_2) && + (h_size > XLOG_HEADER_CYCLE_SIZE)) { + hblks = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + hblks++; + xlog_put_bp(hbp); + hbp = xlog_get_bp(log, hblks); + } else { + hblks = 1; + } } else { - hblks=1; + ASSERT(log->l_sectbb_log == 0); + hblks = 1; + hbp = xlog_get_bp(log, 1); + h_size = XLOG_BIG_RECORD_BSIZE; } - } else { - hblks=1; - hbp = xlog_get_bp(1, log->l_mp); - h_size = XLOG_BIG_RECORD_BSIZE; - } - - if (!hbp) - return ENOMEM; - dbp = xlog_get_bp(BTOBB(h_size),log->l_mp); - if (!dbp) { - xlog_put_bp(hbp); - return ENOMEM; - } - - memset(rhash, 0, sizeof(rhash)); - if (tail_blk <= head_blk) { - for (blk_no = tail_blk; blk_no < head_blk; ) { - if ((error = xlog_bread(log, blk_no, hblks, hbp))) - goto bread_err2; - rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); - ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); - bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */ - - if (unlikely((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) || - (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) || - (bblks <= 0) || - (blk_no > log->l_logBBsize))) { - XFS_ERROR_REPORT("xlog_do_recovery_pass(1)", - 
XFS_ERRLEVEL_LOW, log->l_mp); - error = EFSCORRUPTED; - goto bread_err2; - } - if ((INT_GET(rhead->h_version, ARCH_CONVERT) & (~XLOG_VERSION_OKBITS)) != 0) { - xlog_warn("XFS: xlog_do_recovery_pass: unrecognised log version number."); - error = XFS_ERROR(EIO); - goto bread_err2; - } - bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); /* blocks in data section */ - if (bblks > 0) { - if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) - goto bread_err2; - xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); - if ((error = xlog_recover_process_data(log, rhash, - rhead, XFS_BUF_PTR(dbp), - pass))) - goto bread_err2; - } - blk_no += (bblks+hblks); + if (!hbp) + return ENOMEM; + dbp = xlog_get_bp(log, BTOBB(h_size)); + if (!dbp) { + xlog_put_bp(hbp); + return ENOMEM; } - } else { - /* - * Perform recovery around the end of the physical log. When the head - * is not on the same cycle number as the tail, we can't do a sequential - * recovery as above. - */ - blk_no = tail_blk; - while (blk_no < log->l_logBBsize) { - /* - * Check for header wrapping around physical end-of-log - */ - wrapped_hblks = 0; - if (blk_no+hblks <= log->l_logBBsize) { - /* Read header in one read */ - if ((error = xlog_bread(log, blk_no, hblks, hbp))) - goto bread_err2; - } else { - /* This log record is split across physical end of log */ - split_hblks = 0; - if (blk_no != log->l_logBBsize) { - /* some data is before physical end of log */ - ASSERT(blk_no <= INT_MAX); - split_hblks = log->l_logBBsize - (int)blk_no; - ASSERT(split_hblks > 0); - if ((error = xlog_bread(log, blk_no, split_hblks, hbp))) - goto bread_err2; - } - bufaddr = XFS_BUF_PTR(hbp); - XFS_BUF_SET_PTR(hbp, bufaddr + BBTOB(split_hblks), - BBTOB(hblks - split_hblks)); - wrapped_hblks = hblks - split_hblks; - if ((error = xlog_bread(log, 0, wrapped_hblks, hbp))) - goto bread_err2; - XFS_BUF_SET_PTR(hbp, bufaddr, hblks); - } - rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); - ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == 
XLOG_HEADER_MAGIC_NUM); - ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); - bblks = (int) BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); - - /* LR body must have data or it wouldn't have been written */ - ASSERT(bblks > 0); - blk_no += hblks; /* successfully read header */ - - if (unlikely((INT_GET(rhead->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) || - (BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) > INT_MAX)) || - (bblks <= 0))) { - XFS_ERROR_REPORT("xlog_do_recovery_pass(2)", - XFS_ERRLEVEL_LOW, log->l_mp); - error = EFSCORRUPTED; - goto bread_err2; - } - /* Read in data for log record */ - if (blk_no+bblks <= log->l_logBBsize) { - if ((error = xlog_bread(log, blk_no, bblks, dbp))) - goto bread_err2; - } else { - /* This log record is split across physical end of log */ - split_bblks = 0; - if (blk_no != log->l_logBBsize) { - - /* some data is before physical end of log */ - ASSERT(blk_no <= INT_MAX); - split_bblks = log->l_logBBsize - (int)blk_no; - ASSERT(split_bblks > 0); - if ((error = xlog_bread(log, blk_no, split_bblks, dbp))) - goto bread_err2; - } - bufaddr = XFS_BUF_PTR(dbp); - XFS_BUF_SET_PTR(dbp, bufaddr + BBTOB(split_bblks), - BBTOB(bblks - split_bblks)); - if ((error = xlog_bread(log, wrapped_hblks, - bblks - split_bblks, dbp))) - goto bread_err2; - XFS_BUF_SET_PTR(dbp, bufaddr, XLOG_BIG_RECORD_BSIZE); - } - xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); - if ((error = xlog_recover_process_data(log, rhash, - rhead, XFS_BUF_PTR(dbp), - pass))) - goto bread_err2; - blk_no += bblks; - } - - ASSERT(blk_no >= log->l_logBBsize); - blk_no -= log->l_logBBsize; - - /* read first part of physical log */ - while (blk_no < head_blk) { - if ((error = xlog_bread(log, blk_no, hblks, hbp))) - goto bread_err2; - rhead = (xlog_rec_header_t *)XFS_BUF_PTR(hbp); - ASSERT(INT_GET(rhead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM); - ASSERT(BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT) <= INT_MAX)); - bblks = (int) BTOBB(INT_GET(rhead->h_len, 
ARCH_CONVERT)); - ASSERT(bblks > 0); - if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) - goto bread_err2; - xlog_unpack_data(rhead, XFS_BUF_PTR(dbp), log); - if ((error = xlog_recover_process_data(log, rhash, - rhead, XFS_BUF_PTR(dbp), - pass))) - goto bread_err2; - blk_no += (bblks+hblks); - } - } - -bread_err2: - xlog_put_bp(dbp); -bread_err1: - xlog_put_bp(hbp); + memset(rhash, 0, sizeof(rhash)); + if (tail_blk <= head_blk) { + for (blk_no = tail_blk; blk_no < head_blk; ) { + if ((error = xlog_bread(log, blk_no, hblks, hbp))) + goto bread_err2; + offset = xlog_align(log, blk_no, hblks, hbp); + rhead = (xlog_rec_header_t *)offset; + error = xlog_valid_rec_header(log, rhead, blk_no); + if (error) + goto bread_err2; + + /* blocks in data section */ + bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + error = xlog_bread(log, blk_no + hblks, bblks, dbp); + if (error) + goto bread_err2; + offset = xlog_align(log, blk_no + hblks, bblks, dbp); + xlog_unpack_data(rhead, offset, log); + if ((error = xlog_recover_process_data(log, + rhash, rhead, offset, pass))) + goto bread_err2; + blk_no += bblks + hblks; + } + } else { + /* + * Perform recovery around the end of the physical log. + * When the head is not on the same cycle number as the tail, + * we can't do a sequential recovery as above. 
+ */ + blk_no = tail_blk; + while (blk_no < log->l_logBBsize) { + /* + * Check for header wrapping around physical end-of-log + */ + offset = NULL; + split_hblks = 0; + wrapped_hblks = 0; + if (blk_no + hblks <= log->l_logBBsize) { + /* Read header in one read */ + error = xlog_bread(log, blk_no, hblks, hbp); + if (error) + goto bread_err2; + offset = xlog_align(log, blk_no, hblks, hbp); + } else { + /* This LR is split across physical log end */ + if (blk_no != log->l_logBBsize) { + /* some data before physical log end */ + ASSERT(blk_no <= INT_MAX); + split_hblks = log->l_logBBsize - (int)blk_no; + ASSERT(split_hblks > 0); + if ((error = xlog_bread(log, blk_no, + split_hblks, hbp))) + goto bread_err2; + offset = xlog_align(log, blk_no, + split_hblks, hbp); + } + /* + * Note: this black magic still works with + * large sector sizes (non-512) only because: + * - we increased the buffer size originally + * by 1 sector giving us enough extra space + * for the second read; + * - the log start is guaranteed to be sector + * aligned; + * - we read the log end (LR header start) + * _first_, then the log start (LR header end) + * - order is important. + */ + bufaddr = XFS_BUF_PTR(hbp); + XFS_BUF_SET_PTR(hbp, + bufaddr + BBTOB(split_hblks), + BBTOB(hblks - split_hblks)); + wrapped_hblks = hblks - split_hblks; + error = xlog_bread(log, 0, wrapped_hblks, hbp); + if (error) + goto bread_err2; + XFS_BUF_SET_PTR(hbp, bufaddr, hblks); + if (!offset) + offset = xlog_align(log, 0, + wrapped_hblks, hbp); + } + rhead = (xlog_rec_header_t *)offset; + error = xlog_valid_rec_header(log, rhead, + split_hblks ? 
blk_no : 0); + if (error) + goto bread_err2; + + bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + blk_no += hblks; + + /* Read in data for log record */ + if (blk_no + bblks <= log->l_logBBsize) { + error = xlog_bread(log, blk_no, bblks, dbp); + if (error) + goto bread_err2; + offset = xlog_align(log, blk_no, bblks, dbp); + } else { + /* This log record is split across the + * physical end of log */ + offset = NULL; + split_bblks = 0; + if (blk_no != log->l_logBBsize) { + /* some data is before the physical + * end of log */ + ASSERT(!wrapped_hblks); + ASSERT(blk_no <= INT_MAX); + split_bblks = + log->l_logBBsize - (int)blk_no; + ASSERT(split_bblks > 0); + if ((error = xlog_bread(log, blk_no, + split_bblks, dbp))) + goto bread_err2; + offset = xlog_align(log, blk_no, + split_bblks, dbp); + } + /* + * Note: this black magic still works with + * large sector sizes (non-512) only because: + * - we increased the buffer size originally + * by 1 sector giving us enough extra space + * for the second read; + * - the log start is guaranteed to be sector + * aligned; + * - we read the log end (LR header start) + * _first_, then the log start (LR header end) + * - order is important. 
+ */ + bufaddr = XFS_BUF_PTR(dbp); + XFS_BUF_SET_PTR(dbp, + bufaddr + BBTOB(split_bblks), + BBTOB(bblks - split_bblks)); + if ((error = xlog_bread(log, wrapped_hblks, + bblks - split_bblks, dbp))) + goto bread_err2; + XFS_BUF_SET_PTR(dbp, bufaddr, + XLOG_BIG_RECORD_BSIZE); + if (!offset) + offset = xlog_align(log, wrapped_hblks, + bblks - split_bblks, dbp); + } + xlog_unpack_data(rhead, offset, log); + if ((error = xlog_recover_process_data(log, rhash, + rhead, offset, pass))) + goto bread_err2; + blk_no += bblks; + } + + ASSERT(blk_no >= log->l_logBBsize); + blk_no -= log->l_logBBsize; + + /* read first part of physical log */ + while (blk_no < head_blk) { + if ((error = xlog_bread(log, blk_no, hblks, hbp))) + goto bread_err2; + offset = xlog_align(log, blk_no, hblks, hbp); + rhead = (xlog_rec_header_t *)offset; + error = xlog_valid_rec_header(log, rhead, blk_no); + if (error) + goto bread_err2; + bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); + if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp))) + goto bread_err2; + offset = xlog_align(log, blk_no+hblks, bblks, dbp); + xlog_unpack_data(rhead, offset, log); + if ((error = xlog_recover_process_data(log, rhash, + rhead, offset, pass))) + goto bread_err2; + blk_no += bblks + hblks; + } + } - return error; + bread_err2: + xlog_put_bp(dbp); + bread_err1: + xlog_put_bp(hbp); + return error; } /* @@ -3552,14 +3717,14 @@ * the log recovery has been completed. */ STATIC int -xlog_do_log_recovery(xlog_t *log, - xfs_daddr_t head_blk, - xfs_daddr_t tail_blk) +xlog_do_log_recovery( + xlog_t *log, + xfs_daddr_t head_blk, + xfs_daddr_t tail_blk) { int error; -#ifdef DEBUG - int i; -#endif + + ASSERT(head_blk != tail_blk); /* * First do a pass to find all of the cancelled buf log items. 
@@ -3583,11 +3748,15 @@ */ error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS2); -#ifdef DEBUG - for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) { - ASSERT(log->l_buf_cancel_table[i] == NULL); +#ifdef DEBUG + { + int i; + + for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) + ASSERT(log->l_buf_cancel_table[i] == NULL); } #endif /* DEBUG */ + kmem_free(log->l_buf_cancel_table, XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); log->l_buf_cancel_table = NULL; @@ -3599,9 +3768,10 @@ * Do the actual recovery */ STATIC int -xlog_do_recover(xlog_t *log, - xfs_daddr_t head_blk, - xfs_daddr_t tail_blk) +xlog_do_recover( + xlog_t *log, + xfs_daddr_t head_blk, + xfs_daddr_t tail_blk) { int error; xfs_buf_t *bp; @@ -3663,7 +3833,7 @@ /* Normal transactions can now occur */ log->l_flags &= ~XLOG_ACTIVE_RECOVERY; return 0; -} /* xlog_do_recover */ +} /* * Perform recovery and re-initialize some log variables in xlog_find_tail. @@ -3671,22 +3841,18 @@ * Return error or zero. */ int -xlog_recover(xlog_t *log, int readonly) +xlog_recover( + xlog_t *log, + int readonly) { - xfs_daddr_t head_blk, tail_blk; - int error; + xfs_daddr_t head_blk, tail_blk; + int error; /* find the tail of the log */ - if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly))) return error; if (tail_blk != head_blk) { -#ifndef __KERNEL__ - extern xfs_daddr_t HEAD_BLK, TAIL_BLK; - head_blk = HEAD_BLK; - tail_blk = TAIL_BLK; -#endif /* There used to be a comment here: * * disallow recovery on read-only mounts. note -- mount @@ -3698,36 +3864,21 @@ * under the vfs layer, so we can get away with it unless * the device itself is read-only, in which case we fail. 
*/ -#ifdef __KERNEL__ if ((error = xfs_dev_is_read_only(log->l_mp, "recovery required"))) { return error; } -#else - if (readonly) { - return ENOSPC; - } -#endif -#ifdef __KERNEL__ -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) cmn_err(CE_NOTE, "Starting XFS recovery on filesystem: %s (dev: %d/%d)", log->l_mp->m_fsname, MAJOR(log->l_dev), MINOR(log->l_dev)); -#else - cmn_err(CE_NOTE, - "!Starting XFS recovery on filesystem: %s (dev: %d/%d)", - log->l_mp->m_fsname, MAJOR(log->l_dev), - MINOR(log->l_dev)); -#endif -#endif + error = xlog_do_recover(log, head_blk, tail_blk); log->l_flags |= XLOG_RECOVERY_NEEDED; } return error; -} /* xlog_recover */ - +} /* * In the first part of recovery we replay inodes and buffers and build @@ -3739,7 +3890,9 @@ * in the real-time portion of the file system. */ int -xlog_recover_finish(xlog_t *log, int mfsi_flags) +xlog_recover_finish( + xlog_t *log, + int mfsi_flags) { /* * Now we're ready to do the transactions needed for the @@ -3761,23 +3914,16 @@ (XFS_LOG_FORCE | XFS_LOG_SYNC)); if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { - xlog_recover_process_iunlinks(log); } xlog_recover_check_summary(log); -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) cmn_err(CE_NOTE, "Ending XFS recovery on filesystem: %s (dev: %d/%d)", log->l_mp->m_fsname, MAJOR(log->l_dev), MINOR(log->l_dev)); -#else - cmn_err(CE_NOTE, - "!Ending XFS recovery on filesystem: %s (dev: %d/%d)", - log->l_mp->m_fsname, MAJOR(log->l_dev), - MINOR(log->l_dev)); -#endif + log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { cmn_err(CE_DEBUG, @@ -3785,7 +3931,7 @@ log->l_mp->m_fsname); } return 0; -} /* xlog_recover_finish */ +} #if defined(DEBUG) @@ -3794,7 +3940,8 @@ * are consistent with the superblock counters. 
*/ void -xlog_recover_check_summary(xlog_t *log) +xlog_recover_check_summary( + xlog_t *log) { xfs_mount_t *mp; xfs_agf_t *agfp; diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_mount.c HACK/fs/xfs/xfs_mount.c --- ORIG/fs/xfs/xfs_mount.c 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_mount.c 2003-07-15 22:06:55.000000000 -0500 @@ -467,7 +467,11 @@ bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); - ASSERT(bp); + if (!bp || XFS_BUF_ISERROR(bp)) { + cmn_err(CE_WARN, "XFS: SB read failed"); + error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; + goto fail; + } ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); @@ -482,9 +486,7 @@ error = xfs_mount_validate_sb(mp, &(mp->m_sb)); if (error) { cmn_err(CE_WARN, "XFS: SB validate failed"); - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); - return error; + goto fail; } /* @@ -494,9 +496,8 @@ cmn_err(CE_WARN, "XFS: device supports only %u byte sectors (not %u)", sector_size, mp->m_sb.sb_sectsize); - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); - return XFS_ERROR(ENOSYS); + error = ENOSYS; + goto fail; } /* @@ -509,7 +510,11 @@ sector_size = mp->m_sb.sb_sectsize; bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); - ASSERT(bp); + if (!bp || XFS_BUF_ISERROR(bp)) { + cmn_err(CE_WARN, "XFS: SB re-read failed"); + error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; + goto fail; + } ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); } @@ -518,6 +523,13 @@ xfs_buf_relse(bp); ASSERT(XFS_BUF_VALUSEMA(bp) > 0); return 0; + + fail: + if (bp) { + XFS_BUF_UNMANAGE(bp); + xfs_buf_relse(bp); + } + return error; } @@ -548,16 +560,6 @@ mp->m_blockwmask = mp->m_blockwsize - 1; INIT_LIST_HEAD(&mp->m_del_inodes); - - if (XFS_SB_VERSION_HASLOGV2(sbp)) { - if (sbp->sb_logsunit <= 1) { - mp->m_lstripemask = 1; - } else { - mp->m_lstripemask = - 1 << xfs_highbit32(sbp->sb_logsunit >> BBSHIFT); - } - } - /* * Setup for attributes, in case they get created. 
* This value is for inodes getting attributes for the first time, @@ -619,7 +621,6 @@ xfs_mountfs( vfs_t *vfsp, xfs_mount_t *mp, - dev_t dev, int mfsi_flags) { xfs_buf_t *bp; @@ -632,11 +633,10 @@ __uint64_t ret64; __int64_t update_flags; uint quotamount, quotaflags; - int agno, noio; + int agno; int uuid_mounted = 0; int error = 0; - noio = dev == 0 && mp->m_sb_bp != NULL; if (mp->m_sb_bp == NULL) { if ((error = xfs_readsb(mp))) { return (error); @@ -729,6 +729,8 @@ } else mp->m_maxicount = 0; + mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); + /* * XFS uses the uuid from the superblock as the unique * identifier for fsid. We can not use the uuid from the volume @@ -825,22 +827,20 @@ error = XFS_ERROR(E2BIG); goto error1; } - if (!noio) { - error = xfs_read_buf(mp, mp->m_ddev_targp, - d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) { - error = XFS_ERROR(E2BIG); - } - goto error1; + error = xfs_read_buf(mp, mp->m_ddev_targp, + d - XFS_FSS_TO_BB(mp, 1), + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (!error) { + xfs_buf_relse(bp); + } else { + cmn_err(CE_WARN, "XFS: size check 2 failed"); + if (error == ENOSPC) { + error = XFS_ERROR(E2BIG); } + goto error1; } - if (!noio && ((mfsi_flags & XFS_MFSI_CLIENT) == 0) && + if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { @@ -917,10 +917,6 @@ * Initialize the precomputed transaction reservations values. 
*/ xfs_trans_init(mp); - if (noio) { - ASSERT((mfsi_flags & XFS_MFSI_CLIENT) == 0); - return 0; - } /* * Allocate and initialize the inode hash table for this diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_mount.h HACK/fs/xfs/xfs_mount.h --- ORIG/fs/xfs/xfs_mount.h 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_mount.h 2003-07-15 22:10:11.000000000 -0500 @@ -68,6 +68,7 @@ ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) #else struct cred; +struct log; struct vfs; struct vnode; struct xfs_mount_args; @@ -79,7 +80,6 @@ struct xfs_bmbt_irec; struct xfs_bmap_free; -#define SPLDECL(s) unsigned long s #define AIL_LOCK_T lock_t #define AIL_LOCKINIT(x,y) spinlock_init(x,y) #define AIL_LOCK_DESTROY(x) spinlock_destroy(x) @@ -303,7 +303,7 @@ uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ uint m_writeio_blocks; /* min write size blocks */ - void *m_log; /* log specific stuff */ + struct log *m_log; /* log specific stuff */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ @@ -351,6 +351,7 @@ uint m_qflags; /* quota status flags */ xfs_trans_reservations_t m_reservations;/* precomputed res values */ __uint64_t m_maxicount; /* maximum inode count */ + __uint64_t m_maxioffset; /* maximum inode offset */ __uint64_t m_resblks; /* total reserved blocks */ __uint64_t m_resblks_avail;/* available reserved blocks */ #if XFS_BIG_FILESYSTEMS @@ -358,7 +359,6 @@ #endif int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ - int m_lstripemask; /* log stripe mask */ int m_sinoalign; /* stripe unit inode alignmnt */ int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ @@ -418,8 +418,6 @@ * 32 bits in size */ #define XFS_MOUNT_NOLOGFLUSH 0x00010000 -#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) - /* * Default minimum read and write sizes. 
*/ @@ -444,6 +442,9 @@ #define XFS_WSYNC_READIO_LOG 15 /* 32K */ #define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ +#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset) + +#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) #define xfs_force_shutdown(m,f) \ VFS_FORCE_SHUTDOWN((XFS_MTOVFS(m)), f, __FILE__, __LINE__) @@ -539,7 +540,7 @@ extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); -extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int); +extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); extern int xfs_unmountfs(xfs_mount_t *, struct cred *); extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_rw.c HACK/fs/xfs/xfs_rw.c --- ORIG/fs/xfs/xfs_rw.c 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_rw.c 2003-07-08 09:00:23.000000000 -0500 @@ -408,7 +408,6 @@ spinlock_t xfs_refcache_lock = SPIN_LOCK_UNLOCKED; xfs_inode_t **xfs_refcache; -int xfs_refcache_size; int xfs_refcache_index; int xfs_refcache_busy; int xfs_refcache_count; @@ -635,15 +634,13 @@ xfs_inode_t *ip; int iplist_index; xfs_inode_t **iplist; - int purge_count; if ((xfs_refcache == NULL) || (xfs_refcache_count == 0)) { return; } iplist_index = 0; - purge_count = xfs_params.refcache_purge; - iplist = (xfs_inode_t **)kmem_zalloc(purge_count * + iplist = (xfs_inode_t **)kmem_zalloc(xfs_refcache_purge_count * sizeof(xfs_inode_t *), KM_SLEEP); spin_lock(&xfs_refcache_lock); @@ -656,7 +653,7 @@ * forward as we go so that we are sure to eventually clear * out the entire cache when the system goes idle. 
*/ - for (i = 0; i < purge_count; i++) { + for (i = 0; i < xfs_refcache_purge_count; i++) { ip = xfs_refcache[xfs_refcache_index]; if (ip != NULL) { xfs_refcache[xfs_refcache_index] = NULL; @@ -682,7 +679,7 @@ VN_RELE(XFS_ITOV(iplist[i])); } - kmem_free(iplist, purge_count * + kmem_free(iplist, xfs_refcache_purge_count * sizeof(xfs_inode_t *)); } diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_sb.h HACK/fs/xfs/xfs_sb.h --- ORIG/fs/xfs/xfs_sb.h 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_sb.h 2003-06-26 11:52:34.000000000 -0500 @@ -81,7 +81,7 @@ XFS_SB_VERSION_OKREALFBITS | \ XFS_SB_VERSION_OKSASHFBITS) #define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag) \ - (((ia) || (dia) || (extflag) || (dirv2) || (na)) ? \ + (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag)) ? \ (XFS_SB_VERSION_4 | \ ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \ ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \ diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_trans.c HACK/fs/xfs/xfs_trans.c --- ORIG/fs/xfs/xfs_trans.c 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_trans.c 2003-07-15 22:17:23.000000000 -0500 @@ -200,6 +200,7 @@ tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; + PFLAGS_DUP(&tp->t_pflags, &ntp->t_pflags); XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); @@ -238,7 +239,7 @@ rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ - current->flags |= PF_FSTRANS; + PFLAGS_SET_FSTRANS(&tp->t_pflags); /* * Attempt to reserve the needed disk blocks by decrementing @@ -249,7 +250,7 @@ error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, -blocks, rsvd); if (error != 0) { - current->flags &= ~PF_FSTRANS; + PFLAGS_RESTORE(&tp->t_pflags); return (XFS_ERROR(ENOSPC)); } tp->t_blk_res += blocks; @@ -322,7 +323,7 @@ tp->t_blk_res = 0; } - current->flags &= ~PF_FSTRANS; + PFLAGS_RESTORE(&tp->t_pflags); return (error); } @@ -734,13 +735,13 @@ if (commit_lsn == -1 && 
!shutdown) shutdown = XFS_ERROR(EIO); } + PFLAGS_RESTORE(&tp->t_pflags); xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0); xfs_trans_free_busy(tp); xfs_trans_free(tp); XFS_STATS_INC(xfsstats.xs_trans_empty); if (commit_lsn_p) *commit_lsn_p = commit_lsn; - current->flags &= ~PF_FSTRANS; return (shutdown); } #if defined(XLOG_NOLOG) || defined(DEBUG) @@ -823,8 +824,8 @@ * had pinned, clean up, free trans structure, and return error. */ if (error || commit_lsn == -1) { + PFLAGS_RESTORE(&tp->t_pflags); xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); - current->flags &= ~PF_FSTRANS; return XFS_ERROR(EIO); } @@ -850,15 +851,6 @@ * running in simulation mode (the log is explicitly turned * off). */ -#if defined(XLOG_NOLOG) || defined(DEBUG) - if (xlog_debug) { - tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; - tp->t_logcb.cb_arg = tp; - error = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb)); - } else { - xfs_trans_committed(tp, 0); - } -#else tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; tp->t_logcb.cb_arg = tp; @@ -869,7 +861,9 @@ * waiting for an item to unlock. 
*/ error = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb)); -#endif + + /* mark this thread as no longer being in a transaction */ + PFLAGS_RESTORE(&tp->t_pflags); /* * Once all the items of the transaction have been copied @@ -906,9 +900,6 @@ XFS_STATS_INC(xfsstats.xs_trans_async); } - /* mark this thread as no longer being in a transaction */ - current->flags &= ~PF_FSTRANS; - return (error); } @@ -1108,12 +1099,13 @@ } xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); } + + /* mark this thread as no longer being in a transaction */ + PFLAGS_RESTORE(&tp->t_pflags); + xfs_trans_free_items(tp, flags); xfs_trans_free_busy(tp); xfs_trans_free(tp); - - /* mark this thread as no longer being in a transaction */ - current->flags &= ~PF_FSTRANS; } diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_trans.h HACK/fs/xfs/xfs_trans.h --- ORIG/fs/xfs/xfs_trans.h 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_trans.h 2003-07-15 22:10:12.000000000 -0500 @@ -409,6 +409,7 @@ xfs_trans_header_t t_header; /* header for in-log trans */ unsigned int t_busy_free; /* busy descs free */ xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ + xfs_pflags_t t_pflags; /* saved pflags state */ } xfs_trans_t; #endif /* __KERNEL__ */ diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_vfsops.c HACK/fs/xfs/xfs_vfsops.c --- ORIG/fs/xfs/xfs_vfsops.c 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_vfsops.c 2003-07-15 22:24:11.000000000 -0500 @@ -96,10 +96,6 @@ #endif /* DEBUG */ #ifdef XFS_DABUF_DEBUG extern lock_t xfs_dabuf_global_lock; -#endif - extern int xfs_refcache_size; - -#ifdef XFS_DABUF_DEBUG spinlock_init(&xfs_dabuf_global_lock, "xfsda"); #endif @@ -177,8 +173,6 @@ xfs_init_procfs(); xfs_sysctl_register(); - xfs_refcache_size = xfs_params.refcache_size; - /* * The inode hash table is created on a per mounted * file system bases. @@ -244,7 +238,7 @@ /* * At this point the superblock has not been read * in, therefore we do not know the block size. 
- * Before, the mount call ends we will convert + * Before the mount call ends we will convert * these to FSBs. */ mp->m_dalign = ap->sunit; @@ -252,11 +246,11 @@ } if (ap->logbufs != 0 && ap->logbufs != -1 && - (ap->logbufs < XLOG_NUM_ICLOGS || + (ap->logbufs < XLOG_MIN_ICLOGS || ap->logbufs > XLOG_MAX_ICLOGS)) { cmn_err(CE_WARN, "XFS: invalid logbufs value: %d [not %d-%d]", - ap->logbufs, XLOG_NUM_ICLOGS, XLOG_MAX_ICLOGS); + ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); return XFS_ERROR(EINVAL); } mp->m_logbufs = ap->logbufs; @@ -619,6 +613,8 @@ return XFS_ERROR(error); } +#define REMOUNT_READONLY_FLAGS (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) + STATIC int xfs_mntupdate( bhv_desc_t *bdp, @@ -644,7 +640,7 @@ xfs_finish_reclaim_all(mp, 0); do { - VFS_SYNC(vfsp, SYNC_ATTR|SYNC_WAIT, NULL, error); + VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error); pagebuf_delwri_flush(mp->m_ddev_targp, PBDF_WAIT, &pincount); } while (pincount); @@ -1514,7 +1510,7 @@ * Now check to see if the log needs a "dummy" transaction. 
*/ - if (xfs_log_need_covered(mp)) { + if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { xfs_trans_t *tp; xfs_inode_t *ip; @@ -1650,7 +1646,7 @@ if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_LOGBUFS); - return -EINVAL; + return EINVAL; } args->logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { @@ -1659,7 +1655,7 @@ if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_LOGBSIZE); - return -EINVAL; + return EINVAL; } last = strlen(value) - 1; if (value[last] == 'K' || value[last] == 'k') { @@ -1673,28 +1669,28 @@ if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_LOGDEV); - return -EINVAL; + return EINVAL; } strncpy(args->logname, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_MTPT)) { if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_MTPT); - return -EINVAL; + return EINVAL; } strncpy(args->mtpt, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_RTDEV); - return -EINVAL; + return EINVAL; } strncpy(args->rtname, value, MAXNAMELEN); } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_BIOSIZE); - return -EINVAL; + return EINVAL; } iosize = simple_strtoul(value, &eov, 10); args->flags |= XFSMNT_IOSIZE; @@ -1710,7 +1706,7 @@ #ifndef XFS_BIG_FILESYSTEMS printk("XFS: %s option not allowed on this system\n", MNTOPT_INO64); - return -EINVAL; + return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { args->flags |= XFSMNT_NOALIGN; @@ -1718,14 +1714,14 @@ if (!value || !*value) { printk("XFS: %s option requires an argument\n", MNTOPT_SUNIT); - return -EINVAL; + return EINVAL; } dsunit = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { printk("XFS: %s option 
requires an argument\n", MNTOPT_SWIDTH); - return -EINVAL; + return EINVAL; } dswidth = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_NOUUID)) { @@ -1739,33 +1735,33 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n"); } else { printk("XFS: unknown mount option [%s].\n", this_char); - return -EINVAL; + return EINVAL; } } if (args->flags & XFSMNT_NORECOVERY) { if ((vfsp->vfs_flag & VFS_RDONLY) == 0) { printk("XFS: no-recovery mounts must be read-only.\n"); - return -EINVAL; + return EINVAL; } } if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { printk( "XFS: sunit and swidth options incompatible with the noalign option\n"); - return -EINVAL; + return EINVAL; } if ((dsunit && !dswidth) || (!dsunit && dswidth)) { printk("XFS: sunit and swidth must be specified together\n"); - return -EINVAL; + return EINVAL; } if (dsunit && (dswidth % dsunit != 0)) { printk( "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n", dswidth, dsunit); - return -EINVAL; + return EINVAL; } if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { diff --exclude=dmapi -rNu ORIG/fs/xfs/xfs_vnodeops.c HACK/fs/xfs/xfs_vnodeops.c --- ORIG/fs/xfs/xfs_vnodeops.c 2003-07-23 09:16:41.000000000 -0500 +++ HACK/fs/xfs/xfs_vnodeops.c 2003-07-15 22:06:56.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -318,6 +318,9 @@ vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if (vp->v_vfsp->vfs_flag & VFS_RDONLY) + return XFS_ERROR(EROFS); + /* * Cannot set certain attributes. */ @@ -578,7 +581,8 @@ /* * Can't change extent size if any extents are allocated. 
*/ - if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + (mask & XFS_AT_EXTSIZE) && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != vap->va_extsize) ) { code = XFS_ERROR(EINVAL); /* EFBIG? */ @@ -658,7 +662,7 @@ if (vap->va_size > ip->i_d.di_size) { code = xfs_igrow_start(ip, vap->va_size, credp); xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else if (vap->va_size < ip->i_d.di_size) { + } else if (vap->va_size <= ip->i_d.di_size) { xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)vap->va_size); @@ -701,7 +705,7 @@ if (vap->va_size > ip->i_d.di_size) { xfs_igrow_finish(tp, ip, vap->va_size, !(flags & ATTR_DMI)); - } else if ((vap->va_size < ip->i_d.di_size) || + } else if ((vap->va_size <= ip->i_d.di_size) || ((vap->va_size == 0) && ip->i_d.di_nextents)) { /* * signal a sync transaction unless @@ -1286,7 +1290,7 @@ * of the file. If not, then there is nothing to do. */ end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); - last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAX_FILE_OFFSET); + last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) return (0); @@ -3966,6 +3970,7 @@ */ if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; @@ -3974,7 +3979,7 @@ XFS_MOUNT_ILOCK(mp); vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - + ip->i_flags |= XFS_IRECLAIMABLE; XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3989,19 +3994,20 @@ xfs_ihash_t *ih = ip->i_hash; int error; - if (!locked) - xfs_ilock(ip, XFS_ILOCK_EXCL); - /* The hash lock here protects a thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. * Once we have the XFS_IRECLAIM flag set it will not touch * us. 
*/ write_lock(&ih->ih_lock); - if (ip->i_flags & XFS_IRECLAIM || (!locked && XFS_ITOV_NULL(ip))) { + if ((ip->i_flags & XFS_IRECLAIM) || + (!(ip->i_flags & XFS_IRECLAIMABLE) && + (XFS_ITOV_NULL(ip) == NULL))) { write_unlock(&ih->ih_lock); - if (!locked) + if (locked) { + xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); + } return(1); } ip->i_flags |= XFS_IRECLAIM; @@ -4020,6 +4026,7 @@ */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); } @@ -4043,8 +4050,16 @@ ASSERT(ip->i_update_core == 0); ASSERT(ip->i_itemp == NULL || ip->i_itemp->ili_format.ilf_fields == 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } else if (locked) { + /* + * We are not interested in doing an iflush if we're + * in the process of shutting down the filesystem forcibly. + * So, just reclaim the inode. + */ + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); } - xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); return 0; @@ -4636,9 +4651,9 @@ llen = bf->l_len > 0 ? 
bf->l_len - 1 : bf->l_len; if ( (bf->l_start < 0) - || (bf->l_start > XFS_MAX_FILE_OFFSET) + || (bf->l_start > XFS_MAXIOFFSET(mp)) || (bf->l_start + llen < 0) - || (bf->l_start + llen > XFS_MAX_FILE_OFFSET)) + || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) return XFS_ERROR(EINVAL); bf->l_whence = 0; diff --exclude=dmapi -rNu ORIG/include/linux/fs.h HACK/include/linux/fs.h --- ORIG/include/linux/fs.h 2003-07-23 09:17:10.000000000 -0500 +++ HACK/include/linux/fs.h 2003-07-23 12:12:12.000000000 -0500 @@ -1393,6 +1393,7 @@ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) extern void inode_init_once(struct inode *); +extern void _inode_init_once(struct inode *); extern void iput(struct inode *); extern void refile_inode(struct inode *inode); diff --exclude=dmapi -rNu ORIG/kernel/ksyms.c HACK/kernel/ksyms.c --- ORIG/kernel/ksyms.c 2003-07-23 09:17:27.000000000 -0500 +++ HACK/kernel/ksyms.c 2003-07-23 10:09:55.000000000 -0500 @@ -163,6 +163,7 @@ EXPORT_SYMBOL(unlock_new_inode); EXPORT_SYMBOL(iput); EXPORT_SYMBOL(inode_init_once); +EXPORT_SYMBOL(_inode_init_once); EXPORT_SYMBOL(force_delete); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(follow_down); diff --exclude=dmapi -rNu ORIG/mm/filemap.c HACK/mm/filemap.c --- ORIG/mm/filemap.c 2003-07-23 09:17:12.000000000 -0500 +++ HACK/mm/filemap.c 2003-07-23 12:16:55.000000000 -0500 @@ -3358,6 +3358,36 @@ return err; } + +ssize_t +generic_file_write_nolock(struct file *file,const char *buf,size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode->i_mapping->host; + int err; + + if ((ssize_t) count < 0) + return -EINVAL; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + if (file->f_flags & O_DIRECT) { + /* do_generic_direct_write may drop i_sem during the + actual IO */ + down_read(&inode->i_alloc_sem); + err = do_generic_direct_write(file, buf, count, ppos); + up_read(&inode->i_alloc_sem); + if (unlikely(err == -ENOTBLK)) + err = do_odirect_fallback(file, inode, buf, 
count, ppos); + } else { + err = do_generic_file_write(file, buf, count, ppos); + } + + return err; +} + +EXPORT_SYMBOL(generic_file_write_nolock); + void __init page_cache_init(unsigned long mempages) { unsigned long htable_size, order;