From: Dave Chinner <dchinner@xxxxxxxxxx>

v5 filesystems use 512 byte inodes as a minimum, so they read inodes
in clusters that effectively hold half as many inodes as a v4
filesystem with 256 byte inodes does. For v5 filesystems, scale the
inode cluster size with the size of the inode so that we keep a
constant ratio of 32 inodes per cluster for all inode IO.
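
Purely for illustration (this is not part of the patch), here is a
standalone sketch of the scaling arithmetic, assuming the in-tree
values XFS_INODE_BIG_CLUSTER_SIZE = 8192 bytes and
XFS_DINODE_MIN_SIZE = 256 bytes:

	#include <stdio.h>

	/*
	 * Illustrative copies of the kernel constants; 8192 and 256 are
	 * the values currently defined in the XFS headers.
	 */
	#define XFS_INODE_BIG_CLUSTER_SIZE	8192	/* base cluster size, bytes */
	#define XFS_DINODE_MIN_SIZE		256	/* smallest supported inode size */

	int main(void)
	{
		int	isizes[] = { 256, 512, 1024, 2048 };
		int	i;

		for (i = 0; i < 4; i++) {
			/* scale the cluster size with the inode size... */
			int	cluster = XFS_INODE_BIG_CLUSTER_SIZE *
					(isizes[i] / XFS_DINODE_MIN_SIZE);

			/* ...so the inodes-per-cluster ratio stays at 32 */
			printf("inode size %4d: cluster %6d bytes, %d inodes/cluster\n",
				isizes[i], cluster, cluster / isizes[i]);
		}
		return 0;
	}

i.e. a 512 byte inode v5 filesystem goes from an 8k cluster holding
16 inodes to a 16k cluster holding 32 inodes.
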
This only works if mkfs.xfs sets the inode alignment appropriately
for larger inode clusters, so this functionality is made conditional
on mkfs doing the right thing. xfs_repair needs to know about
the inode alignment changes, too.
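
Again for illustration only, a sketch of the gating condition; the
struct, fields and example values below are just stand-ins, and the
helper mirrors the truncating byte-to-fsblock conversion that
XFS_B_TO_FSBT() performs:

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-ins for the two mount/superblock fields the check uses. */
	struct example_mount {
		unsigned int	blocklog;	/* log2 of the fs block size */
		unsigned int	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
	};

	/* Truncating bytes-to-fsblocks conversion, as XFS_B_TO_FSBT() does. */
	static unsigned int b_to_fsbt(struct example_mount *mp, unsigned int bytes)
	{
		return bytes >> mp->blocklog;
	}

	/*
	 * Only use the scaled-up cluster size if mkfs aligned inode chunks
	 * to at least that many blocks; otherwise keep the existing size.
	 */
	static bool can_use_cluster(struct example_mount *mp, unsigned int new_size)
	{
		return mp->sb_inoalignmt >= b_to_fsbt(mp, new_size);
	}

	int main(void)
	{
		/* example: 4k blocks, mkfs set a 4 block inode alignment */
		struct example_mount	mp = { .blocklog = 12, .sb_inoalignmt = 4 };

		/* a 16k cluster (512 byte inodes) spans 4 blocks -> acceptable */
		printf("16k cluster ok? %d\n", can_use_cluster(&mp, 16384));
		/* a 32k cluster (1024 byte inodes) spans 8 blocks -> fall back */
		printf("32k cluster ok? %d\n", can_use_cluster(&mp, 32768));
		return 0;
	}

i.e. with 4k blocks, growing to a 16k cluster requires mkfs to have
set an inode alignment of at least 4 filesystem blocks; if it hasn't,
the mount keeps the existing cluster size.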

FWIW, results with a lookaside cache size of 37 entries and this
patch applied are below (note: finobt is enabled on the v5
filesystems; v4 uses the defaults, including a 256 byte inode size):

Wall time:
              create  bulkstat  find+stat  ls -R  unlink
    v4          237s      161s       173s   201s    299s
    v5          235s      163s       205s    31s    356s
    patched     234s      160s       182s    29s    317s

System time:
              create  bulkstat  find+stat  ls -R  unlink
    v4         2601s     2490s      1653s  1656s   2960s
    v5         2637s     2497s      1681s    20s   3216s
    patched    2613s     2451s      1658s    20s   3007s

Lookaside cache hit rate:
              create  bulkstat  find+stat  ls -R  unlink
    v4          0.73      0.91       0.71   0.70    0.71
    v5          0.76      0.88       0.68   0.10    0.75
    patched     0.81      0.93       0.70   0.08    0.84

So, wall time is the same or lower across the board, system time is
the same or lower across the board, and the cache hit rates all
improve except for the ls -R case, which is a pure cold-cache
directory read workload on v5 filesystems.

Hence this patch removes most of the performance and CPU usage
differential between v4 and v5 filesystems on traversal-related
workloads.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
fs/xfs/xfs_mount.c | 14 ++++++++++++++
fs/xfs/xfs_mount.h | 2 +-
fs/xfs/xfs_trans_resv.c | 3 +--
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8ac98c7..788d666d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -722,8 +722,22 @@ xfs_mountfs(
 	 * Set the inode cluster size.
 	 * This may still be overridden by the file system
 	 * block size if it is larger than the chosen cluster size.
+	 *
+	 * For v5 filesystems, scale the cluster size with the inode size to
+	 * keep a constant ratio of inode per cluster buffer, but only if mkfs
+	 * has set the inode alignment value appropriately for larger cluster
+	 * sizes.
 	 */
 	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		int	new_size = mp->m_inode_cluster_size;
+
+		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+			mp->m_inode_cluster_size = new_size;
+		xfs_info(mp, "Using inode cluster size of %d bytes",
+			 mp->m_inode_cluster_size);
+	}
 
 	/*
 	 * Set inode alignment fields
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2a997dc..a4f7f94 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -111,7 +111,7 @@ typedef struct xfs_mount {
 	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
 	__uint8_t		m_agno_log;	/* log #ag's */
 	__uint8_t		m_agino_log;	/* #bits for agino in inum */
-	__uint16_t		m_inode_cluster_size;/* min inode buf size */
+	uint			m_inode_cluster_size;/* min inode buf size */
 	uint			m_blockmask;	/* sb_blocksize-1 */
 	uint			m_blockwsize;	/* sb_blocksize in words */
 	uint			m_blockwmask;	/* blockwsize-1 */
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index ae7a185..1494f62 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -403,8 +403,7 @@ xfs_calc_ifree_reservation(
 		xfs_calc_inode_res(mp, 1) +
 		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 		xfs_calc_buf_res(2, XFS_FSB_TO_B(mp, 1)) +
-		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
-		    XFS_INODE_CLUSTER_SIZE(mp)) +
+		MAX(XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
 		xfs_calc_buf_res(1, 0) +
 		xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
 				 mp->m_in_maxlevels, 0) +
--
1.8.3.2