On Mon, Feb 24, 2014 at 05:29:20PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
>
> A bit of perf magic showed that scalability was limits to 3-4
> concurrent threads due to contention on a lock inside in something
> called __dcigettext(). That some library somewhere that repair is
> linked against, and it turns out to be inside the translation
> infrastructure to support the _() string mechanism:
>
> # Samples: 34K of event 'cs'
> # Event count (approx.): 495567
> #
> # Overhead Command Shared Object Symbol
> # ........ ............. ................. ..............
> #
> 60.30% xfs_repair [kernel.kallsyms] [k] __schedule
> |
> --- 0x63fffff9c
> process_bmbt_reclist_int
> |
> |--39.95%-- __dcigettext
> | __lll_lock_wait
> | system_call_fastpath
> | SyS_futex
> | do_futex
> | futex_wait
> | futex_wait_queue_me
> | schedule
> | __schedule
> |
> |--8.91%-- __lll_lock_wait
> | system_call_fastpath
> | SyS_futex
> | do_futex
> | futex_wait
> | futex_wait_queue_me
> | schedule
> | __schedule
> --51.13%-- [...]
>
> Fix this by initialising global variables that hold the translated
> strings at startup, hence avoiding the need to do repeated runtime
> translation of the same strings.
>
> Runtime of an unpatched xfs_repair is roughly:
>
> XFS_REPAIR Summary Fri Dec 6 11:15:50 2013
>
> Phase Start End Duration
> Phase 1: 12/06 10:56:21 12/06 10:56:21
> Phase 2: 12/06 10:56:21 12/06 10:56:23 2 seconds
> Phase 3: 12/06 10:56:23 12/06 11:01:31 5 minutes, 8 seconds
> Phase 4: 12/06 11:01:31 12/06 11:07:08 5 minutes, 37 seconds
> Phase 5: 12/06 11:07:08 12/06 11:07:09 1 second
> Phase 6: 12/06 11:07:09 12/06 11:15:49 8 minutes, 40 seconds
> Phase 7: 12/06 11:15:49 12/06 11:15:50 1 second
>
> Total run time: 19 minutes, 29 seconds
>
> Patched version:
>
> Phase Start End Duration
> Phase 1: 12/06 10:36:29 12/06 10:36:29
> Phase 2: 12/06 10:36:29 12/06 10:36:31 2 seconds
> Phase 3: 12/06 10:36:31 12/06 10:40:08 3 minutes, 37 seconds
> Phase 4: 12/06 10:40:08 12/06 10:43:42 3 minutes, 34 seconds
> Phase 5: 12/06 10:43:42 12/06 10:43:42
> Phase 6: 12/06 10:43:42 12/06 10:50:28 6 minutes, 46 seconds
> Phase 7: 12/06 10:50:28 12/06 10:50:29 1 second
>
> Total run time: 14 minutes
>
> Big win!
>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
> repair/dinode.c | 49 +++++++++++++++++++++++++++++++++++--------------
> repair/dinode.h | 3 +++
> repair/scan.c | 7 +------
> repair/xfs_repair.c | 2 ++
> 4 files changed, 41 insertions(+), 20 deletions(-)
>
> diff --git a/repair/dinode.c b/repair/dinode.c
> index 3115bd0..4953a56 100644
> --- a/repair/dinode.c
> +++ b/repair/dinode.c
> @@ -32,6 +32,37 @@
> #include "threads.h"
>
> /*
> + * gettext lookups for translations of strings use mutexes internally to
> + * the library. Hence when we come through here doing parallel scans in
> + * multiple AGs, then all do concurrent text conversions and serialise
Typo: they ?
Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>
> + * on the translation string lookups. Let's avoid doing repeated lookups
> + * by making them static variables and only assigning the translation
> + * once.
> + */
> +static char *forkname_data;
> +static char *forkname_attr;
> +static char *ftype_real_time;
> +static char *ftype_regular;
> +
> +void
> +dinode_bmbt_translation_init(void)
> +{
> + forkname_data = _("data");
> + forkname_attr = _("attr");
> + ftype_real_time = _("real-time");
> + ftype_regular = _("regular");
> +}
> +
> +char *
> +get_forkname(int whichfork)
> +{
> +
> + if (whichfork == XFS_DATA_FORK)
> + return forkname_data;
> + return forkname_attr;
> +}
> +
> +/*
> * inode clearing routines
> */
>
> @@ -542,7 +573,7 @@ process_bmbt_reclist_int(
> xfs_dfiloff_t op = 0; /* prev offset */
> xfs_dfsbno_t b;
> char *ftype;
> - char *forkname;
> + char *forkname = get_forkname(whichfork);
> int i;
> int state;
> xfs_agnumber_t agno;
> @@ -552,15 +583,10 @@ process_bmbt_reclist_int(
> xfs_agnumber_t locked_agno = -1;
> int error = 1;
>
> - if (whichfork == XFS_DATA_FORK)
> - forkname = _("data");
> - else
> - forkname = _("attr");
> -
> if (type == XR_INO_RTDATA)
> - ftype = _("real-time");
> + ftype = ftype_real_time;
> else
> - ftype = _("regular");
> + ftype = ftype_regular;
>
> for (i = 0; i < *numrecs; i++) {
> libxfs_bmbt_disk_get_all(rp + i, &irec);
> @@ -1110,7 +1136,7 @@ process_btinode(
> xfs_ino_t lino;
> xfs_bmbt_ptr_t *pp;
> xfs_bmbt_key_t *pkey;
> - char *forkname;
> + char *forkname = get_forkname(whichfork);
> int i;
> int level;
> int numrecs;
> @@ -1122,11 +1148,6 @@ process_btinode(
> *tot = 0;
> *nex = 0;
>
> - if (whichfork == XFS_DATA_FORK)
> - forkname = _("data");
> - else
> - forkname = _("attr");
> -
> magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_BMAP_CRC_MAGIC
> : XFS_BMAP_MAGIC;
>
> diff --git a/repair/dinode.h b/repair/dinode.h
> index d9197c1..7521521 100644
> --- a/repair/dinode.h
> +++ b/repair/dinode.h
> @@ -127,4 +127,7 @@ get_bmapi(xfs_mount_t *mp,
> xfs_dfiloff_t bno,
> int whichfork );
>
> +void dinode_bmbt_translation_init(void);
> +char * get_forkname(int whichfork);
> +
> #endif /* _XR_DINODE_H */
> diff --git a/repair/scan.c b/repair/scan.c
> index 73b4581..b12f48b 100644
> --- a/repair/scan.c
> +++ b/repair/scan.c
> @@ -171,17 +171,12 @@ scan_bmapbt(
> xfs_bmbt_rec_t *rp;
> xfs_dfiloff_t first_key;
> xfs_dfiloff_t last_key;
> - char *forkname;
> + char *forkname = get_forkname(whichfork);
> int numrecs;
> xfs_agnumber_t agno;
> xfs_agblock_t agbno;
> int state;
>
> - if (whichfork == XFS_DATA_FORK)
> - forkname = _("data");
> - else
> - forkname = _("attr");
> -
> /*
> * unlike the ag freeblock btrees, if anything looks wrong
> * in an inode bmap tree, just bail. it's possible that
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 9e0502a..bac334f 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -29,6 +29,7 @@
> #include "prefetch.h"
> #include "threads.h"
> #include "progress.h"
> +#include "dinode.h"
>
> #define rounddown(x, y) (((x)/(y))*(y))
>
> @@ -533,6 +534,7 @@ main(int argc, char **argv)
> setlocale(LC_ALL, "");
> bindtextdomain(PACKAGE, LOCALEDIR);
> textdomain(PACKAGE);
> + dinode_bmbt_translation_init();
>
> temp_mp = &xfs_m;
> setbuf(stdout, NULL);
> --
> 1.8.4.rc3
>
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
|