xfs
[Top] [All Lists]

GCC 2.95.x and stalled processes [PATCH]

To: <linux-xfs@xxxxxxxxxxx>
Subject: GCC 2.95.x and stalled processes [PATCH]
From: "Hong H. Pham" <hhp@xxxxxxxxxxxxx>
Date: Wed, 7 Feb 2001 13:53:16 -0500 (EST)
Reply-to: <hhp@xxxxxxxxxxxxx>
Sender: owner-linux-xfs@xxxxxxxxxxx
Hi,

I have tracked down the probable cause for processes hanging when acting
on an XFS filesystem, running on a system with a kernel compiled with
GCC 2.95.x.  I have included a patch that seems to correct this problem.
The first mention of this bug was in the following thread:
http://oss.sgi.com/projects/linux-xfs/indexed/msg01455.html

This bug can be easily observed by untaring a large tarball on to the
XFS mount.  For testing, I used the Debian base2_2.tgz tarball; it has a
good mix of large and many small files (eg. the /dev/* files).
ftp://ftp.us.debian.org:/debian/dists/potato/main/disks-i386/current/base2_2.tgz

This bug is only triggered when compiling a kernel using GCC 2.95.x with
the -march=i686 optimization flag enabled.  Using -march=i386, -march=i486,
or -march=i586 will produce a working kernel.

In xfs_trans_ail.c, xfs_trans_push_ail() line 141, GCC 2.95.x generates
incorrect code for the following line:

        while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) &&
                (XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) {

More specifically, XFS_LSN_CMP(), really a macro wrapper for the static
inline function _lsn_cmp(), defined in xfs_log.h, is inlined incorrectly
by the i686 optimizer.  The resulting code causes the condition in the
while() statement to always be false.  Consequently the body of the
while() statement never executes.

Making _lsm_cmp() into a non-inlined function is a quick fix for the
problem.  I don't know specifically what triggers this bug, so I can't
offer any suggestions for an elegant work around for an inlined
_lsm_cmp().

Cheers,
..h

---- Patch Starts ----

diff -urN linux.orig/Makefile linux/Makefile
--- linux.orig/Makefile Thu Feb  1 12:10:24 2001
+++ linux/Makefile      Wed Feb  7 13:39:22 2001
@@ -29,7 +29,7 @@
 #comment out this line if compiling on RH 7.0
 #CC            = $(CROSS_COMPILE)gcc -V egcs-2.91.66
 # AND uncomment the following line
-CC             = $(CROSS_COMPILE)kgcc
+CC             = $(CROSS_COMPILE)gcc
 CPP            = $(CC) -E
 AR             = $(CROSS_COMPILE)ar
 NM             = $(CROSS_COMPILE)nm
diff -urN linux.orig/fs/xfs/xfs_log.h linux/fs/xfs/xfs_log.h
--- linux.orig/fs/xfs/xfs_log.h Tue Nov 14 18:40:34 2000
+++ linux/fs/xfs/xfs_log.h      Wed Feb  7 13:30:17 2001
@@ -50,7 +50,8 @@
  * By comparing each compnent, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
  */
-static inline xfs_lsn_t        _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2, 
xfs_arch_t arch)
+
+static inline xfs_lsn_t        _lsn_cmp_inline(xfs_lsn_t lsn1, xfs_lsn_t lsn2, 
xfs_arch_t arch)
 {
        if (CYCLE_LSN(lsn1, arch) != CYCLE_LSN(lsn2, arch))
                return (CYCLE_LSN(lsn1, arch)<CYCLE_LSN(lsn2, arch))? -999 : 
999;
@@ -60,6 +61,19 @@

         return 0;
 }
+
+#define _lsn_cmp_inline(x,y,arch) _lsn_cmp(x,y,arch)
+/*
+ * Unfortunately, the __i686__ or __pentiumpro__ macros do not seem to be
+ * defined in gcc-2.95.x, so checking for GCC version will have to do for now.
+ */
+#ifdef __GNUC__
+# if ((__GNUC__ == 2) && (__GNUC_MINOR__ == 95))
+/* defined in xfs_trans_ail.c */
+# undef _lsn_cmp
+extern xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2, xfs_arch_t arch);
+# endif
+#endif

 #define        XFS_LSN_CMP_ARCH(x,y,arch)      _lsn_cmp(x, y, arch)
 #define        XFS_LSN_CMP(x,y) XFS_LSN_CMP_ARCH(x,y,ARCH_NOCONVERT)
diff -urN linux.orig/fs/xfs/xfs_trans_ail.c linux/fs/xfs/xfs_trans_ail.c
--- linux.orig/fs/xfs/xfs_trans_ail.c   Mon Sep 25 00:42:07 2000
+++ linux/fs/xfs/xfs_trans_ail.c        Wed Feb  7 13:30:17 2001
@@ -59,6 +59,38 @@
 #define        xfs_ail_check(a)
 #endif /* XFSDEBUG */

+#ifdef __GNUC__
+# if ((__GNUC__ == 2) && (__GNUC_MINOR__ == 95))
+/*
+ * Un-inlined version of _lsn_cmp(), to work around a gcc-2.95.x inlining
+ * bug.  This bug is only invoked by the -march=i686 flag; other
+ * architecture specific optimizations, such as -march=i586, seem fine.
+ *
+ * I don't know where this function should belong; although the lsn macros
+ * are defined in xfs_log.h, it doesn't really belong with the xlog_*
+ * functions in xfs_log.c.
+ *
+ * HHP
+ */
+xfs_lsn_t _lsn_cmp(
+       xfs_lsn_t       lsn1,
+       xfs_lsn_t       lsn2,
+       xfs_arch_t      arch)
+{
+       /* copy and pasted from xfs_log.h */
+       if (CYCLE_LSN(lsn1, arch) != CYCLE_LSN(lsn2, arch))
+               return (CYCLE_LSN(lsn1, arch)<CYCLE_LSN(lsn2, arch))? -999 : 
999;
+
+       if (BLOCK_LSN(lsn1, arch) != BLOCK_LSN(lsn2, arch))
+               return (BLOCK_LSN(lsn1, arch)<BLOCK_LSN(lsn2, arch))? -999 : 
999;
+
+       return 0;
+
+       /* return _lsn_cmp_inline( lsn1, lsn2, arch ); */
+}
+# endif
+#endif
+

 /*
  * This is called by the log manager code to determine the LSN


<Prev in Thread] Current Thread [Next in Thread>