xfs
[Top] [All Lists]

BUG 803884 - double-fault with HIGHMEM

To: nb@xxxxxxx
Subject: BUG 803884 - double-fault with HIGHMEM
From: pv@xxxxxxxxxxxxx (dxm@xxxxxxxxxxxx)
Date: Wed, 4 Oct 2000 14:35:21 -0700 (PDT)
Cc: linux-xfs@xxxxxxxxxxx
Reply-to: sgi.bugs.xfs@xxxxxxxxxxxxxxxxx
Sender: owner-linux-xfs@xxxxxxxxxxx
Webexec: webpvsubmit,PvProjectIncident
Webpv: proxy2.melbourne.sgi.com
View Incident: 
http://co-op.engr.sgi.com/BugWorks/code/bwxquery.cgi?search=Search&wlong=1&view_type=Bug&wi=803884

Submitter : dxm                       Submitter Domain : engr               
Assigned Engineer : nb                Assigned Domain : sgi.com             
Assigned Group : xfs-linux            Category : software                   
Customer Reported : F                 Priority : 3                          
Project : xfs-linux                   Status : open                         
Description :
This is very unlikely to be related to XFS.

However, running the auto-qa suite on a 1GB machine with
HIGHMEM enabled can cause a process to hang up and render
the machine pretty much useless.

I've seen a couple of different variants but the problem 
has always occurred below load_elf_binary, padzero and
clear_user.

Posting this because it's bound to bite someone who tries
to use our kernel on a HIGHMEM machine and because it's 
stopping me from running auto-qa on this config.

[0]kdb> btp 5552
    EBP       EIP         Function(args)
0xf49db8bc 0xc011721d schedule+0x415 (0xf49da000, 0xf49da000, 0xf5447220)
                               kernel .text 0xc0100000 0xc0116e08 0xc01177f0
0xf49db8e4 0xc0107a84 __down+0x6c
                               kernel .text 0xc0100000 0xc0107a18 0xc0107adc
0xf49db8f8 0xc0107c27 __down_failed+0xb (0xf49da000, 0x0, 0xf5447220, 
0xf49db98c, 0xf532231c)
                               kernel .text 0xc0100000 0xc0107c1c 0xc0107c30
           0xc01f7cd6 stext_lock+0x4de
                               kernel .text.lock 0xc01f77f8 0xc01f77f8 
0xc01fd5a0
0xf49db99c 0xc0112acc do_page_fault+0x60 (0xf49db9ac, 0x0, 0x100000, 
0xf54263ac, 0xf5426360)
                               kernel .text 0xc0100000 0xc0112a6c 0xc0112ea0
           0xc0109169 error_code+0x2d
                               kernel .text 0xc0100000 0xc010913c 0xc0109174
Interrupt registers:
eax = 0x00000000 ebx = 0x00100000 ecx = 0xf54263ac edx = 0xf5426360 
esi = 0x00000000 edi = 0xf5447220 esp = 0xf49db9e0 eip = 0xc0153ce5 
ebp = 0xf49dba18  ss = 0x00000018  cs = 0x00000010 eflags = 0x00010286 
 ds = 0xf5420018  es = 0x00000018 origeax = 0xffffffff &regs = 0xf49db9ac
           0xc0153ce5 ext2_get_block+0x119 (0xf5447220, 0xc, 0xf53429c0, 0x0)
                               kernel .text 0xc0100000 0xc0153bcc 0xc0154150
0xf49dba98 0xc0135300 block_read_full_page+0x124 (0xc1ff1fac, 0xc0153bcc)
                               kernel .text 0xc0100000 0xc01351dc 0xc01354a0
0xf49dbaa8 0xc0154349 ext2_readpage+0x11 (0xf5ac7200, 0xc1ff1fac)
[0]more> 
                               kernel .text 0xc0100000 0xc0154338 0xc0154350
0xf49dbacc 0xc0126ee4 read_cluster_nonblocking+0xdc (0xf5ac7200, 0x2, 0x18)
                               kernel .text 0xc0100000 0xc0126e08 0xc0126f20
0xf49dbb0c 0xc01282ac filemap_nopage+0x254 (0xf5aadaa0, 0x804b000, 0x1)
                               kernel .text 0xc0100000 0xc0128058 0xc0128464
0xf49dbb2c 0xc01249ed do_no_page+0x51 (0xf5322300, 0xf5aadaa0, 0x804b9cc, 0x1, 
0xf45a212c)
                               kernel .text 0xc0100000 0xc012499c 0xc0124a4c
0xf49dbb5c 0xc0124b56 handle_mm_fault+0x10a (0xf5322300, 0xf5aadaa0, 
0x804b9cc, 0x1, 0xf49da000)
                               kernel .text 0xc0100000 0xc0124a4c 0xc0124bec
0xf49dbc10 0xc0112bce do_page_fault+0x162 (0xf49dbc20, 0x2, 0x634, 0x18d, 
0x18d)
                               kernel .text 0xc0100000 0xc0112a6c 0xc0112ea0
           0xc0109169 error_code+0x2d
                               kernel .text 0xc0100000 0xc010913c 0xc0109174
Interrupt registers:
eax = 0x00000000 ebx = 0x00000634 ecx = 0x0000018d edx = 0x0000018d 
esi = 0x00000000 edi = 0x0804b9cc esp = 0xf49dbc54 eip = 0xc01f1117 
ebp = 0xf49dbc64  ss = 0x00000018  cs = 0x00000010 eflags = 0x00010246 
 ds = 0x00000018  es = 0x00000018 origeax = 0xffffffff &regs = 0xf49dbc20
           0xc01f1117 clear_user+0x37 (0x804b9cc, 0x634)
                               kernel .text 0xc0100000 0xc01f10e0 0xc01f112c
0xf49dbc74 0xc014974e padzero+0x1e (0x804b9cc, 0x804b9cc, 0x804bc10, 
0xc02b8630, 0xc0149e14)
                               kernel .text 0xc0100000 0xc0149730 0xc0149754
0xf49dbe0c 0xc014a874 load_elf_binary+0xa60 (0xf49dbe68, 0xf49dbfc4, 
0xf49dbe68)
[0]more> 
                               kernel .text 0xc0100000 0xc0149e14 0xc014a9d8
0xf49dbe44 0xc013c848 search_binary_handler+0x68 (0xf49dbe68, 0xf49dbfc4, 
0xe95e2000, 0xe95e2000, 0x80a2c78)
                               kernel .text 0xc0100000 0xc013c7e0 0xc013c990
0xf49dbf9c 0xc013cad8 do_execve+0x148 (0xe95e2000, 0x80a2c30, 0x80a3a70, 
0xf49dbfc4)
                               kernel .text 0xc0100000 0xc013c990 0xc013cb30
0xf49dbfbc 0xc010795b sys_execve+0x2f (0x80a2c78, 0x80a2c30, 0x80a3a70, 
0x80a2c30, 0x80a2c78)
                               kernel .text 0xc0100000 0xc010792c 0xc0107988
           0xc0109040 system_call+0x34
                               kernel .text 0xc0100000 0xc010900c 0xc0109044

The "ps" command I ran is now hung waiting for some lock:

    EBP       EIP         Function(args)
0xf429bee4 0xc011721d schedule+0x415 (0xf49da000, 0xf5322300, 0xf49da000)
                               kernel .text 0xc0100000 0xc0116e08 0xc01177f0
0xf429bf0c 0xc0107a84 __down+0x6c
                               kernel .text 0xc0100000 0xc0107a18 0xc0107adc
0xf429bf20 0xc0107c27 __down_failed+0xb (0xf5c063a0, 0xf4e8c000, 0xf49da000, 
0x0, 0x33e8c)
                               kernel .text 0xc0100000 0xc0107c1c 0xc0107c30
           0xc01fa49c stext_lock+0x2ca4
                               kernel .text.lock 0xc01f77f8 0xc01f77f8 
0xc01fd5a0
0xf429bf64 0xc014e4c3 proc_pid_stat+0x6f (0xf49da000, 0xf4e8c000, 0xf5433380, 
0xffffffea)
                               kernel .text 0xc0100000 0xc014e454 0xc014e6d8
0xf429bf98 0xc014c497 proc_info_read+0x5b (0xf5433380, 0x40015000, 0x1000, 
0xf54333a0, 0xf429a000)
                               kernel .text 0xc0100000 0xc014c43c 0xc014c55c
0xf429bfbc 0xc01326b8 sys_read+0xa4 (0x4, 0x40015000, 0x1000, 0x804bf90, 0x0)
                               kernel .text 0xc0100000 0xc0132614 0xc01326d0
           0xc0109040 system_call+0x34
                               kernel .text 0xc0100000 0xc010900c 0xc0109044

Here's another trace of the same problem. This time I dumped
some pages and buffer heads below.

[0]kdb> btp 5732
    EBP       EIP         Function(args)
0xf4919864 0xc0117048 schedule+0x420 (0xf4918000, 0xf4918000, 0xf49199e8)
                               kernel .text 0xc0100000 0xc0116c28 0xc0117300
0xf491988c 0xc0107a84 __down+0x6c
                               kernel .text 0xc0100000 0xc0107a18 0xc0107adc
0xf49198a0 0xc0107c27 __down_failed+0xb (0xf4918000, 0xf52f03c0, 0xf49199e8, 
0xc01abdc7, 0xf53e0e9c)
                               kernel .text 0xc0100000 0xc0107c1c 0xc0107c30
           0xc01fb528 stext_lock+0x4ec
                               kernel .text.lock 0xc01fb03c 0xc01fb03c 
0xc0200f00
0xf4919944 0xc0112a2c do_page_fault+0x60 (0xf4919954, 0x0, 0xf49199ec, 0x0, 
0xf49199e8)
                               kernel .text 0xc0100000 0xc01129cc 0xc0112e00
           0xc0109164 error_code+0x2c
                               kernel .text 0xc0100000 0xc0109138 0xc010916c
Interrupt registers:
eax = 0x00000000 ebx = 0xf49199ec ecx = 0x00000000 edx = 0xf49199e8 
esi = 0xf52f03c0 edi = 0xf49199e8 esp = 0xf4919988 eip = 0xc0153c89 
ebp = 0xf4919a1c  ss = 0x00000018  cs = 0x00000010 eflags = 0x00010246 
 ds = 0xf4910018  es = 0x00000018 origeax = 0xffffffff &regs = 0xf4919954
           0xc0153c89 ext2_get_block+0x13d (0xf53efe40, 0xc, 0xf534ec80, 0x0)
                               kernel .text 0xc0100000 0xc0153b4c 0xc0154074
0xf4919a9c 0xc0134800 block_read_full_page+0x124 (0xc202a028, 0xc0153b4c)
                               kernel .text 0xc0100000 0xc01346dc 0xc01349a0
0xf4919aac 0xc0154269 ext2_readpage+0x11 (0xf5afdd80, 0xc202a028)
[0]more> 
                               kernel .text 0xc0100000 0xc0154258 0xc0154270
0xf4919ad0 0xc01260b4 read_cluster_nonblocking+0xe0 (0xf5afdd80, 0x2, 0x15)
                               kernel .text 0xc0100000 0xc0125fd4 0xc01260f0
0xf4919b10 0xc012741c filemap_nopage+0x254 (0xf4b4eaa0, 0x804b000, 0x1)
                               kernel .text 0xc0100000 0xc01271c8 0xc01275d4
0xf4919b30 0xc0123c6d do_no_page+0x51 (0xf53e0e80, 0xf4b4eaa0, 0x804b2b0, 0x1, 
0xf481f12c)
                               kernel .text 0xc0100000 0xc0123c1c 0xc0123ccc
0xf4919b60 0xc0123dd6 handle_mm_fault+0x10a (0xf53e0e80, 0xf4b4eaa0, 
0x804b2b0, 0x1, 0xf4918000)
                               kernel .text 0xc0100000 0xc0123ccc 0xc0123e6c
0xf4919c14 0xc0112b2e do_page_fault+0x162 (0xf4919c24, 0x2, 0xd50, 0x354, 
0x354)
                               kernel .text 0xc0100000 0xc01129cc 0xc0112e00
           0xc0109164 error_code+0x2c
                               kernel .text 0xc0100000 0xc0109138 0xc010916c
Interrupt registers:
eax = 0x00000000 ebx = 0x00000d50 ecx = 0x00000354 edx = 0x00000354 
esi = 0x00000000 edi = 0x0804b2b0 esp = 0xf4919c58 eip = 0xc01f48e7 
ebp = 0xf4919c68  ss = 0x00000018  cs = 0x00000010 eflags = 0x00010246 
 ds = 0x00000018  es = 0x00000018 origeax = 0xffffffff &regs = 0xf4919c24
           0xc01f48e7 clear_user+0x37 (0x804b2b0, 0xd50)
                               kernel .text 0xc0100000 0xc01f48b0 0xc01f48fc
0xf4919c78 0xc014977e padzero+0x1e (0x804b2b0, 0x804b2b0, 0x804b4cc, 
0xc02bcaf0, 0xc0149e44)
                               kernel .text 0xc0100000 0xc0149760 0xc0149784
0xf4919e10 0xc014a8b4 load_elf_binary+0xa70 (0xf4919e68, 0xf4919fc4, 
0xf4919e68)
[0]more> 
                               kernel .text 0xc0100000 0xc0149e44 0xc014aa18
0xf4919e48 0xc013c158 search_binary_handler+0x68 (0xf4919e68, 0xf4919fc4)
                               kernel .text 0xc0100000 0xc013c0f0 0xc013c2a0
0xf4919f9c 0xc013c3e8 do_execve+0x148 (0xf4c93000, 0x809f4c8, 0x80a7540, 
0xf4919fc4)
                               kernel .text 0xc0100000 0xc013c2a0 0xc013c43c
0xf4919fbc 0xc010795f sys_execve+0x2f (0x80a4ec8, 0x809f4c8, 0x80a7540, 
0x809f4c8, 0x80a4ec8)
                               kernel .text 0xc0100000 0xc0107930 0xc010798c
           0xc010903b system_call+0x33
                               kernel .text 0xc0100000 0xc0109008 0xc0109040
[0]kdb> 
[0]kdb> 
[0]kdb> bh 0xf534ec80
buffer_head at 0xf534ec80
  next 0x00000000 bno 0 rsec 0 size 4096 dev 0x806 rdev 0x0
  count 0 state 0x0 [] ftime 0x0
  b_page 0xc202a028 b_this_page 0xf534ec80 b_private 0x00000000
[0]kdb> page 0xc202a028
struct page at 0xc202a028
  next 0xc1f212a4 prev 0xf53efedc addr space 0xf53efedc index 12 (offset 
0xc000)
  count 3 flags PG_locked PG_highmem virtual 0xfe072000
  buffers 0xf534ec80  block_map 11111111000000000000000000000000
[0]kdb> page 0xf534ec80
struct page at 0xf534ec80
  next 0x00000000 prev 0x00000000 addr space 0x00001000 index 2054 (offset 
0x806000)
  count 0 flags  virtual 0xc202a028
  buffers 0x00000000  block_map 00000000000000000000000000000000
[0]kdb> reboot

<Prev in Thread] Current Thread [Next in Thread>
  • BUG 803884 - double-fault with HIGHMEM, dxm@xxxxxxxxxxxx <=