Hi Everyone,
The attached patch attacks the SMP problems that Ethan Solomita
described but takes a different approach than his patch. I just
got this working today so it may have some rough edges. I'm looking
for buy-in from Keith and Ethan.
I have split the kdb_state variable in two. The new kdb_st is used
for inter-processor synchronization while the existing kdb_state is
only modified by the local cpu. This allows the non-kdb breakpoints
to be handled without the overhead of synchronizing the processors.
The patch also changes the behavior when multiple processors hit
breakpoints at the same time. The previous behavior was to process
only one breakpoint at a time. This patch lets them all play as
equals. This means that you can switch between them with the
cpu command and see useful state. Previously you saw a stack trace
of the breakpoint entry code. It fixes the bug described in the
comment before
Most of the changes are portable except for the addition of the
kdb_in_bp[cpu] flag which needs to be set early in the breakpoint
entry code in entry.S. This flag is used by kdb_ipi to tell if it
has nested on top of a breakpoint entry. If it finds this flag set
it returns and lets the breakpoint proceed. This change is about
10 lines on the i386.
I adopted Ethan's simplified code for single stepping past
breakpoints. The description of his patch will be helpful if someone
else has to make this work on ia64.
Jim Houston - Concurrent Computer Corp.
diff -urN -X /home/jim/dontdiff linux.old/arch/i386/kdb/kdba_bp.c
linux/arch/i386/kdb/kdba_bp.c
--- linux.old/arch/i386/kdb/kdba_bp.c Wed May 8 17:55:04 2002
+++ linux/arch/i386/kdb/kdba_bp.c Mon May 6 22:41:04 2002
@@ -95,43 +95,6 @@
if (KDB_DEBUG(BP))
kdb_printf("kdb: dr6 0x%lx dr7 0x%lx\n", dr6, dr7);
if (dr6 & DR6_BS) {
- if (KDB_STATE(SSBPT)) {
- if (KDB_DEBUG(BP))
- kdb_printf("ssbpt\n");
- KDB_STATE_CLEAR(SSBPT);
- for(i=0,bp=kdb_breakpoints;
- i < KDB_MAXBPT;
- i++, bp++) {
- if (KDB_DEBUG(BP))
- kdb_printf("bp 0x%p enabled %d delayed
%d global %d cpu %d\n",
- bp, bp->bp_enabled,
bp->bp_delayed, bp->bp_global, bp->bp_cpu);
- if (!bp->bp_enabled)
- continue;
- if (!bp->bp_global && bp->bp_cpu !=
smp_processor_id())
- continue;
- if (KDB_DEBUG(BP))
- kdb_printf("bp for this cpu\n");
- if (bp->bp_delayed) {
- bp->bp_delayed = 0;
- if (KDB_DEBUG(BP))
- kdb_printf("kdba_installbp\n");
- kdba_installbp(ef, bp);
- if (!KDB_STATE(DOING_SS)) {
- ef->eflags &= ~EF_TF;
- return(KDB_DB_SSBPT);
- }
- break;
- }
- }
- if (i == KDB_MAXBPT) {
- kdb_printf("kdb: Unable to find delayed
breakpoint\n");
- }
- if (!KDB_STATE(DOING_SS)) {
- ef->eflags &= ~EF_TF;
- return(KDB_DB_NOBPT);
- }
- /* FALLTHROUGH */
- }
/*
* KDB_STATE_DOING_SS is set when the kernel debugger is using
@@ -143,6 +106,16 @@
if (!KDB_STATE(DOING_SS))
goto unknown;
+ if (KDB_STATE(DOING_SSBPT)) {
+ if (KDB_DEBUG(BP))
+ kdb_printf("ssbpt\n");
+ KDB_STATE_CLEAR(DOING_SS);
+ KDB_STATE_CLEAR(DOING_SSBPT);
+ /* do we need to restore EF_IE? */
+ ef->eflags &= ~EF_TF;
+ return(KDB_DB_SSBPT);
+ }
+
/* single step */
rv = KDB_DB_SS; /* Indicate single step */
if (KDB_STATE(DOING_SSB)) {
@@ -321,7 +294,7 @@
i, ef->eip);
kdb_id1(ef->eip);
rv = KDB_DB_BPT;
- bp->bp_delay = 1;
+ KDB_STATE_SET(NEED_SSBPT);
break;
}
}
@@ -330,56 +303,6 @@
}
/*
- * kdba_handle_bp
- *
- * Handle an instruction-breakpoint trap. Called when re-installing
- * an enabled breakpoint which has has the bp_delay bit set.
- *
- * Parameters:
- * Returns:
- * Locking:
- * Remarks:
- *
- * Ok, we really need to:
- * 1) Restore the original instruction byte
- * 2) Single Step
- * 3) Restore breakpoint instruction
- * 4) Continue.
- *
- *
- */
-
-static void
-kdba_handle_bp(kdb_eframe_t ef, kdb_bp_t *bp)
-{
- if (!ef) {
- kdb_printf("kdba_handle_bp: ef == NULL\n");
- return;
- }
-
- if (KDB_DEBUG(BP))
- kdb_printf("ef->eip = 0x%lx\n", ef->eip);
-
- /*
- * Setup single step
- */
- kdba_setsinglestep(ef);
-
- /* KDB_STATE_SSBPT is set when the kernel debugger must single step
- * a task in order to re-establish an instruction breakpoint which
- * uses the instruction replacement mechanism.
- */
- KDB_STATE_SET(SSBPT);
-
- /*
- * Reset delay attribute
- */
- bp->bp_delay = 0;
- bp->bp_delayed = 1;
-}
-
-
-/*
* kdba_bptype
*
* Return a string describing type of breakpoint.
@@ -714,10 +637,6 @@
kdb_printf("kdba_installbp hardware reg %ld at
" kdb_bfd_vma_fmt "\n",
bp->bp_hard->bph_reg, bp->bp_addr);
}
- } else if (bp->bp_delay) {
- if (KDB_DEBUG(BP))
- kdb_printf("kdba_installbp delayed bp\n");
- kdba_handle_bp(ef, bp);
} else {
if (kdb_getarea_size(&(bp->bp_inst), bp->bp_addr, 1) ||
kdb_putword(bp->bp_addr,
IA32_BREAKPOINT_INSTRUCTION, 1)) {
diff -urN -X /home/jim/dontdiff linux.old/arch/i386/kernel/entry.S
linux/arch/i386/kernel/entry.S
--- linux.old/arch/i386/kernel/entry.S Wed May 8 17:55:04 2002
+++ linux/arch/i386/kernel/entry.S Wed May 8 13:56:06 2002
@@ -78,6 +78,7 @@
need_resched = 20
tsk_ptrace = 24
processor = 52
+cpu = 48
ENOSYS = 38
@@ -344,6 +345,14 @@
RESTORE_ALL
ENTRY(int3)
+#if defined(CONFIG_KDB)
+ pushl %eax # kdb_in_bp[smp_processor_id] = 1;
+ GET_CURRENT(%eax)
+ movl cpu(%eax),%eax
+ sall $2,%eax
+ movl $1,kdb_in_bp(%eax)
+ popl %eax
+#endif
pushl $0
pushl $ SYMBOL_NAME(do_int3)
jmp error_code
diff -urN -X /home/jim/dontdiff linux.old/include/linux/kdb.h
linux/include/linux/kdb.h
--- linux.old/include/linux/kdb.h Wed May 8 17:54:43 2002
+++ linux/include/linux/kdb.h Thu May 9 00:09:20 2002
@@ -113,8 +113,8 @@
#define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */
#define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */
#define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command, DOING_SS
is also set */
-#define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint
after one ss, independent of DOING_SS */
-#define KDB_STATE_REENTRY 0x00000100 /* Valid re-entry into kdb */
+#define KDB_STATE_DOING_SSBPT 0x00000080 /* Doing go after breakpoint,
DOING_SS set */
+#define KDB_STATE_NEED_SSBPT 0x00000100 /* If users says "go", need to
do "ss" */
#define KDB_STATE_SUPPRESS 0x00000200 /* Suppress error messages */
#define KDB_STATE_LONGJMP 0x00000400 /* longjmp() data is available
*/
/* Spare, was NO_WATCHDOG 0x00000800 */
@@ -122,7 +122,7 @@
#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */
#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */
#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been adjusted
*/
-#define KDB_STATE_NO_BP_DELAY 0x00010000 /* No need to delay breakpoints
*/
+#define KDB_STATE_REENTRY 0x00010000
#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch specific
use */
#define KDB_STATE_CPU(flag,cpu) (kdb_state[cpu] &
KDB_STATE_##flag)
@@ -133,6 +133,24 @@
#define KDB_STATE_SET(flag) KDB_STATE_SET_CPU(flag,smp_processor_id())
#define KDB_STATE_CLEAR(flag) KDB_STATE_CLEAR_CPU(flag,smp_processor_id())
+/*
+ * kdb_st state/command used for MP interactions.
+ */
+typedef enum {
+ KDB_ST_RUNNING, /* Not in kdb. */
+ KDB_ST_LEAVING,
+ KDB_ST_WAIT_IPI, /* Waiting for kdb_ipi() NMI */
+ KDB_ST_HOLD_CPU, /* Slave waiting kdb_main_loop */
+ KDB_ST_MASTER, /* Master executing debugger */
+ KDB_ST_SLAVE,
+ KDB_ST_SSBPT, /* Single step past breakpoint */
+ KDB_ST_NULL, /* invalid/unknown state */
+} kdb_st_t;
+
+/* kdb_st - kdb state that is modified by other processors. */
+volatile extern kdb_st_t kdb_st[ /*NR_CPUS*/ ];
+volatile extern int kdb_in_bp[ /*NR_CPUS*/ ];
+
/*
* External entry point for the kernel debugger. The pt_regs
* at the time of entry are supplied along with the reason for
diff -urN -X /home/jim/dontdiff linux.old/include/linux/kdbprivate.h
linux/include/linux/kdbprivate.h
--- linux.old/include/linux/kdbprivate.h Wed May 8 17:54:43 2002
+++ linux/include/linux/kdbprivate.h Wed May 8 13:48:26 2002
@@ -90,8 +90,6 @@
unsigned int bp_hardtype:1; /* Uses hardware register */
unsigned int bp_forcehw:1; /* Force hardware register */
unsigned int bp_installed:1; /* Breakpoint is installed */
- unsigned int bp_delay:1; /* Do delayed bp handling */
- unsigned int bp_delayed:1; /* Delayed breakpoint */
int bp_cpu; /* Cpu # (if bp_global == 0) */
kdbhard_bp_t bp_template; /* Hardware breakpoint template */
@@ -168,6 +166,7 @@
unsigned long setup; /* Bytes allocated for setup data */
} kdb_ar_t;
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
/*
* General Stack Traceback functions.
*/
@@ -176,6 +175,7 @@
kdb_machreg_t, kdb_machreg_t,
kdb_machreg_t,
kdb_ar_t *, kdb_symtab_t *);
+#endif
/*
* Architecture specific Stack Traceback functions.
@@ -186,9 +186,11 @@
extern int kdba_bt_stack(struct pt_regs *, kdb_machreg_t *,
int, struct task_struct *);
extern int kdba_bt_process(struct task_struct *, int);
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
extern int kdba_prologue(const kdb_symtab_t *, kdb_machreg_t,
kdb_machreg_t, kdb_machreg_t, kdb_machreg_t,
int, kdb_ar_t *);
+#endif
/*
* KDB Command Table
*/
diff -urN -X /home/jim/dontdiff linux.old/kdb/kdb_bp.c linux/kdb/kdb_bp.c
--- linux.old/kdb/kdb_bp.c Wed May 8 17:54:43 2002
+++ linux/kdb/kdb_bp.c Mon May 6 22:40:11 2002
@@ -50,13 +50,18 @@
{
int i;
+ if (KDB_DEBUG(BP))
+ kdb_printf("kdb_bp_install_global cpu %d\n",
smp_processor_id());
+
for(i=0; i<KDB_MAXBPT; i++) {
- if (KDB_DEBUG(BP)) {
- kdb_printf("kdb_bp_install_global bp %d bp_enabled %d
bp_global %d\n",
- i, kdb_breakpoints[i].bp_enabled,
kdb_breakpoints[i].bp_global);
- }
if (kdb_breakpoints[i].bp_enabled
&& kdb_breakpoints[i].bp_global) {
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("kdb_bp_install_global bp %d
bp_enabled %d "
+ "bp_global %d\n", i,
+ kdb_breakpoints[i].bp_enabled,
+ kdb_breakpoints[i].bp_global);
+ }
kdba_installbp(ef, &kdb_breakpoints[i]);
}
}
@@ -87,19 +92,22 @@
kdb_bp_install_local(kdb_eframe_t ef)
{
int i;
+ int cpu = smp_processor_id();
+ if (KDB_DEBUG(BP))
+ kdb_printf("kdb_bp_install_local cpu %d\n", cpu);
+
for(i=0; i<KDB_MAXBPT; i++) {
- if (KDB_DEBUG(BP)) {
- kdb_printf("kdb_bp_install_local bp %d bp_enabled %d
bp_global %d cpu %d bp_cpu %d\n",
- i, kdb_breakpoints[i].bp_enabled,
kdb_breakpoints[i].bp_global,
- smp_processor_id(), kdb_breakpoints[i].bp_cpu);
- }
- if (KDB_STATE(NO_BP_DELAY)) {
- kdb_breakpoints[i].bp_delay = 0;
- }
if (kdb_breakpoints[i].bp_enabled
- && kdb_breakpoints[i].bp_cpu == smp_processor_id()
+ && kdb_breakpoints[i].bp_cpu == cpu
&& !kdb_breakpoints[i].bp_global){
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("kdb_bp_install_local bp %d
bp_enabled %d "
+ "bp_global %d cpu %d\n", i,
+ kdb_breakpoints[i].bp_enabled,
+ kdb_breakpoints[i].bp_global,
+ cpu);
+ }
kdba_installbp(ef, &kdb_breakpoints[i]);
}
}
@@ -126,13 +134,18 @@
{
int i;
+ if (KDB_DEBUG(BP))
+ kdb_printf("kdb_bp_remove_global cpu %d\n", smp_processor_id());
+
for(i=KDB_MAXBPT-1; i>=0; i--) {
- if (KDB_DEBUG(BP)) {
- kdb_printf("kdb_bp_remove_global bp %d bp_enabled %d
bp_global %d\n",
- i, kdb_breakpoints[i].bp_enabled,
kdb_breakpoints[i].bp_global);
- }
if (kdb_breakpoints[i].bp_enabled
&& kdb_breakpoints[i].bp_global) {
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("kdb_bp_remove_global bp %d
bp_enabled %d "
+ "bp_global %d\n", i,
+ kdb_breakpoints[i].bp_enabled,
+ kdb_breakpoints[i].bp_global);
+ }
kdba_removebp(&kdb_breakpoints[i]);
}
}
@@ -158,17 +171,21 @@
void
kdb_bp_remove_local(void)
{
- int i;
+ int i, cpu = smp_processor_id();
+
+ if (KDB_DEBUG(BP))
+ kdb_printf("kdb_bp_remove_local cpu %d\n", cpu);
for(i=KDB_MAXBPT-1; i>=0; i--) {
- if (KDB_DEBUG(BP)) {
- kdb_printf("kdb_bp_remove_local bp %d bp_enabled %d
bp_global %d cpu %d bp_cpu %d\n",
- i, kdb_breakpoints[i].bp_enabled,
kdb_breakpoints[i].bp_global,
- smp_processor_id(), kdb_breakpoints[i].bp_cpu);
- }
if (kdb_breakpoints[i].bp_enabled
&& kdb_breakpoints[i].bp_cpu == smp_processor_id()
&& !kdb_breakpoints[i].bp_global){
+ if (KDB_DEBUG(BP)) {
+ kdb_printf("kdb_bp_remove_local bp %d
bp_enabled %d "
+ "bp_global %d cpu %d\n", i,
+ kdb_breakpoints[i].bp_enabled,
+ kdb_breakpoints[i].bp_global, cpu);
+ }
kdba_removebp(&kdb_breakpoints[i]);
}
}
diff -urN -X /home/jim/dontdiff linux.old/kdb/kdbmain.c linux/kdb/kdbmain.c
--- linux.old/kdb/kdbmain.c Wed May 8 17:54:43 2002
+++ linux/kdb/kdbmain.c Thu May 9 00:12:17 2002
@@ -34,6 +34,14 @@
* KDB v1.4
* kdb=on/off/early at boot, /proc/sys/kernel/kdb.
* Env BTAPROMPT.
+ *
+ * Jim Houston 2002/05/08
+ * Hacking at SMP handling. If multiple processors
+ * hit breakpoints at the same time they now present
+ * useful state. Before this change if you switched
+ * to another processor with the cpu command you were
+ * likely to see the breakpoint entry code on the stack.
+ * The TBR (to be revisited) comments are mine.
*/
#include <linux/config.h>
@@ -49,6 +57,7 @@
#include <linux/kallsyms.h>
#include <linux/kdb.h>
#include <linux/kdbprivate.h>
+#include <linux/delay.h>
#include <asm/system.h>
@@ -61,17 +70,26 @@
*/
volatile int kdb_flags;
- /*
- * kdb_lock protects updates to kdb_initial_cpu. Used to
- * single thread processors through the kernel debugger.
- */
-spinlock_t kdb_lock = SPIN_LOCK_UNLOCKED;
volatile int kdb_initial_cpu = -1; /* cpu number that owns kdb */
volatile int kdb_nextline = 1;
static volatile int kdb_new_cpu; /* Which cpu to switch to */
+/*
+ * Some of the flags that were in kdb_state are now in kdb_st
+ * so that kdb_state is only accessed by the local cpu.
+ * I imagine kdb_st as an enumerated state rather than a collection
+ * of flags.
+ */
volatile int kdb_state[NR_CPUS]; /* Per cpu state */
+volatile kdb_st_t kdb_st[NR_CPUS]; /* Per cpu state */
+volatile int kdb_in_bp[NR_CPUS]; /* Per cpu state */
+
+/* descriptions for kdb_st values. */
+char *kdb_st_desc[] = {
+ "running", "leaving", "wait_ipi", "hold_cpu",
+ "master", "slave", "ssbpt", "" };
+
#ifdef CONFIG_KDB_OFF
int kdb_on = 0; /* Default is off */
@@ -957,10 +975,13 @@
void kdb_print_state(const char *text, int value)
{
- kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
- text, smp_processor_id(), value, kdb_initial_cpu,
kdb_state[smp_processor_id()]);
+ kdb_printf("state: %s cpu %d value %d initial %d st %x state %x\n",
+ text, smp_processor_id(), value, kdb_initial_cpu, \
+ kdb_st[smp_processor_id()], kdb_state[smp_processor_id()]);
}
+#if 0
+/* TBR - I'm not convinced this is useful. */
/*
* kdb_previous_event
*
@@ -982,11 +1003,12 @@
{
int i, leaving = 0;
for (i = 0; i < NR_CPUS; ++i) {
- if (KDB_STATE_CPU(LEAVING, i))
+ if (kdb_st[i] == KDB_ST_LEAVING)
++leaving;
}
return(leaving);
}
+#endif
/*
* kdb_main_loop
@@ -1019,67 +1041,120 @@
* none
*/
+int kdb_boring;
+
int
kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
- kdb_dbtrap_t db_result, kdb_eframe_t ef)
+kdb_dbtrap_t db_result, kdb_eframe_t ef)
{
int result = 1;
- /* Stay in kdb() until 'go', 'ss[b]' or an error */
+ int cpu = smp_processor_id();
+ int i;
+ kdb_st_t st, old_st;
+
+ old_st = KDB_ST_NULL;
while (1) {
- int i;
- /*
- * All processors except the one that is in control
- * will spin here.
- */
- KDB_DEBUG_STATE("kdb_main_loop 1", reason);
- while (KDB_STATE(HOLD_CPU))
- ;
- KDB_STATE_CLEAR(SUPPRESS);
- KDB_DEBUG_STATE("kdb_main_loop 2", reason);
- if (KDB_STATE(LEAVING))
- break; /* Another cpu said 'go' */
-
- /* Still using kdb, this processor is in control */
- result = kdb_local(reason2, error, ef, db_result);
- KDB_DEBUG_STATE("kdb_main_loop 3", result);
-
- if (result == KDB_CMD_CPU) {
- /* Cpu switch, hold the current cpu, release the target
one. */
- reason2 = KDB_REASON_SWITCH;
- KDB_STATE_SET(HOLD_CPU);
- KDB_STATE_CLEAR_CPU(HOLD_CPU, kdb_new_cpu);
- continue;
+ st = kdb_st[cpu];
+ if (KDB_DEBUG(STATE)) {
+ kdb_printf("cpu %d st %s -> %s master %d\n", cpu,
+ kdb_st_desc[old_st], kdb_st_desc[st], kdb_initial_cpu);
+ old_st = st;
}
-
- if (result == KDB_CMD_SS) {
+ switch (st) {
+
+ case KDB_ST_HOLD_CPU:
+ while (kdb_st[cpu] == KDB_ST_HOLD_CPU) ;
+ continue;
+
+ case KDB_ST_LEAVING:
+ return(1);
+
+ case KDB_ST_SSBPT:
KDB_STATE_SET(DOING_SS);
+ KDB_STATE_SET(DOING_SSBPT);
+ kdba_setsinglestep(ef);
+ kdb_st[cpu] = KDB_ST_MASTER;
+ return(1);
+
+ case KDB_ST_MASTER:
+ result = kdb_local(reason2, error, ef, db_result);
+ switch (result) {
+
+ case KDB_CMD_SS:
+ case KDB_CMD_SSB:
+ return(1);
+
+ case KDB_CMD_CPU:
+ reason2 = KDB_REASON_SWITCH;
+ kdb_st[cpu] = KDB_ST_HOLD_CPU;
+ kdb_st[kdb_new_cpu] = KDB_ST_MASTER;
+ kdb_initial_cpu = kdb_new_cpu;
+ break;
+
+ case KDB_CMD_GO:
+ default:
+ /*
+ * If there are cpus that need to single
+ * step off of breakpoints let them go
+ * individually.
+ */
+ for (i = 0; i < NR_CPUS; i++) {
+ if (KDB_STATE_CPU(NEED_SSBPT, i)) {
+ kdb_st[cpu] = KDB_ST_HOLD_CPU;
+ kdb_st[i] = KDB_ST_SSBPT;
+ kdb_initial_cpu = i;
+ break;
+ }
+ }
+ if (i == NR_CPUS) {
+ /* We're ready to leave. */
+ kdb_bp_install_global(ef);
+ for (i = 0; i < NR_CPUS; i++)
+ kdb_st[i] = KDB_ST_LEAVING;
+ }
+ }
break;
- }
- if (result == KDB_CMD_SSB) {
- KDB_STATE_SET(DOING_SS);
- KDB_STATE_SET(DOING_SSB);
+ default:
+ if (kdb_boring < 5) {
+ kdb_printf("kdb_main_loop: cpu %d st %s\n",
+ cpu, kdb_st_desc[st]);
+ kdb_boring++;
+ }
break;
}
+ }
+}
- if (result && result != 1 && result != KDB_CMD_GO)
- kdb_printf("\nUnexpected kdb_local return code %d\n",
result);
+/*
+ * kdb_smp_wait()
+ *
+ * After we hit the other processors with an NMI we want to
+ * wait and give the dust a chance to settle before we remove
+ * the global breakpoints.
+ */
+#define KDB_TIMEOUT 10000
- /*
- * All other return codes (including KDB_CMD_GO) from
- * kdb_local will end kdb(). Release all other cpus
- * which will see KDB_STATE(LEAVING) is set.
- */
- for (i = 0; i < NR_CPUS; ++i) {
- if (KDB_STATE_CPU(KDB, i))
- KDB_STATE_SET_CPU(LEAVING, i);
- KDB_STATE_CLEAR_CPU(WAIT_IPI, i);
- KDB_STATE_CLEAR_CPU(HOLD_CPU, i);
+kdb_smp_wait()
+{
+ int wait_cnt, i, n;
+
+ for (n = 0; n < KDB_TIMEOUT; n++) {
+ wait_cnt = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ if (kdb_st[i] == KDB_ST_WAIT_IPI)
+ wait_cnt++;
}
- KDB_DEBUG_STATE("kdb_main_loop 4", reason);
- break;
+ if (!wait_cnt)
+ break;
+ udelay(10);
+ }
+ if (n == KDB_TIMEOUT) {
+ kdb_printf("No response from cpu - ");
+ for (i = 0; i < smp_num_cpus; i++)
+ kdb_printf(" %d", i);
+ kdb_printf("\n");
}
- return(result != 0);
}
/*
@@ -1110,6 +1185,8 @@
* the cpu is allowed to do one instruction which causes a trap
* into kdb with KDB_REASON_DEBUG.
*
+ * [Note: sparc64 leaves interrupts disabled while single-stepping]
+ *
* Inputs:
* reason The reason KDB was invoked
* error The hardware-defined error code
@@ -1147,11 +1224,22 @@
*
* Two cpus hit debug points at the same time.
*
- * kdb_lock and kdb_initial_cpu ensure that only one cpu gets
- * control of kdb. The others spin on kdb_initial_cpu until
- * they are driven through NMI into kdb_ipi. When the initial
- * cpu releases the others from NMI, they resume trying to get
- * kdb_initial_cpu to start a new event.
+ * The tricky bit here is getting all of the other processors
+ * to stop without ending up with an extra layer of nesting.
+ * The variable kdb_in_bp[cpu] is set early in entry.S. If
+ * kdb_ipi() finds this flag set it simply returns from the
+ * NMI allowing the processor to enter kdb and process the
+ * the breakpoint entry. The processors arbitrate racing
+ * to set kdb_initial_cpu to their processor number.
+ * The winning processor becomes the master cpu and does the
+ * kdb command processing. They all call into kdb_main_loop.
+ *
+ * TBR- There are still some holes I plan to fix. I plan to have
+ * kdb_ipi check the eip value to see if it's in the code that
+ * sets kdb_in_bp. There is also the case where a gdb
+ * breakpoint happens at the same time as the kdb_ipi() call.
+ * In this case it will currently return from the NMI and
+ * not stop.
*
* A cpu is released from kdb and starts a new event before the
* original event has completely ended.
@@ -1200,7 +1288,6 @@
*
*/
-int
kdb(kdb_reason_t reason, int error, kdb_eframe_t ef)
{
kdb_intstate_t int_state; /* Interrupt state */
@@ -1208,9 +1295,13 @@
int result = 1; /* Default is kdb handled it */
int ss_event;
kdb_dbtrap_t db_result=KDB_DB_NOBPT;
+ int cpu = smp_processor_id();
+ int i;
- if (!kdb_on)
+ if (!kdb_on) {
+ kdb_in_bp[cpu] = 0;
return 0;
+ }
KDB_DEBUG_STATE("kdb 1", reason);
KDB_STATE_CLEAR(SUPPRESS);
@@ -1219,7 +1310,7 @@
* the kdb smp fiddling when it is really a gdb trap.
* Save the single step status first, kdba_db_trap clears ss status.
*/
- ss_event = reason != KDB_REASON_PANIC && (KDB_STATE(DOING_SS) ||
KDB_STATE(SSBPT));
+ ss_event = reason != KDB_REASON_PANIC && KDB_STATE(DOING_SS);
if (reason == KDB_REASON_BREAK)
db_result = kdba_bp_trap(ef, error); /* Only call this once
*/
if (reason == KDB_REASON_DEBUG)
@@ -1228,33 +1319,40 @@
if ((reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG)
&& db_result == KDB_DB_NOBPT) {
KDB_DEBUG_STATE("kdb 2", reason);
+ kdb_in_bp[cpu] = 0;
return 0; /* Not one of mine */
}
/* Turn off single step if it was being used */
- if (ss_event) {
+ if (ss_event)
kdba_clearsinglestep(ef);
- /* Single step after a breakpoint removes the need for a
delayed reinstall */
- if (reason == KDB_REASON_BREAK || reason == KDB_REASON_DEBUG) {
- KDB_STATE_SET(NO_BP_DELAY);
- }
- }
/* kdb can validly reenter but only for certain well defined conditions
*/
if (reason == KDB_REASON_DEBUG
- && !KDB_STATE(HOLD_CPU)
+ && (kdb_st[cpu] != KDB_ST_HOLD_CPU)
&& ss_event)
KDB_STATE_SET(REENTRY);
else
KDB_STATE_CLEAR(REENTRY);
+#if 0
+ /*
+ * TBR - It's broken and I don't think it's needed.
+ * Nonexistent processors get stuck in LEAVING state.
+ */
/* Wait for previous kdb event to completely exit before starting
* a new event.
*/
while (kdb_previous_event())
;
+#endif
KDB_DEBUG_STATE("kdb 3", reason);
-
+#if 0
+ /*
+ * TBR - Its broken because the state information has changed
+ * to allow multiple processors to enter kdb for breakpoints
+ * at the same time.
+ */
/*
* If kdb is already active, print a message and try to recover.
* If recovery is not possible and recursion is allowed or
@@ -1269,7 +1367,7 @@
kdb_printf("kdb: Debugger re-entered on cpu %d, new
reason = %d\n",
smp_processor_id(), reason);
/* Should only re-enter from released cpu */
- if (KDB_STATE(HOLD_CPU)) {
+ if (kdb_st[cpu] == KDB_ST_HOLD_CPU) {
kdb_printf(" Strange, cpu %d should not be
running\n", smp_processor_id());
recover = 0;
}
@@ -1312,6 +1410,7 @@
kdb_printf("kdb: CPU switch without kdb running, I'm
confused\n");
return(0);
}
+#endif
/*
* Disable interrupts, breakpoints etc. on this processor
@@ -1343,31 +1442,34 @@
; /* drop through */
else {
KDB_DEBUG_STATE("kdb 4", reason);
- spin_lock(&kdb_lock);
-
- while (KDB_IS_RUNNING() || kdb_previous_event()) {
- spin_unlock(&kdb_lock);
-
- while (KDB_IS_RUNNING() || kdb_previous_event())
+ /*
+ * cover case where there is an old master in the
+ * process of leaving.
+ */
+ while ((i = kdb_initial_cpu) != -1 && kdb_st[i] ==
KDB_ST_LEAVING);
+ /*
+ * TBR - Is cmpxchg() portable?
+ */
+ if (cmpxchg(&kdb_initial_cpu, -1, cpu) == -1) {
+ /*
+ * We won; become master.
+ */
+ kdb_st[cpu] = KDB_ST_MASTER;
+#if 0
+ } else {
+ /*
+ * TBR - Is this needed?
+ */
+ while (kdb_in_bp[cpu] == 1)
;
-
- spin_lock(&kdb_lock);
+#endif
}
KDB_DEBUG_STATE("kdb 5", reason);
-
- kdb_initial_cpu = smp_processor_id();
- spin_unlock(&kdb_lock);
}
if (smp_processor_id() == kdb_initial_cpu
&& !KDB_STATE(REENTRY)) {
- KDB_STATE_CLEAR(HOLD_CPU);
- KDB_STATE_CLEAR(WAIT_IPI);
- /*
- * Remove the global breakpoints. This is only done
- * once from the initial processor on initial entry.
- */
- kdb_bp_remove_global();
+ kdb_st[cpu] = KDB_ST_MASTER;
/*
* If SMP, stop other processors. The other processors
@@ -1379,14 +1481,19 @@
int i;
for (i = 0; i < NR_CPUS; ++i) {
if (i != kdb_initial_cpu) {
- KDB_STATE_SET_CPU(HOLD_CPU, i);
- KDB_STATE_SET_CPU(WAIT_IPI, i);
+ kdb_st[i] = KDB_ST_WAIT_IPI;
}
}
KDB_DEBUG_STATE("kdb 7", reason);
smp_kdb_stop();
KDB_DEBUG_STATE("kdb 8", reason);
+ kdb_smp_wait();
}
+ /*
+ * Remove the global breakpoints. This is only done
+ * once from the initial processor on initial entry.
+ */
+ kdb_bp_remove_global();
}
/* Set up a consistent set of process stacks before talking to the user
*/
@@ -1400,38 +1507,28 @@
/* No breakpoints installed for SS */
if (!KDB_STATE(DOING_SS) &&
- !KDB_STATE(SSBPT) &&
!KDB_STATE(RECURSE)) {
KDB_DEBUG_STATE("kdb 12", result);
kdba_enable_lbr();
kdb_bp_install_local(ef);
- KDB_STATE_CLEAR(NO_BP_DELAY);
KDB_STATE_CLEAR(KDB_CONTROL);
}
KDB_DEBUG_STATE("kdb 13", result);
kdba_restoreint(&int_state);
+ KDB_STATE_CLEAR(NEED_SSBPT);
KDB_STATE_CLEAR(KDB); /* Main kdb state has been cleared */
- KDB_STATE_CLEAR(LEAVING); /* Elvis has left the building ... */
KDB_DEBUG_STATE("kdb 14", result);
- if (smp_processor_id() == kdb_initial_cpu &&
- !KDB_STATE(DOING_SS) &&
- !KDB_STATE(RECURSE)) {
- /*
- * (Re)install the global breakpoints. This is only done
- * once from the initial processor on final exit.
- */
- KDB_DEBUG_STATE("kdb 15", reason);
- kdb_bp_install_global(ef);
- /* Wait until all the other processors leave kdb */
- while (kdb_previous_event())
- ;
- kdb_initial_cpu = -1; /* release kdb control */
+ if (kdb_st[cpu] == KDB_ST_LEAVING) {
+ if (cpu == kdb_initial_cpu)
+ kdb_initial_cpu = -1; /* release kdb control */
+ kdb_st[cpu] = KDB_ST_RUNNING;
KDB_DEBUG_STATE("kdb 16", reason);
}
+ kdb_in_bp[cpu] = 0;
KDB_STATE_CLEAR(RECURSE);
KDB_DEBUG_STATE("kdb 17", reason);
return(result != 0);
diff -urN -X /home/jim/dontdiff linux.old/kdb/kdbsupport.c
linux/kdb/kdbsupport.c
--- linux.old/kdb/kdbsupport.c Wed May 8 17:54:43 2002
+++ linux/kdb/kdbsupport.c Wed May 8 13:46:41 2002
@@ -141,13 +141,23 @@
/* Do not print before checking and clearing WAIT_IPI, IPIs are
* going all the time.
*/
- if (KDB_STATE(WAIT_IPI)) {
+ if (kdb_st[smp_processor_id()] == KDB_ST_WAIT_IPI) {
/*
* Stopping other processors via smp_kdb_stop().
*/
if (ack_interrupt)
(*ack_interrupt)(); /* Acknowledge the interrupt */
- KDB_STATE_CLEAR(WAIT_IPI);
+ /*
+ * Avoid an extra layer of nesting if we have
+ * already entered kdb for some other reason.
+ */
+ if (kdb_in_bp[smp_processor_id()] == 1) {
+ kdb_in_bp[smp_processor_id()] = 2;
+ kdb_st[smp_processor_id()] = KDB_ST_HOLD_CPU;
+ return(1);
+ }
+
+ kdb_st[smp_processor_id()] = KDB_ST_HOLD_CPU;
KDB_DEBUG_STATE("kdb_ipi 1", 0);
kdb(KDB_REASON_SWITCH, 0, ef); /* Spin in kdb() */
KDB_DEBUG_STATE("kdb_ipi 2", 0);
|