linux/kernel/debug/debug_core.h
Douglas Anderson 2277b49258 kdb: Fix stack crawling on 'running' CPUs that aren't the master
In kdb when you do 'btc' (back trace on CPU) it doesn't necessarily
give you the right info.  Specifically on many architectures
(including arm64, where I tested) you can't dump the stack of a
"running" process that isn't the process running on the current CPU.
This can be seen by this:

 echo SOFTLOCKUP > /sys/kernel/debug/provoke-crash/DIRECT
 # wait 2 seconds
 <sysrq>g

Here's what I see now on rk3399-gru-kevin.  I see the stack crawl for
the CPU that handled the sysrq but everything else just shows me stuck
in __switch_to() which is bogus:

======

[0]kdb> btc
btc: cpu status: Currently on cpu 0
Available cpus: 0, 1-3(I), 4, 5(I)
Stack traceback for pid 0
0xffffff801101a9c0        0        0  1    0   R  0xffffff801101b3b0 *swapper/0
Call trace:
 dump_backtrace+0x0/0x138
 ...
 kgdb_compiled_brk_fn+0x34/0x44
 ...
 sysrq_handle_dbg+0x34/0x5c
Stack traceback for pid 0
0xffffffc0f175a040        0        0  1    1   I  0xffffffc0f175aa30  swapper/1
Call trace:
 __switch_to+0x1e4/0x240
 0xffffffc0f65616c0
Stack traceback for pid 0
0xffffffc0f175d040        0        0  1    2   I  0xffffffc0f175da30  swapper/2
Call trace:
 __switch_to+0x1e4/0x240
 0xffffffc0f65806c0
Stack traceback for pid 0
0xffffffc0f175b040        0        0  1    3   I  0xffffffc0f175ba30  swapper/3
Call trace:
 __switch_to+0x1e4/0x240
 0xffffffc0f659f6c0
Stack traceback for pid 1474
0xffffffc0dde8b040     1474      727  1    4   R  0xffffffc0dde8ba30  bash
Call trace:
 __switch_to+0x1e4/0x240
 __schedule+0x464/0x618
 0xffffffc0dde8b040
Stack traceback for pid 0
0xffffffc0f17b0040        0        0  1    5   I  0xffffffc0f17b0a30  swapper/5
Call trace:
 __switch_to+0x1e4/0x240
 0xffffffc0f65dd6c0

===

The problem is that 'btc' eventually boils down to
  show_stack(task_struct, NULL);

...and show_stack() doesn't work for "running" CPUs because their
registers haven't been stashed.

On x86 things might work better (I haven't tested) because kdb has a
special case for x86 in kdb_show_stack() where it passes the stack
pointer to show_stack().  This wouldn't work on arm64 where the stack
crawling function seems needs the "fp" and "pc", not the "sp" which is
presumably why arm64's show_stack() function totally ignores the "sp"
parameter.

NOTE: we _can_ get a good stack dump for all the cpus if we manually
switch each one to the kdb master and do a back trace.  AKA:
  cpu 4
  bt
...will give the expected trace.  That's because now arm64's
dump_backtrace will now see that "tsk == current" and go through a
different path.

In this patch I fix the problems by catching a request to stack crawl
a task that's running on a CPU and then I ask that CPU to do the stack
crawl.

NOTE: this will (presumably) change what stack crawls are printed for
x86 machines.  Now kdb functions will show up in the stack crawl.
Presumably this is OK but if it's not we can go back and add a special
case for x86 again.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
2019-10-10 16:28:48 +01:00

88 lines
2.5 KiB
C

/*
* Created by: Jason Wessel <jason.wessel@windriver.com>
*
* Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*/
#ifndef _DEBUG_CORE_H_
#define _DEBUG_CORE_H_
/*
* These are the private implementation headers between the kernel
* debugger core and the debugger front end code.
*/
/* kernel debug core data structures */
struct kgdb_state {
int ex_vector;
int signo;
int err_code;
int cpu;
int pass_exception;
unsigned long thr_query;
unsigned long threadid;
long kgdb_usethreadid;
struct pt_regs *linux_regs;
atomic_t *send_ready;
};
/* Exception state values */
#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
#define DCPU_WANT_BT 0x8 /* Slave cpu should backtrace then clear flag */
struct debuggerinfo_struct {
void *debuggerinfo;
struct task_struct *task;
int exception_state;
int ret_state;
int irq_depth;
int enter_kgdb;
bool rounding_up;
};
extern struct debuggerinfo_struct kgdb_info[];
/* kernel debug core break point routines */
extern int dbg_remove_all_break(void);
extern int dbg_set_sw_break(unsigned long addr);
extern int dbg_remove_sw_break(unsigned long addr);
extern int dbg_activate_sw_breakpoints(void);
extern int dbg_deactivate_sw_breakpoints(void);
/* polled character access to i/o module */
extern int dbg_io_get_char(void);
/* stub return value for switching between the gdbstub and kdb */
#define DBG_PASS_EVENT -12345
/* Switch from one cpu to another */
#define DBG_SWITCH_CPU_EVENT -123456
extern int dbg_switch_cpu;
/* gdbstub interface functions */
extern int gdb_serial_stub(struct kgdb_state *ks);
extern void gdbstub_msg_write(const char *s, int len);
/* gdbstub functions used for kdb <-> gdbstub transition */
extern int gdbstub_state(struct kgdb_state *ks, char *cmd);
extern int dbg_kdb_mode;
#ifdef CONFIG_KGDB_KDB
extern int kdb_stub(struct kgdb_state *ks);
extern int kdb_parse(const char *cmdstr);
extern int kdb_common_init_state(struct kgdb_state *ks);
extern int kdb_common_deinit_state(void);
extern void kdb_dump_stack_on_cpu(int cpu);
#else /* ! CONFIG_KGDB_KDB */
static inline int kdb_stub(struct kgdb_state *ks)
{
return DBG_PASS_EVENT;
}
#endif /* CONFIG_KGDB_KDB */
#endif /* _DEBUG_CORE_H_ */