1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 */
26
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29
30 #include <sys/types.h>
31 #include <sys/regset.h>
32 #include <sys/stack.h>
33 #include <sys/thread.h>
34 #include <sys/modctl.h>
35
36 #include "findstack.h"
37 #include "thread.h"
38 #include "sobj.h"
39
/* Largest stack size (one megabyte) that is still treated as believable. */
#define	TOO_BIG_FOR_A_STACK	(1024 * 1024)

/*
 * Convert between an address inside the target thread's stack ("K") and the
 * corresponding address inside the debugger's private copy of that stack
 * ("U").  Both macros rely on variables named kbase and ubase being in scope
 * wherever they are expanded.
 */
#define	KTOU(p)	(((p) - kbase) + ubase)
#define	UTOK(p)	(((p) - ubase) + kbase)

/* crawl() result meaning the frame chain was followed to its very end. */
#define	CRAWL_FOUNDALL	(-1)

#if defined(__i386) || defined(__amd64)
/*
 * Minimal x86 stand-in for a register window: the two words that crawl()
 * reads from each frame.
 */
struct rwindow {
	uintptr_t rw_fp;	/* link to the next frame to visit */
	uintptr_t rw_rtn;	/* value recorded in fsi_stack[] */
};
#endif

/* Fall back to an unbiased stack when no bias has been defined yet. */
#ifndef	STACK_BIAS
#define	STACK_BIAS	0
#endif
57
58 /*
59 * Given a stack pointer, try to crawl down it to the bottom.
60 * "frame" is a VA in MDB's address space.
61 *
62 * Returns the number of frames successfully crawled down, or
63 * CRAWL_FOUNDALL if it got to the bottom of the stack.
64 */
65 static int
66 crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
67 int kill_fp, findstack_info_t *fsip)
68 {
69 int levels = 0;
70
71 fsip->fsi_depth = 0;
72 fsip->fsi_overflow = 0;
73
74 fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
75 frame, kbase, ktop, ubase));
76 for (;;) {
77 uintptr_t fp;
78 long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;
79
80 fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));
81
82 if ((frame & (STACK_ALIGN - 1)) != 0)
83 break;
84
85 fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
86 if (fsip->fsi_depth < fsip->fsi_max_depth)
87 fsip->fsi_stack[fsip->fsi_depth++] =
88 ((struct rwindow *)frame)->rw_rtn;
89 else
90 fsip->fsi_overflow = 1;
91
92 fs_dprintf(("<2> fp = %p\n", fp));
93
94 if (fp == ktop)
95 return (CRAWL_FOUNDALL);
96 fs_dprintf(("<3> not at base\n"));
97
98 #if defined(__i386) || defined(__amd64)
99 if (ktop - fp == sizeof (struct rwindow)) {
100 fs_dprintf(("<4> found base\n"));
101 return (CRAWL_FOUNDALL);
102 }
103 #endif
104
105 fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
106 fp, kbase, ktop - sizeof (struct rwindow)));
107
108 if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
109 break;
110
111 frame = KTOU(fp);
112 fs_dprintf(("<6> frame = %p\n", frame));
113
114 /*
115 * NULL out the old %fp so we don't go down this stack
116 * more than once.
117 */
118 if (kill_fp) {
119 fs_dprintf(("<7> fpp = %p\n", fpp));
120 *fpp = NULL;
121 }
122
123 fs_dprintf(("<8> levels = %d\n", levels));
124 levels++;
125 }
126
127 return (levels);
128 }
129
130 typedef struct mdb_findstack_kthread {
131 struct _sobj_ops *t_sobj_ops;
132 uint_t t_state;
133 ushort_t t_flag;
134 ushort_t t_schedflag;
135 caddr_t t_stk;
136 caddr_t t_stkbase;
137 label_t t_pcb;
138 } mdb_findstack_kthread_t;
139
140 /*ARGSUSED*/
141 int
142 stacks_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
143 {
144 mdb_findstack_kthread_t thr;
145 size_t stksz;
146 uintptr_t ubase, utop;
147 uintptr_t kbase, ktop;
148 uintptr_t win, sp;
149
150 fsip->fsi_failed = 0;
151 fsip->fsi_pc = 0;
152 fsip->fsi_sp = 0;
153 fsip->fsi_depth = 0;
154 fsip->fsi_overflow = 0;
155
156 if (mdb_ctf_vread(&thr, "kthread_t", "mdb_findstack_kthread_t",
157 addr, print_warnings ? 0 : MDB_CTF_VREAD_QUIET) == -1) {
158 fsip->fsi_failed = FSI_FAIL_BADTHREAD;
159 return (DCMD_ERR);
160 }
161
162 fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
163 fsip->fsi_tstate = thr.t_state;
164 fsip->fsi_panic = !!(thr.t_flag & T_PANIC);
165
166 if (thr.t_stk < thr.t_stkbase) {
167 if (print_warnings)
168 mdb_warn(
169 "stack base or stack top corrupt for thread %p\n",
170 addr);
171 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
172 return (DCMD_ERR);
173 }
174
175 kbase = (uintptr_t)thr.t_stkbase;
176 ktop = (uintptr_t)thr.t_stk;
177 stksz = ktop - kbase;
178
179 #ifdef __amd64
180 /*
181 * The stack on amd64 is intentionally misaligned, so ignore the top
182 * half-frame. See thread_stk_init(). When handling traps, the frame
183 * is automatically aligned by the hardware, so we only alter ktop if
184 * needed.
185 */
186 if ((ktop & (STACK_ALIGN - 1)) != 0)
187 ktop -= STACK_ENTRY_ALIGN;
188 #endif
189
190 /*
191 * If the stack size is larger than a meg, assume that it's bogus.
192 */
193 if (stksz > TOO_BIG_FOR_A_STACK) {
194 if (print_warnings)
195 mdb_warn("stack size for thread %p is too big to be "
196 "reasonable\n", addr);
197 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
198 return (DCMD_ERR);
199 }
200
201 /*
202 * This could be (and was) a UM_GC allocation. Unfortunately,
203 * stksz tends to be very large. As currently implemented, dcmds
204 * invoked as part of pipelines don't have their UM_GC-allocated
205 * memory freed until the pipeline completes. With stksz in the
206 * neighborhood of 20k, the popular ::walk thread |::findstack
207 * pipeline can easily run memory-constrained debuggers (kmdb) out
208 * of memory. This can be changed back to a gc-able allocation when
209 * the debugger is changed to free UM_GC memory more promptly.
210 */
211 ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
212 utop = ubase + stksz;
213 if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
214 mdb_free((void *)ubase, stksz);
215 if (print_warnings)
216 mdb_warn("couldn't read entire stack for thread %p\n",
217 addr);
218 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
219 return (DCMD_ERR);
220 }
221
222 /*
223 * Try the saved %sp first, if it looks reasonable.
224 */
225 sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
226 if (sp >= ubase && sp <= utop) {
227 if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
228 fsip->fsi_sp = (uintptr_t)thr.t_sp;
229 #if !defined(__i386)
230 fsip->fsi_pc = (uintptr_t)thr.t_pc;
231 #endif
232 goto found;
233 }
234 }
235
236 /*
237 * Now walk through the whole stack, starting at the base,
238 * trying every possible "window".
239 */
240 for (win = ubase;
241 win + sizeof (struct rwindow) <= utop;
242 win += sizeof (struct rwindow *)) {
243 if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
244 fsip->fsi_sp = UTOK(win) - STACK_BIAS;
245 goto found;
246 }
247 }
248
249 /*
250 * We didn't conclusively find the stack. So we'll take another lap,
251 * and print out anything that looks possible.
252 */
253 if (print_warnings)
254 mdb_printf("Possible stack pointers for thread %p:\n", addr);
255 (void) mdb_vread((caddr_t)ubase, stksz, kbase);
256
257 for (win = ubase;
258 win + sizeof (struct rwindow) <= utop;
259 win += sizeof (struct rwindow *)) {
260 uintptr_t fp = ((struct rwindow *)win)->rw_fp;
261 int levels;
262
263 if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
264 if (print_warnings)
265 mdb_printf(" %p (%d)\n", fp, levels);
266 } else if (levels == CRAWL_FOUNDALL) {
267 /*
268 * If this is a live system, the stack could change
269 * between the two mdb_vread(ubase, utop, kbase)'s,
270 * and we could have a fully valid stack here.
271 */
272 fsip->fsi_sp = UTOK(win) - STACK_BIAS;
273 goto found;
274 }
275 }
276
277 fsip->fsi_depth = 0;
278 fsip->fsi_overflow = 0;
279 fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;
280
281 mdb_free((void *)ubase, stksz);
282 return (DCMD_ERR);
283 found:
284 mdb_free((void *)ubase, stksz);
285 return (DCMD_OK);
286 }
287
/*
 * Cleanup entry point for the findstack code.  It has nothing to do:
 * stacks_findstack() frees its stack copy on every return path.
 */
void
stacks_findstack_cleanup(void)
{
}
291
292 /*ARGSUSED*/
293 int
294 stacks_module_cb(uintptr_t addr, const modctl_t *mp, stacks_module_t *smp)
295 {
296 char mod_modname[MODMAXNAMELEN + 1];
297
298 if (!mp->mod_modname)
299 return (WALK_NEXT);
300
301 if (mdb_readstr(mod_modname, sizeof (mod_modname),
302 (uintptr_t)mp->mod_modname) == -1) {
303 mdb_warn("failed to read mod_modname in \"modctl\" walk");
304 return (WALK_ERR);
305 }
306
307 if (strcmp(smp->sm_name, mod_modname))
308 return (WALK_NEXT);
309
310 smp->sm_text = (uintptr_t)mp->mod_text;
311 smp->sm_size = mp->mod_text_size;
312
313 return (WALK_DONE);
314 }
315
316 int
317 stacks_module(stacks_module_t *smp)
318 {
319 if (mdb_walk("modctl", (mdb_walk_cb_t)stacks_module_cb, smp) != 0) {
320 mdb_warn("cannot walk \"modctl\"");
321 return (-1);
322 }
323
324 return (0);
325 }
326
/*
 * Callback handed to sobj_type_walk() by stacks_help(): prints one
 * synchronization object type name, preceded by a space.  Only "name" is
 * used.
 */
/*ARGSUSED*/
static void
print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
{
	mdb_printf(" %s", name);
}
333
334 /*ARGSUSED*/
335 static void
336 print_tstate_help(uint_t state, const char *name, void *ignored)
337 {
338 mdb_printf(" %s", name);
339 }
340
341 void
342 stacks_help(void)
343 {
344 mdb_printf(
345 "::stacks processes all of the thread stacks on the system, grouping\n"
346 "together threads which have the same:\n"
347 "\n"
348 " * Thread state,\n"
349 " * Sync object type, and\n"
350 " * PCs in their stack trace.\n"
351 "\n"
352 "The default output (no address or options) is just a dump of the thread\n"
353 "groups in the system. For a view of active threads, use \"::stacks -i\",\n"
354 "which filters out FREE threads (interrupt threads which are currently\n"
355 "inactive) and threads sleeping on a CV. (Note that those threads may still\n"
356 "be noteworthy; this is just for a first glance.) More general filtering\n"
357 "options are described below, in the \"FILTERS\" section.\n"
358 "\n"
359 "::stacks can be used in a pipeline. The input to ::stacks is one or more\n"
360 "thread pointers. For example, to get a summary of threads in a process,\n"
361 "you can do:\n"
362 "\n"
363 " %<b>procp%</b>::walk thread | ::stacks\n"
364 "\n"
365 "When output into a pipe, ::stacks prints all of the threads input,\n"
366 "filtered by the given filtering options. This means that multiple\n"
367 "::stacks invocations can be piped together to achieve more complicated\n"
368 "filters. For example, to get threads which have both 'fop_read' and\n"
369 "'cv_wait_sig_swap' in their stack trace, you could do:\n"
370 "\n"
371 " ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
372 "\n"
373 "To get the full list of threads in each group, use the '-a' flag:\n"
374 "\n"
375 " ::stacks -a\n"
376 "\n");
377 mdb_dec_indent(2);
378 mdb_printf("%<b>OPTIONS%</b>\n");
379 mdb_inc_indent(2);
380 mdb_printf("%s",
381 " -a Print all of the grouped threads, instead of just a count.\n"
382 " -f Force a re-run of the thread stack gathering.\n"
383 " -v Be verbose about thread stack gathering.\n"
384 "\n");
385 mdb_dec_indent(2);
386 mdb_printf("%<b>FILTERS%</b>\n");
387 mdb_inc_indent(2);
388 mdb_printf("%s",
389 " -i Show active threads; equivalent to '-S CV -T FREE'.\n"
390 " -c func[+offset]\n"
391 " Only print threads whose stacks contain func/func+offset.\n"
392 " -C func[+offset]\n"
393 " Only print threads whose stacks do not contain func/func+offset.\n"
394 " -m module\n"
395 " Only print threads whose stacks contain functions from module.\n"
396 " -M module\n"
397 " Only print threads whose stacks do not contain functions from\n"
398 " module.\n"
399 " -s {type | ALL}\n"
400 " Only print threads which are on a 'type' synchronization object\n"
401 " (SOBJ).\n"
402 " -S {type | ALL}\n"
403 " Only print threads which are not on a 'type' SOBJ.\n"
404 " -t tstate\n"
405 " Only print threads which are in thread state 'tstate'.\n"
406 " -T tstate\n"
407 " Only print threads which are not in thread state 'tstate'.\n"
408 "\n");
409 mdb_printf(" SOBJ types:");
410 sobj_type_walk(print_sobj_help, NULL);
411 mdb_printf("\n");
412 mdb_printf("Thread states:");
413 thread_walk_states(print_tstate_help, NULL);
414 mdb_printf(" panic\n");
415 }