Print this page
patch fix-mdb
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/mdb/common/modules/genunix/kmem.c
+++ new/usr/src/cmd/mdb/common/modules/genunix/kmem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2011 Joyent, Inc. All rights reserved.
28 28 * Copyright (c) 2012 by Delphix. All rights reserved.
29 29 */
30 30
31 31 #include <mdb/mdb_param.h>
32 32 #include <mdb/mdb_modapi.h>
33 33 #include <mdb/mdb_ctf.h>
34 34 #include <mdb/mdb_whatis.h>
35 35 #include <sys/cpuvar.h>
36 36 #include <sys/kmem_impl.h>
37 37 #include <sys/vmem_impl.h>
38 38 #include <sys/machelf.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/kobj.h>
41 41 #include <sys/panic.h>
42 42 #include <sys/stack.h>
43 43 #include <sys/sysmacros.h>
44 44 #include <vm/page.h>
45 45
46 46 #include "avl.h"
47 47 #include "combined.h"
48 48 #include "dist.h"
49 49 #include "kmem.h"
50 50 #include "list.h"
51 51
/*
 * Emit a "kmem debug: "-prefixed message when mdb_debug_level is non-zero.
 * The argument is a parenthesized mdb_printf() argument list, e.g.
 *
 *	dprintf(("cache_full list done\n"));
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement and cannot capture an "else" that follows the call site.
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: "); \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
57 57
/*
 * KM_* bit flags.  Their consumers are outside the part of the file shown
 * here.  NOTE(review): presumably these select the kind of kmem walk being
 * performed -- confirm against the walker code.
 */
#define KM_ALLOCATED 0x01
#define KM_FREE 0x02
#define KM_BUFCTL 0x04
#define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
#define KM_HASH 0x10

/* Non-zero enables dprintf() output; toggled by kmem_debug(). */
static int mdb_debug_level = 0;
65 65
66 66 /*ARGSUSED*/
67 67 static int
68 68 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
69 69 {
70 70 mdb_walker_t w;
71 71 char descr[64];
72 72
73 73 (void) mdb_snprintf(descr, sizeof (descr),
74 74 "walk the %s cache", c->cache_name);
75 75
76 76 w.walk_name = c->cache_name;
77 77 w.walk_descr = descr;
78 78 w.walk_init = kmem_walk_init;
79 79 w.walk_step = kmem_walk_step;
80 80 w.walk_fini = kmem_walk_fini;
81 81 w.walk_init_arg = (void *)addr;
82 82
83 83 if (mdb_add_walker(&w) == -1)
84 84 mdb_warn("failed to add %s walker", c->cache_name);
85 85
86 86 return (WALK_NEXT);
87 87 }
88 88
89 89 /*ARGSUSED*/
90 90 int
91 91 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
92 92 {
93 93 mdb_debug_level ^= 1;
94 94
95 95 mdb_printf("kmem: debugging is now %s\n",
96 96 mdb_debug_level ? "on" : "off");
97 97
98 98 return (DCMD_OK);
99 99 }
100 100
101 101 int
102 102 kmem_cache_walk_init(mdb_walk_state_t *wsp)
103 103 {
104 104 GElf_Sym sym;
105 105
106 106 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
107 107 mdb_warn("couldn't find kmem_caches");
108 108 return (WALK_ERR);
109 109 }
110 110
111 111 wsp->walk_addr = (uintptr_t)sym.st_value;
112 112
113 113 return (list_walk_init_named(wsp, "cache list", "cache"));
114 114 }
115 115
116 116 int
117 117 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
118 118 {
119 119 if (wsp->walk_addr == NULL) {
120 120 mdb_warn("kmem_cpu_cache doesn't support global walks");
121 121 return (WALK_ERR);
122 122 }
123 123
124 124 if (mdb_layered_walk("cpu", wsp) == -1) {
125 125 mdb_warn("couldn't walk 'cpu'");
126 126 return (WALK_ERR);
127 127 }
128 128
129 129 wsp->walk_data = (void *)wsp->walk_addr;
130 130
131 131 return (WALK_NEXT);
132 132 }
133 133
134 134 int
135 135 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
136 136 {
137 137 uintptr_t caddr = (uintptr_t)wsp->walk_data;
138 138 const cpu_t *cpu = wsp->walk_layer;
139 139 kmem_cpu_cache_t cc;
140 140
141 141 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
142 142
143 143 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
144 144 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
145 145 return (WALK_ERR);
146 146 }
147 147
148 148 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
149 149 }
150 150
151 151 static int
152 152 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
153 153 {
154 154 kmem_slab_t *sp = p;
155 155 uintptr_t caddr = (uintptr_t)arg;
156 156 if ((uintptr_t)sp->slab_cache != caddr) {
157 157 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
158 158 saddr, caddr, sp->slab_cache);
159 159 return (-1);
160 160 }
161 161
162 162 return (0);
163 163 }
164 164
165 165 static int
166 166 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
167 167 {
168 168 kmem_slab_t *sp = p;
169 169
170 170 int rc = kmem_slab_check(p, saddr, arg);
171 171 if (rc != 0) {
172 172 return (rc);
173 173 }
174 174
175 175 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
176 176 mdb_warn("slab %p is not a partial slab\n", saddr);
177 177 return (-1);
178 178 }
179 179
180 180 return (0);
181 181 }
182 182
183 183 static int
184 184 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
185 185 {
186 186 kmem_slab_t *sp = p;
187 187
188 188 int rc = kmem_slab_check(p, saddr, arg);
189 189 if (rc != 0) {
190 190 return (rc);
191 191 }
192 192
193 193 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
194 194 mdb_warn("slab %p is not completely allocated\n", saddr);
195 195 return (-1);
196 196 }
197 197
198 198 return (0);
199 199 }
200 200
/*
 * Argument block for kmem_nth_slab_check(): the cache every slab must
 * belong to, plus a countdown that the check decrements on each slab that
 * passes the cache test.
 */
typedef struct {
	uintptr_t kns_cache_addr;	/* address of the owning kmem cache */
	int kns_nslabs;			/* slabs still to accept; see check */
} kmem_nth_slab_t;
205 205
206 206 static int
207 207 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
208 208 {
209 209 kmem_nth_slab_t *chkp = arg;
210 210
211 211 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
212 212 if (rc != 0) {
213 213 return (rc);
214 214 }
215 215
216 216 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
217 217 }
218 218
219 219 static int
220 220 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
221 221 {
222 222 uintptr_t caddr = wsp->walk_addr;
223 223
224 224 wsp->walk_addr = (uintptr_t)(caddr +
225 225 offsetof(kmem_cache_t, cache_complete_slabs));
226 226
227 227 return (list_walk_init_checked(wsp, "slab list", "slab",
228 228 kmem_complete_slab_check, (void *)caddr));
229 229 }
230 230
231 231 static int
232 232 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
233 233 {
234 234 uintptr_t caddr = wsp->walk_addr;
235 235
236 236 wsp->walk_addr = (uintptr_t)(caddr +
237 237 offsetof(kmem_cache_t, cache_partial_slabs));
238 238
239 239 return (avl_walk_init_checked(wsp, "slab list", "slab",
240 240 kmem_partial_slab_check, (void *)caddr));
241 241 }
242 242
243 243 int
244 244 kmem_slab_walk_init(mdb_walk_state_t *wsp)
245 245 {
246 246 uintptr_t caddr = wsp->walk_addr;
247 247
248 248 if (caddr == NULL) {
249 249 mdb_warn("kmem_slab doesn't support global walks\n");
250 250 return (WALK_ERR);
251 251 }
252 252
253 253 combined_walk_init(wsp);
254 254 combined_walk_add(wsp,
255 255 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
256 256 combined_walk_add(wsp,
257 257 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
258 258
259 259 return (WALK_NEXT);
260 260 }
261 261
262 262 static int
263 263 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
264 264 {
265 265 uintptr_t caddr = wsp->walk_addr;
266 266 kmem_nth_slab_t *chk;
267 267
268 268 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
269 269 UM_SLEEP | UM_GC);
270 270 chk->kns_cache_addr = caddr;
271 271 chk->kns_nslabs = 1;
272 272 wsp->walk_addr = (uintptr_t)(caddr +
273 273 offsetof(kmem_cache_t, cache_complete_slabs));
274 274
275 275 return (list_walk_init_checked(wsp, "slab list", "slab",
276 276 kmem_nth_slab_check, chk));
277 277 }
278 278
279 279 int
280 280 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
281 281 {
282 282 uintptr_t caddr = wsp->walk_addr;
283 283 kmem_cache_t c;
284 284
285 285 if (caddr == NULL) {
286 286 mdb_warn("kmem_slab_partial doesn't support global walks\n");
287 287 return (WALK_ERR);
288 288 }
289 289
290 290 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
291 291 mdb_warn("couldn't read kmem_cache at %p", caddr);
292 292 return (WALK_ERR);
293 293 }
294 294
295 295 combined_walk_init(wsp);
296 296
297 297 /*
298 298 * Some consumers (umem_walk_step(), in particular) require at
299 299 * least one callback if there are any buffers in the cache. So
300 300 * if there are *no* partial slabs, report the first full slab, if
301 301 * any.
302 302 *
303 303 * Yes, this is ugly, but it's cleaner than the other possibilities.
304 304 */
305 305 if (c.cache_partial_slabs.avl_numnodes == 0) {
306 306 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
307 307 list_walk_step, list_walk_fini);
308 308 } else {
309 309 combined_walk_add(wsp, kmem_partial_slab_walk_init,
310 310 avl_walk_step, avl_walk_fini);
311 311 }
312 312
313 313 return (WALK_NEXT);
314 314 }
315 315
316 316 int
317 317 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
318 318 {
319 319 kmem_cache_t c;
320 320 const char *filter = NULL;
321 321
322 322 if (mdb_getopts(ac, argv,
323 323 'n', MDB_OPT_STR, &filter,
324 324 NULL) != ac) {
325 325 return (DCMD_USAGE);
326 326 }
327 327
328 328 if (!(flags & DCMD_ADDRSPEC)) {
329 329 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
330 330 mdb_warn("can't walk kmem_cache");
331 331 return (DCMD_ERR);
332 332 }
333 333 return (DCMD_OK);
334 334 }
335 335
336 336 if (DCMD_HDRSPEC(flags))
337 337 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
338 338 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
339 339
340 340 if (mdb_vread(&c, sizeof (c), addr) == -1) {
341 341 mdb_warn("couldn't read kmem_cache at %p", addr);
342 342 return (DCMD_ERR);
343 343 }
344 344
345 345 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
346 346 return (DCMD_OK);
347 347
348 348 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
349 349 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
350 350
351 351 return (DCMD_OK);
352 352 }
353 353
/*
 * Help text for the kmem_cache dcmd: a summary line, an OPTIONS heading
 * printed with the indent temporarily reduced by two, then the option and
 * column descriptions.
 */
void
kmem_cache_help(void)
{
	static const char summary[] = "Print kernel memory caches.\n\n";
	static const char details[] =
	    " -n name\n"
	    " name of kmem cache (or matching partial name)\n"
	    "\n"
	    "Column\tDescription\n"
	    "\n"
	    "ADDR\t\taddress of kmem cache\n"
	    "NAME\t\tname of kmem cache\n"
	    "FLAG\t\tvarious cache state flags\n"
	    "CFLAG\t\tcache creation flags\n"
	    "BUFSIZE\tobject size in bytes\n"
	    "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n";

	mdb_printf("%s", summary);
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s", details);
}
374 374
375 375 #define LABEL_WIDTH 11
376 376 static void
377 377 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
378 378 size_t maxbuckets, size_t minbucketsize)
379 379 {
380 380 uint64_t total;
381 381 int buckets;
382 382 int i;
383 383 const int *distarray;
384 384 int complete[2];
385 385
386 386 buckets = buffers_per_slab;
387 387
388 388 total = 0;
389 389 for (i = 0; i <= buffers_per_slab; i++)
390 390 total += ks_bucket[i];
391 391
392 392 if (maxbuckets > 1)
393 393 buckets = MIN(buckets, maxbuckets);
394 394
395 395 if (minbucketsize > 1) {
396 396 /*
397 397 * minbucketsize does not apply to the first bucket reserved
398 398 * for completely allocated slabs
399 399 */
400 400 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
401 401 minbucketsize));
402 402 if ((buckets < 2) && (buffers_per_slab > 1)) {
403 403 buckets = 2;
404 404 minbucketsize = (buffers_per_slab - 1);
405 405 }
406 406 }
407 407
408 408 /*
409 409 * The first printed bucket is reserved for completely allocated slabs.
410 410 * Passing (buckets - 1) excludes that bucket from the generated
411 411 * distribution, since we're handling it as a special case.
412 412 */
413 413 complete[0] = buffers_per_slab;
414 414 complete[1] = buffers_per_slab + 1;
415 415 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
416 416
417 417 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
418 418 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
419 419
420 420 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
421 421 /*
422 422 * Print bucket ranges in descending order after the first bucket for
423 423 * completely allocated slabs, so a person can see immediately whether
424 424 * or not there is fragmentation without having to scan possibly
425 425 * multiple screens of output. Starting at (buckets - 2) excludes the
426 426 * extra terminating bucket.
427 427 */
428 428 for (i = buckets - 2; i >= 0; i--) {
429 429 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
430 430 }
431 431 mdb_printf("\n");
432 432 }
433 433 #undef LABEL_WIDTH
434 434
435 435 /*ARGSUSED*/
436 436 static int
437 437 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
438 438 {
439 439 *is_slab = B_TRUE;
440 440 return (WALK_DONE);
441 441 }
442 442
443 443 /*ARGSUSED*/
444 444 static int
445 445 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
446 446 boolean_t *is_slab)
447 447 {
448 448 /*
449 449 * The "kmem_partial_slab" walker reports the first full slab if there
450 450 * are no partial slabs (for the sake of consumers that require at least
451 451 * one callback if there are any buffers in the cache).
452 452 */
453 453 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
454 454 return (WALK_DONE);
455 455 }
456 456
/*
 * Per-slab record kept for every slab that has unused buffers; filled in
 * by kmem_slablist_stat() and printed by kmem_slabs() in verbose mode.
 */
typedef struct kmem_slab_usage {
	int ksu_refcnt; /* count of allocated buffers on slab */
	boolean_t ksu_nomove; /* slab marked non-reclaimable */
} kmem_slab_usage_t;
461 461
/*
 * Accumulator for one cache's slab statistics.  kmem_slabs() zeroes it,
 * sets ks_cp, ks_max_buffers_per_slab and ks_bucket, and then passes it to
 * kmem_slablist_stat() as the walk callback data.  ks_usage is grown on
 * demand by kmem_slablist_stat().
 */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs; /* slabs in cache */
	int ks_partial_slabs; /* partially allocated slabs in cache */
	uint64_t ks_unused_buffers; /* total unused buffers in cache */
	int ks_max_buffers_per_slab; /* max buffers per slab */
	int ks_usage_len; /* ks_usage array length */
	kmem_slab_usage_t *ks_usage; /* partial slab usage */
	uint_t *ks_bucket; /* slab usage distribution */
} kmem_slab_stats_t;
472 472
473 473 /*ARGSUSED*/
474 474 static int
475 475 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
476 476 kmem_slab_stats_t *ks)
477 477 {
478 478 kmem_slab_usage_t *ksu;
479 479 long unused;
480 480
481 481 ks->ks_slabs++;
482 482 ks->ks_bucket[sp->slab_refcnt]++;
483 483
484 484 unused = (sp->slab_chunks - sp->slab_refcnt);
485 485 if (unused == 0) {
486 486 return (WALK_NEXT);
487 487 }
488 488
489 489 ks->ks_partial_slabs++;
490 490 ks->ks_unused_buffers += unused;
491 491
492 492 if (ks->ks_partial_slabs > ks->ks_usage_len) {
493 493 kmem_slab_usage_t *usage;
494 494 int len = ks->ks_usage_len;
495 495
496 496 len = (len == 0 ? 16 : len * 2);
497 497 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
498 498 if (ks->ks_usage != NULL) {
499 499 bcopy(ks->ks_usage, usage,
500 500 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
501 501 mdb_free(ks->ks_usage,
502 502 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
503 503 }
504 504 ks->ks_usage = usage;
505 505 ks->ks_usage_len = len;
506 506 }
507 507
508 508 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
509 509 ksu->ksu_refcnt = sp->slab_refcnt;
510 510 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
511 511 return (WALK_NEXT);
512 512 }
513 513
/*
 * Print the three-line column header used by the kmem_slabs dcmd.
 * Declared with an explicit (void) parameter list so that the definition
 * is a proper prototype.
 */
static void
kmem_slabs_header(void)
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
525 525
526 526 int
527 527 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 528 {
529 529 kmem_cache_t c;
530 530 kmem_slab_stats_t stats;
531 531 mdb_walk_cb_t cb;
532 532 int pct;
533 533 int tenths_pct;
534 534 size_t maxbuckets = 1;
535 535 size_t minbucketsize = 0;
536 536 const char *filter = NULL;
537 537 const char *name = NULL;
538 538 uint_t opt_v = FALSE;
539 539 boolean_t buckets = B_FALSE;
540 540 boolean_t skip = B_FALSE;
541 541
542 542 if (mdb_getopts(argc, argv,
543 543 'B', MDB_OPT_UINTPTR, &minbucketsize,
544 544 'b', MDB_OPT_UINTPTR, &maxbuckets,
545 545 'n', MDB_OPT_STR, &filter,
546 546 'N', MDB_OPT_STR, &name,
547 547 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
548 548 NULL) != argc) {
549 549 return (DCMD_USAGE);
550 550 }
551 551
552 552 if ((maxbuckets != 1) || (minbucketsize != 0)) {
553 553 buckets = B_TRUE;
554 554 }
555 555
556 556 if (!(flags & DCMD_ADDRSPEC)) {
557 557 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
558 558 argv) == -1) {
559 559 mdb_warn("can't walk kmem_cache");
560 560 return (DCMD_ERR);
561 561 }
562 562 return (DCMD_OK);
563 563 }
564 564
565 565 if (mdb_vread(&c, sizeof (c), addr) == -1) {
566 566 mdb_warn("couldn't read kmem_cache at %p", addr);
567 567 return (DCMD_ERR);
568 568 }
569 569
570 570 if (name == NULL) {
571 571 skip = ((filter != NULL) &&
572 572 (strstr(c.cache_name, filter) == NULL));
573 573 } else if (filter == NULL) {
574 574 skip = (strcmp(c.cache_name, name) != 0);
575 575 } else {
576 576 /* match either -n or -N */
577 577 skip = ((strcmp(c.cache_name, name) != 0) &&
578 578 (strstr(c.cache_name, filter) == NULL));
579 579 }
580 580
581 581 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
582 582 kmem_slabs_header();
583 583 } else if ((opt_v || buckets) && !skip) {
584 584 if (DCMD_HDRSPEC(flags)) {
585 585 kmem_slabs_header();
586 586 } else {
587 587 boolean_t is_slab = B_FALSE;
588 588 const char *walker_name;
589 589 if (opt_v) {
590 590 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
591 591 walker_name = "kmem_slab_partial";
592 592 } else {
593 593 cb = (mdb_walk_cb_t)kmem_first_slab;
594 594 walker_name = "kmem_slab";
595 595 }
596 596 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
597 597 if (is_slab) {
598 598 kmem_slabs_header();
599 599 }
600 600 }
601 601 }
602 602
603 603 if (skip) {
604 604 return (DCMD_OK);
605 605 }
606 606
607 607 bzero(&stats, sizeof (kmem_slab_stats_t));
608 608 stats.ks_cp = &c;
609 609 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
610 610 /* +1 to include a zero bucket */
611 611 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
612 612 sizeof (*stats.ks_bucket), UM_SLEEP);
613 613 cb = (mdb_walk_cb_t)kmem_slablist_stat;
614 614 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
615 615
616 616 if (c.cache_buftotal == 0) {
617 617 pct = 0;
618 618 tenths_pct = 0;
619 619 } else {
620 620 uint64_t n = stats.ks_unused_buffers * 10000;
621 621 pct = (int)(n / c.cache_buftotal);
622 622 tenths_pct = pct - ((pct / 100) * 100);
623 623 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
624 624 if (tenths_pct == 10) {
625 625 pct += 100;
626 626 tenths_pct = 0;
627 627 }
628 628 }
629 629
630 630 pct /= 100;
631 631 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
632 632 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
633 633 stats.ks_unused_buffers, pct, tenths_pct);
634 634
635 635 if (maxbuckets == 0) {
636 636 maxbuckets = stats.ks_max_buffers_per_slab;
637 637 }
638 638
639 639 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
640 640 (stats.ks_slabs > 0)) {
641 641 mdb_printf("\n");
642 642 kmem_slabs_print_dist(stats.ks_bucket,
643 643 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
644 644 }
645 645
646 646 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
647 647 sizeof (*stats.ks_bucket));
648 648
649 649 if (!opt_v) {
650 650 return (DCMD_OK);
651 651 }
652 652
653 653 if (opt_v && (stats.ks_partial_slabs > 0)) {
654 654 int i;
655 655 kmem_slab_usage_t *ksu;
656 656
657 657 mdb_printf(" %d complete (%d), %d partial:",
658 658 (stats.ks_slabs - stats.ks_partial_slabs),
659 659 stats.ks_max_buffers_per_slab,
660 660 stats.ks_partial_slabs);
661 661
662 662 for (i = 0; i < stats.ks_partial_slabs; i++) {
663 663 ksu = &stats.ks_usage[i];
664 664 mdb_printf(" %d%s", ksu->ksu_refcnt,
665 665 (ksu->ksu_nomove ? "*" : ""));
666 666 }
667 667 mdb_printf("\n\n");
668 668 }
669 669
670 670 if (stats.ks_usage_len > 0) {
671 671 mdb_free(stats.ks_usage,
672 672 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
673 673 }
674 674
675 675 return (DCMD_OK);
676 676 }
677 677
/*
 * Help text for the kmem_slabs dcmd: a summary line, an OPTIONS heading
 * printed with the indent temporarily reduced by two, then the option and
 * column descriptions.  The -v example line follows the format that
 * kmem_slabs() actually prints ("N complete (M), P partial: ...").
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
	    "Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
	    " -n name\n"
	    " name of kmem cache (or matching partial name)\n"
	    " -N name\n"
	    " exact name of kmem cache\n"
	    " -b maxbins\n"
	    " Print a distribution of allocated buffers per slab using at\n"
	    " most maxbins bins. The first bin is reserved for completely\n"
	    " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
	    " effect as specifying the maximum allocated buffers per slab\n"
	    " or setting minbinsize to 1 (-B 1).\n"
	    " -B minbinsize\n"
	    " Print a distribution of allocated buffers per slab, making\n"
	    " all bins (except the first, reserved for completely allocated\n"
	    " slabs) at least minbinsize buffers apart.\n"
	    " -v verbose output: List the allocated buffer count of each partial\n"
	    " slab on the free list in order from front to back to show how\n"
	    " closely the slabs are ordered by usage. For example\n"
	    "\n"
	    " 10 complete (8), 3 partial: 7 3 1\n"
	    "\n"
	    " means there are thirteen slabs with eight buffers each, including\n"
	    " three partially allocated slabs with less than all eight buffers\n"
	    " allocated.\n"
	    "\n"
	    " Buffer allocations are always from the front of the partial slab\n"
	    " list. When a buffer is freed from a completely used slab, that\n"
	    " slab is added to the front of the partial slab list. Assuming\n"
	    " that all buffers are equally likely to be freed soon, the\n"
	    " desired order of partial slabs is most-used at the front of the\n"
	    " list and least-used at the back (as in the example above).\n"
	    " However, if a slab contains an allocated buffer that will not\n"
	    " soon be freed, it would be better for that slab to be at the\n"
	    " front where all of its buffers can be allocated. Taking a slab\n"
	    " off the partial slab list (either with all buffers freed or all\n"
	    " buffers allocated) reduces cache fragmentation.\n"
	    "\n"
	    " A slab's allocated buffer count representing a partial slab (9 in\n"
	    " the example below) may be marked as follows:\n"
	    "\n"
	    " 9* An asterisk indicates that kmem has marked the slab non-\n"
	    " reclaimable because the kmem client refused to move one of the\n"
	    " slab's buffers. Since kmem does not expect to completely free the\n"
	    " slab, it moves it to the front of the list in the hope of\n"
	    " completely allocating it instead. A slab marked with an asterisk\n"
	    " stays marked for as long as it remains on the partial slab list.\n"
	    "\n"
	    "Column\t\tDescription\n"
	    "\n"
	    "Cache Name\t\tname of kmem cache\n"
	    "Slabs\t\t\ttotal slab count\n"
	    "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
	    "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
	    "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
	    "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
	    "\t\t\t for accounting structures (debug mode), slab\n"
	    "\t\t\t coloring (incremental small offsets to stagger\n"
	    "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
}
745 745
/*
 * qsort()-style comparator for arrays of uintptr_t: orders addresses in
 * ascending order.  Returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t left = *((uintptr_t *)lhs);
	uintptr_t right = *((uintptr_t *)rhs);

	/* (greater) - (less) yields exactly -1, 0 or 1 */
	return ((left > right) - (left < right));
}
758 758
759 759 static int
760 760 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
761 761 {
762 762 const kmem_bufctl_audit_t *bcp1 = *lhs;
763 763 const kmem_bufctl_audit_t *bcp2 = *rhs;
764 764
765 765 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
766 766 return (-1);
767 767
768 768 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
769 769 return (1);
770 770
771 771 return (0);
772 772 }
773 773
/*
 * State for the kmem hash table walker: a local copy of the cache's hash
 * bucket array, the next bucket index to examine, and the most recently
 * read bufctl (whose bc_next links to the rest of the current chain).
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;	/* local copy of the hash bucket array */
	size_t kmhw_nelems;	/* number of buckets in kmhw_table */
	size_t kmhw_pos;	/* next bucket index to examine */
	kmem_bufctl_t kmhw_cur;	/* last bufctl read by the step function */
} kmem_hash_walk_t;
780 780
781 781 int
782 782 kmem_hash_walk_init(mdb_walk_state_t *wsp)
783 783 {
784 784 kmem_hash_walk_t *kmhw;
785 785 uintptr_t *hash;
786 786 kmem_cache_t c;
787 787 uintptr_t haddr, addr = wsp->walk_addr;
788 788 size_t nelems;
789 789 size_t hsize;
790 790
791 791 if (addr == NULL) {
792 792 mdb_warn("kmem_hash doesn't support global walks\n");
793 793 return (WALK_ERR);
794 794 }
795 795
796 796 if (mdb_vread(&c, sizeof (c), addr) == -1) {
797 797 mdb_warn("couldn't read cache at addr %p", addr);
798 798 return (WALK_ERR);
799 799 }
800 800
801 801 if (!(c.cache_flags & KMF_HASH)) {
802 802 mdb_warn("cache %p doesn't have a hash table\n", addr);
803 803 return (WALK_DONE); /* nothing to do */
804 804 }
805 805
806 806 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
807 807 kmhw->kmhw_cur.bc_next = NULL;
808 808 kmhw->kmhw_pos = 0;
809 809
810 810 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
811 811 hsize = nelems * sizeof (uintptr_t);
812 812 haddr = (uintptr_t)c.cache_hash_table;
813 813
814 814 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
815 815 if (mdb_vread(hash, hsize, haddr) == -1) {
816 816 mdb_warn("failed to read hash table at %p", haddr);
817 817 mdb_free(hash, hsize);
818 818 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
819 819 return (WALK_ERR);
820 820 }
821 821
822 822 wsp->walk_data = kmhw;
823 823
824 824 return (WALK_NEXT);
825 825 }
826 826
827 827 int
828 828 kmem_hash_walk_step(mdb_walk_state_t *wsp)
829 829 {
830 830 kmem_hash_walk_t *kmhw = wsp->walk_data;
831 831 uintptr_t addr = NULL;
832 832
833 833 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
834 834 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
835 835 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
836 836 break;
837 837 }
838 838 }
839 839 if (addr == NULL)
840 840 return (WALK_DONE);
841 841
842 842 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
843 843 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
844 844 return (WALK_ERR);
845 845 }
846 846
847 847 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
848 848 }
849 849
850 850 void
851 851 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
852 852 {
853 853 kmem_hash_walk_t *kmhw = wsp->walk_data;
854 854
855 855 if (kmhw == NULL)
856 856 return;
857 857
858 858 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
859 859 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
860 860 }
861 861
862 862 /*
863 863 * Find the address of the bufctl structure for the address 'buf' in cache
864 864 * 'cp', which is at address caddr, and place it in *out.
865 865 */
866 866 static int
867 867 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
868 868 {
869 869 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
870 870 kmem_bufctl_t *bcp;
871 871 kmem_bufctl_t bc;
872 872
873 873 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
874 874 mdb_warn("unable to read hash bucket for %p in cache %p",
875 875 buf, caddr);
876 876 return (-1);
877 877 }
878 878
879 879 while (bcp != NULL) {
880 880 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
881 881 (uintptr_t)bcp) == -1) {
882 882 mdb_warn("unable to read bufctl at %p", bcp);
883 883 return (-1);
884 884 }
885 885 if (bc.bc_addr == buf) {
886 886 *out = (uintptr_t)bcp;
887 887 return (0);
888 888 }
889 889 bcp = bc.bc_next;
890 890 }
891 891
892 892 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
893 893 return (-1);
894 894 }
895 895
896 896 int
897 897 kmem_get_magsize(const kmem_cache_t *cp)
898 898 {
899 899 uintptr_t addr = (uintptr_t)cp->cache_magtype;
900 900 GElf_Sym mt_sym;
901 901 kmem_magtype_t mt;
902 902 int res;
903 903
904 904 /*
905 905 * if cpu 0 has a non-zero magsize, it must be correct. caches
906 906 * with KMF_NOMAGAZINE have disabled their magazine layers, so
907 907 * it is okay to return 0 for them.
908 908 */
909 909 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
910 910 (cp->cache_flags & KMF_NOMAGAZINE))
911 911 return (res);
912 912
913 913 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
914 914 mdb_warn("unable to read 'kmem_magtype'");
915 915 } else if (addr < mt_sym.st_value ||
916 916 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
917 917 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
918 918 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
919 919 cp->cache_name, addr);
920 920 return (0);
921 921 }
922 922 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
923 923 mdb_warn("unable to read magtype at %a", addr);
924 924 return (0);
925 925 }
926 926 return (mt.mt_magsize);
927 927 }
928 928
929 929 /*ARGSUSED*/
930 930 static int
931 931 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
932 932 {
933 933 *est -= (sp->slab_chunks - sp->slab_refcnt);
934 934
935 935 return (WALK_NEXT);
936 936 }
937 937
938 938 /*
939 939 * Returns an upper bound on the number of allocated buffers in a given
940 940 * cache.
941 941 */
942 942 size_t
943 943 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
944 944 {
945 945 int magsize;
946 946 size_t cache_est;
947 947
948 948 cache_est = cp->cache_buftotal;
949 949
950 950 (void) mdb_pwalk("kmem_slab_partial",
951 951 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
952 952
953 953 if ((magsize = kmem_get_magsize(cp)) != 0) {
954 954 size_t mag_est = cp->cache_full.ml_total * magsize;
955 955
956 956 if (cache_est >= mag_est) {
957 957 cache_est -= mag_est;
958 958 } else {
959 959 mdb_warn("cache %p's magazine layer holds more buffers "
960 960 "than the slab layer.\n", addr);
961 961 }
962 962 }
963 963 return (cache_est);
964 964 }
965 965
/*
 * Read the magazine at 'kmp' into 'mp' and append its first 'rounds'
 * entries to maglist[], bumping magcnt.  Jumps to the 'fail' label if the
 * magazine cannot be read or if maglist[] reaches magmax entries.
 *
 * This macro relies on the following names being in scope at the call
 * site: mp, kmp, magbsize, maglist, magcnt, magmax, i, and a 'fail' label.
 *
 * The argument is parenthesized and the body is wrapped in
 * do { } while (0) so that the expansion behaves as a single statement.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
980 980
981 981 int
982 982 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
983 983 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
984 984 {
985 985 kmem_magazine_t *kmp, *mp;
986 986 void **maglist = NULL;
987 987 int i, cpu;
988 988 size_t magsize, magmax, magbsize;
989 989 size_t magcnt = 0;
990 990
991 991 /*
992 992 * Read the magtype out of the cache, after verifying the pointer's
993 993 * correctness.
994 994 */
995 995 magsize = kmem_get_magsize(cp);
996 996 if (magsize == 0) {
997 997 *maglistp = NULL;
998 998 *magcntp = 0;
999 999 *magmaxp = 0;
1000 1000 return (WALK_NEXT);
1001 1001 }
1002 1002
1003 1003 /*
1004 1004 * There are several places where we need to go buffer hunting:
1005 1005 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1006 1006 * and the full magazine list in the depot.
1007 1007 *
1008 1008 * For an upper bound on the number of buffers in the magazine
1009 1009 * layer, we have the number of magazines on the cache_full
1010 1010 * list plus at most two magazines per CPU (the loaded and the
1011 1011 * spare). Toss in 100 magazines as a fudge factor in case this
1012 1012 * is live (the number "100" comes from the same fudge factor in
1013 1013 * crash(1M)).
1014 1014 */
1015 1015 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1016 1016 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1017 1017
1018 1018 if (magbsize >= PAGESIZE / 2) {
1019 1019 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1020 1020 addr, magbsize);
1021 1021 return (WALK_ERR);
1022 1022 }
1023 1023
1024 1024 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1025 1025 mp = mdb_alloc(magbsize, alloc_flags);
1026 1026 if (mp == NULL || maglist == NULL)
1027 1027 goto fail;
1028 1028
1029 1029 /*
1030 1030 * First up: the magazines in the depot (i.e. on the cache_full list).
1031 1031 */
1032 1032 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1033 1033 READMAG_ROUNDS(magsize);
1034 1034 kmp = mp->mag_next;
1035 1035
1036 1036 if (kmp == cp->cache_full.ml_list)
1037 1037 break; /* cache_full list loop detected */
1038 1038 }
1039 1039
1040 1040 dprintf(("cache_full list done\n"));
1041 1041
1042 1042 /*
1043 1043 * Now whip through the CPUs, snagging the loaded magazines
1044 1044 * and full spares.
1045 1045 *
1046 1046 * In order to prevent inconsistent dumps, rounds and prounds
1047 1047 * are copied aside before dumping begins.
1048 1048 */
1049 1049 for (cpu = 0; cpu < ncpus; cpu++) {
1050 1050 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1051 1051 short rounds, prounds;
1052 1052
1053 1053 if (KMEM_DUMPCC(ccp)) {
1054 1054 rounds = ccp->cc_dump_rounds;
1055 1055 prounds = ccp->cc_dump_prounds;
1056 1056 } else {
1057 1057 rounds = ccp->cc_rounds;
1058 1058 prounds = ccp->cc_prounds;
1059 1059 }
1060 1060
1061 1061 dprintf(("reading cpu cache %p\n",
1062 1062 (uintptr_t)ccp - (uintptr_t)cp + addr));
1063 1063
1064 1064 if (rounds > 0 &&
1065 1065 (kmp = ccp->cc_loaded) != NULL) {
1066 1066 dprintf(("reading %d loaded rounds\n", rounds));
1067 1067 READMAG_ROUNDS(rounds);
1068 1068 }
1069 1069
1070 1070 if (prounds > 0 &&
1071 1071 (kmp = ccp->cc_ploaded) != NULL) {
1072 1072 dprintf(("reading %d previously loaded rounds\n",
1073 1073 prounds));
1074 1074 READMAG_ROUNDS(prounds);
1075 1075 }
1076 1076 }
1077 1077
1078 1078 dprintf(("magazine layer: %d buffers\n", magcnt));
1079 1079
1080 1080 if (!(alloc_flags & UM_GC))
1081 1081 mdb_free(mp, magbsize);
1082 1082
1083 1083 *maglistp = maglist;
1084 1084 *magcntp = magcnt;
1085 1085 *magmaxp = magmax;
1086 1086
1087 1087 return (WALK_NEXT);
1088 1088
1089 1089 fail:
1090 1090 if (!(alloc_flags & UM_GC)) {
1091 1091 if (mp)
1092 1092 mdb_free(mp, magbsize);
1093 1093 if (maglist)
1094 1094 mdb_free(maglist, magmax * sizeof (void *));
1095 1095 }
1096 1096 return (WALK_ERR);
1097 1097 }
1098 1098
1099 1099 static int
1100 1100 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1101 1101 {
1102 1102 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1103 1103 }
1104 1104
1105 1105 static int
1106 1106 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1107 1107 {
1108 1108 kmem_bufctl_audit_t b;
1109 1109
1110 1110 /*
1111 1111 * if KMF_AUDIT is not set, we know that we're looking at a
1112 1112 * kmem_bufctl_t.
1113 1113 */
1114 1114 if (!(cp->cache_flags & KMF_AUDIT) ||
1115 1115 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1116 1116 (void) memset(&b, 0, sizeof (b));
1117 1117 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1118 1118 mdb_warn("unable to read bufctl at %p", buf);
1119 1119 return (WALK_ERR);
1120 1120 }
1121 1121 }
1122 1122
1123 1123 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1124 1124 }
1125 1125
/*
 * Per-walk state shared by the kmem buffer/bufctl walkers.  It is
 * allocated by kmem_walk_init_common() and released by kmem_walk_fini().
 */
typedef struct kmem_walk {
	int kmw_type;		/* KM_* flags describing the walk */

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;	/* local copy of the cache */
	size_t kmw_csize;	/* size of the kmw_cp allocation */

	/*
	 * magazine layer
	 */
	void **kmw_maglist;	/* buffers found in the magazine layer */
	size_t kmw_max;		/* entries allocated for kmw_maglist */
	size_t kmw_count;	/* entries in use in kmw_maglist */
	size_t kmw_pos;		/* initialized to 0 by the init routine */

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;
1147 1147
1148 1148 static int
1149 1149 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1150 1150 {
1151 1151 kmem_walk_t *kmw;
1152 1152 int ncpus, csize;
1153 1153 kmem_cache_t *cp;
1154 1154 size_t vm_quantum;
1155 1155
1156 1156 size_t magmax, magcnt;
1157 1157 void **maglist = NULL;
1158 1158 uint_t chunksize, slabsize;
1159 1159 int status = WALK_ERR;
1160 1160 uintptr_t addr = wsp->walk_addr;
1161 1161 const char *layered;
1162 1162
1163 1163 type &= ~KM_HASH;
1164 1164
1165 1165 if (addr == NULL) {
1166 1166 mdb_warn("kmem walk doesn't support global walks\n");
1167 1167 return (WALK_ERR);
1168 1168 }
1169 1169
1170 1170 dprintf(("walking %p\n", addr));
1171 1171
1172 1172 /*
1173 1173 * First we need to figure out how many CPUs are configured in the
1174 1174 * system to know how much to slurp out.
1175 1175 */
1176 1176 mdb_readvar(&ncpus, "max_ncpus");
1177 1177
1178 1178 csize = KMEM_CACHE_SIZE(ncpus);
1179 1179 cp = mdb_alloc(csize, UM_SLEEP);
1180 1180
1181 1181 if (mdb_vread(cp, csize, addr) == -1) {
1182 1182 mdb_warn("couldn't read cache at addr %p", addr);
1183 1183 goto out2;
1184 1184 }
1185 1185
1186 1186 /*
1187 1187 * It's easy for someone to hand us an invalid cache address.
1188 1188 * Unfortunately, it is hard for this walker to survive an
1189 1189 * invalid cache cleanly. So we make sure that:
1190 1190 *
1191 1191 * 1. the vmem arena for the cache is readable,
1192 1192 * 2. the vmem arena's quantum is a power of 2,
1193 1193 * 3. our slabsize is a multiple of the quantum, and
1194 1194 * 4. our chunksize is >0 and less than our slabsize.
1195 1195 */
1196 1196 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1197 1197 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1198 1198 vm_quantum == 0 ||
1199 1199 (vm_quantum & (vm_quantum - 1)) != 0 ||
1200 1200 cp->cache_slabsize < vm_quantum ||
1201 1201 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1202 1202 cp->cache_chunksize == 0 ||
1203 1203 cp->cache_chunksize > cp->cache_slabsize) {
1204 1204 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1205 1205 goto out2;
1206 1206 }
1207 1207
1208 1208 dprintf(("buf total is %d\n", cp->cache_buftotal));
1209 1209
1210 1210 if (cp->cache_buftotal == 0) {
1211 1211 mdb_free(cp, csize);
1212 1212 return (WALK_DONE);
1213 1213 }
1214 1214
1215 1215 /*
1216 1216 * If they ask for bufctls, but it's a small-slab cache,
1217 1217 * there is nothing to report.
1218 1218 */
1219 1219 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1220 1220 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1221 1221 cp->cache_flags));
1222 1222 mdb_free(cp, csize);
1223 1223 return (WALK_DONE);
1224 1224 }
1225 1225
1226 1226 /*
1227 1227 * If they want constructed buffers, but there's no constructor or
1228 1228 * the cache has DEADBEEF checking enabled, there is nothing to report.
1229 1229 */
1230 1230 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1231 1231 cp->cache_constructor == NULL ||
1232 1232 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1233 1233 mdb_free(cp, csize);
1234 1234 return (WALK_DONE);
1235 1235 }
1236 1236
1237 1237 /*
1238 1238 * Read in the contents of the magazine layer
1239 1239 */
1240 1240 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1241 1241 &magmax, UM_SLEEP) == WALK_ERR)
1242 1242 goto out2;
1243 1243
1244 1244 /*
1245 1245 * We have all of the buffers from the magazines; if we are walking
1246 1246 * allocated buffers, sort them so we can bsearch them later.
1247 1247 */
1248 1248 if (type & KM_ALLOCATED)
1249 1249 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1250 1250
1251 1251 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1252 1252
1253 1253 kmw->kmw_type = type;
1254 1254 kmw->kmw_addr = addr;
1255 1255 kmw->kmw_cp = cp;
1256 1256 kmw->kmw_csize = csize;
1257 1257 kmw->kmw_maglist = maglist;
1258 1258 kmw->kmw_max = magmax;
1259 1259 kmw->kmw_count = magcnt;
1260 1260 kmw->kmw_pos = 0;
1261 1261
1262 1262 /*
1263 1263 * When walking allocated buffers in a KMF_HASH cache, we walk the
1264 1264 * hash table instead of the slab layer.
1265 1265 */
1266 1266 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1267 1267 layered = "kmem_hash";
1268 1268
1269 1269 kmw->kmw_type |= KM_HASH;
1270 1270 } else {
1271 1271 /*
1272 1272 * If we are walking freed buffers, we only need the
1273 1273 * magazine layer plus the partially allocated slabs.
1274 1274 * To walk allocated buffers, we need all of the slabs.
1275 1275 */
1276 1276 if (type & KM_ALLOCATED)
1277 1277 layered = "kmem_slab";
1278 1278 else
1279 1279 layered = "kmem_slab_partial";
1280 1280
1281 1281 /*
1282 1282 * for small-slab caches, we read in the entire slab. For
1283 1283 * freed buffers, we can just walk the freelist. For
1284 1284 * allocated buffers, we use a 'valid' array to track
1285 1285 * the freed buffers.
1286 1286 */
1287 1287 if (!(cp->cache_flags & KMF_HASH)) {
1288 1288 chunksize = cp->cache_chunksize;
1289 1289 slabsize = cp->cache_slabsize;
1290 1290
1291 1291 kmw->kmw_ubase = mdb_alloc(slabsize +
1292 1292 sizeof (kmem_bufctl_t), UM_SLEEP);
1293 1293
1294 1294 if (type & KM_ALLOCATED)
1295 1295 kmw->kmw_valid =
1296 1296 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1297 1297 }
1298 1298 }
1299 1299
1300 1300 status = WALK_NEXT;
1301 1301
1302 1302 if (mdb_layered_walk(layered, wsp) == -1) {
1303 1303 mdb_warn("unable to start layered '%s' walk", layered);
1304 1304 status = WALK_ERR;
1305 1305 }
1306 1306
1307 1307 out1:
1308 1308 if (status == WALK_ERR) {
1309 1309 if (kmw->kmw_valid)
1310 1310 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1311 1311
1312 1312 if (kmw->kmw_ubase)
1313 1313 mdb_free(kmw->kmw_ubase, slabsize +
1314 1314 sizeof (kmem_bufctl_t));
1315 1315
1316 1316 if (kmw->kmw_maglist)
1317 1317 mdb_free(kmw->kmw_maglist,
1318 1318 kmw->kmw_max * sizeof (uintptr_t));
1319 1319
1320 1320 mdb_free(kmw, sizeof (kmem_walk_t));
1321 1321 wsp->walk_data = NULL;
1322 1322 }
1323 1323
1324 1324 out2:
1325 1325 if (status == WALK_ERR)
1326 1326 mdb_free(cp, csize);
1327 1327
1328 1328 return (status);
1329 1329 }
1330 1330
1331 1331 int
1332 1332 kmem_walk_step(mdb_walk_state_t *wsp)
1333 1333 {
1334 1334 kmem_walk_t *kmw = wsp->walk_data;
1335 1335 int type = kmw->kmw_type;
1336 1336 kmem_cache_t *cp = kmw->kmw_cp;
1337 1337
1338 1338 void **maglist = kmw->kmw_maglist;
1339 1339 int magcnt = kmw->kmw_count;
1340 1340
1341 1341 uintptr_t chunksize, slabsize;
1342 1342 uintptr_t addr;
1343 1343 const kmem_slab_t *sp;
1344 1344 const kmem_bufctl_t *bcp;
1345 1345 kmem_bufctl_t bc;
1346 1346
1347 1347 int chunks;
1348 1348 char *kbase;
1349 1349 void *buf;
1350 1350 int i, ret;
1351 1351
1352 1352 char *valid, *ubase;
1353 1353
1354 1354 /*
1355 1355 * first, handle the 'kmem_hash' layered walk case
1356 1356 */
1357 1357 if (type & KM_HASH) {
1358 1358 /*
1359 1359 * We have a buffer which has been allocated out of the
1360 1360 * global layer. We need to make sure that it's not
1361 1361 * actually sitting in a magazine before we report it as
1362 1362 * an allocated buffer.
1363 1363 */
1364 1364 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1365 1365
1366 1366 if (magcnt > 0 &&
1367 1367 bsearch(&buf, maglist, magcnt, sizeof (void *),
1368 1368 addrcmp) != NULL)
1369 1369 return (WALK_NEXT);
1370 1370
1371 1371 if (type & KM_BUFCTL)
1372 1372 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1373 1373
1374 1374 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1375 1375 }
1376 1376
1377 1377 ret = WALK_NEXT;
1378 1378
1379 1379 addr = kmw->kmw_addr;
1380 1380
1381 1381 /*
1382 1382 * If we're walking freed buffers, report everything in the
1383 1383 * magazine layer before processing the first slab.
1384 1384 */
1385 1385 if ((type & KM_FREE) && magcnt != 0) {
1386 1386 kmw->kmw_count = 0; /* only do this once */
1387 1387 for (i = 0; i < magcnt; i++) {
1388 1388 buf = maglist[i];
1389 1389
1390 1390 if (type & KM_BUFCTL) {
1391 1391 uintptr_t out;
1392 1392
1393 1393 if (cp->cache_flags & KMF_BUFTAG) {
1394 1394 kmem_buftag_t *btp;
1395 1395 kmem_buftag_t tag;
1396 1396
1397 1397 /* LINTED - alignment */
1398 1398 btp = KMEM_BUFTAG(cp, buf);
1399 1399 if (mdb_vread(&tag, sizeof (tag),
1400 1400 (uintptr_t)btp) == -1) {
1401 1401 mdb_warn("reading buftag for "
1402 1402 "%p at %p", buf, btp);
1403 1403 continue;
1404 1404 }
1405 1405 out = (uintptr_t)tag.bt_bufctl;
1406 1406 } else {
1407 1407 if (kmem_hash_lookup(cp, addr, buf,
1408 1408 &out) == -1)
1409 1409 continue;
1410 1410 }
1411 1411 ret = bufctl_walk_callback(cp, wsp, out);
1412 1412 } else {
1413 1413 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1414 1414 }
1415 1415
1416 1416 if (ret != WALK_NEXT)
1417 1417 return (ret);
1418 1418 }
1419 1419 }
1420 1420
1421 1421 /*
1422 1422 * If they want constructed buffers, we're finished, since the
1423 1423 * magazine layer holds them all.
1424 1424 */
1425 1425 if (type & KM_CONSTRUCTED)
1426 1426 return (WALK_DONE);
1427 1427
1428 1428 /*
1429 1429 * Handle the buffers in the current slab
1430 1430 */
1431 1431 chunksize = cp->cache_chunksize;
1432 1432 slabsize = cp->cache_slabsize;
1433 1433
1434 1434 sp = wsp->walk_layer;
1435 1435 chunks = sp->slab_chunks;
1436 1436 kbase = sp->slab_base;
1437 1437
1438 1438 dprintf(("kbase is %p\n", kbase));
1439 1439
1440 1440 if (!(cp->cache_flags & KMF_HASH)) {
1441 1441 valid = kmw->kmw_valid;
1442 1442 ubase = kmw->kmw_ubase;
1443 1443
1444 1444 if (mdb_vread(ubase, chunks * chunksize,
1445 1445 (uintptr_t)kbase) == -1) {
1446 1446 mdb_warn("failed to read slab contents at %p", kbase);
1447 1447 return (WALK_ERR);
1448 1448 }
1449 1449
1450 1450 /*
1451 1451 * Set up the valid map as fully allocated -- we'll punch
1452 1452 * out the freelist.
1453 1453 */
1454 1454 if (type & KM_ALLOCATED)
1455 1455 (void) memset(valid, 1, chunks);
1456 1456 } else {
1457 1457 valid = NULL;
1458 1458 ubase = NULL;
1459 1459 }
1460 1460
1461 1461 /*
1462 1462 * walk the slab's freelist
1463 1463 */
1464 1464 bcp = sp->slab_head;
1465 1465
1466 1466 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1467 1467
1468 1468 /*
1469 1469 * since we could be in the middle of allocating a buffer,
1470 1470 * our refcnt could be one higher than it aught. So we
1471 1471 * check one further on the freelist than the count allows.
1472 1472 */
1473 1473 for (i = sp->slab_refcnt; i <= chunks; i++) {
1474 1474 uint_t ndx;
1475 1475
1476 1476 dprintf(("bcp is %p\n", bcp));
1477 1477
1478 1478 if (bcp == NULL) {
1479 1479 if (i == chunks)
1480 1480 break;
1481 1481 mdb_warn(
1482 1482 "slab %p in cache %p freelist too short by %d\n",
1483 1483 sp, addr, chunks - i);
1484 1484 break;
1485 1485 }
1486 1486
1487 1487 if (cp->cache_flags & KMF_HASH) {
1488 1488 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1489 1489 mdb_warn("failed to read bufctl ptr at %p",
1490 1490 bcp);
1491 1491 break;
1492 1492 }
1493 1493 buf = bc.bc_addr;
1494 1494 } else {
1495 1495 /*
1496 1496 * Otherwise the buffer is (or should be) in the slab
1497 1497 * that we've read in; determine its offset in the
1498 1498 * slab, validate that it's not corrupt, and add to
1499 1499 * our base address to find the umem_bufctl_t. (Note
1500 1500 * that we don't need to add the size of the bufctl
1501 1501 * to our offset calculation because of the slop that's
1502 1502 * allocated for the buffer at ubase.)
1503 1503 */
1504 1504 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1505 1505
1506 1506 if (offs > chunks * chunksize) {
1507 1507 mdb_warn("found corrupt bufctl ptr %p"
1508 1508 " in slab %p in cache %p\n", bcp,
1509 1509 wsp->walk_addr, addr);
1510 1510 break;
1511 1511 }
1512 1512
1513 1513 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1514 1514 buf = KMEM_BUF(cp, bcp);
1515 1515 }
1516 1516
1517 1517 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518 1518
1519 1519 if (ndx > slabsize / cp->cache_bufsize) {
1520 1520 /*
1521 1521 * This is very wrong; we have managed to find
1522 1522 * a buffer in the slab which shouldn't
1523 1523 * actually be here. Emit a warning, and
1524 1524 * try to continue.
1525 1525 */
1526 1526 mdb_warn("buf %p is out of range for "
1527 1527 "slab %p, cache %p\n", buf, sp, addr);
1528 1528 } else if (type & KM_ALLOCATED) {
1529 1529 /*
1530 1530 * we have found a buffer on the slab's freelist;
1531 1531 * clear its entry
1532 1532 */
1533 1533 valid[ndx] = 0;
1534 1534 } else {
1535 1535 /*
1536 1536 * Report this freed buffer
1537 1537 */
1538 1538 if (type & KM_BUFCTL) {
1539 1539 ret = bufctl_walk_callback(cp, wsp,
1540 1540 (uintptr_t)bcp);
1541 1541 } else {
1542 1542 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543 1543 }
1544 1544 if (ret != WALK_NEXT)
1545 1545 return (ret);
1546 1546 }
1547 1547
1548 1548 bcp = bc.bc_next;
1549 1549 }
1550 1550
1551 1551 if (bcp != NULL) {
1552 1552 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1553 1553 sp, addr, bcp));
1554 1554 }
1555 1555
1556 1556 /*
1557 1557 * If we are walking freed buffers, the loop above handled reporting
1558 1558 * them.
1559 1559 */
1560 1560 if (type & KM_FREE)
1561 1561 return (WALK_NEXT);
1562 1562
1563 1563 if (type & KM_BUFCTL) {
1564 1564 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1565 1565 "cache %p\n", addr);
1566 1566 return (WALK_ERR);
1567 1567 }
1568 1568
1569 1569 /*
1570 1570 * Report allocated buffers, skipping buffers in the magazine layer.
1571 1571 * We only get this far for small-slab caches.
1572 1572 */
1573 1573 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1574 1574 buf = (char *)kbase + i * chunksize;
1575 1575
1576 1576 if (!valid[i])
1577 1577 continue; /* on slab freelist */
1578 1578
1579 1579 if (magcnt > 0 &&
1580 1580 bsearch(&buf, maglist, magcnt, sizeof (void *),
1581 1581 addrcmp) != NULL)
1582 1582 continue; /* in magazine layer */
1583 1583
1584 1584 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585 1585 }
1586 1586 return (ret);
1587 1587 }
1588 1588
1589 1589 void
1590 1590 kmem_walk_fini(mdb_walk_state_t *wsp)
1591 1591 {
1592 1592 kmem_walk_t *kmw = wsp->walk_data;
1593 1593 uintptr_t chunksize;
1594 1594 uintptr_t slabsize;
1595 1595
1596 1596 if (kmw == NULL)
1597 1597 return;
1598 1598
1599 1599 if (kmw->kmw_maglist != NULL)
1600 1600 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601 1601
1602 1602 chunksize = kmw->kmw_cp->cache_chunksize;
1603 1603 slabsize = kmw->kmw_cp->cache_slabsize;
1604 1604
1605 1605 if (kmw->kmw_valid != NULL)
1606 1606 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607 1607 if (kmw->kmw_ubase != NULL)
1608 1608 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609 1609
1610 1610 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611 1611 mdb_free(kmw, sizeof (kmem_walk_t));
1612 1612 }
1613 1613
1614 1614 /*ARGSUSED*/
1615 1615 static int
1616 1616 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 1617 {
1618 1618 /*
1619 1619 * Buffers allocated from NOTOUCH caches can also show up as freed
1620 1620 * memory in other caches. This can be a little confusing, so we
1621 1621 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1622 1622 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623 1623 */
1624 1624 if (c->cache_cflags & KMC_NOTOUCH)
1625 1625 return (WALK_NEXT);
1626 1626
1627 1627 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628 1628 wsp->walk_cbdata, addr) == -1)
1629 1629 return (WALK_DONE);
1630 1630
1631 1631 return (WALK_NEXT);
1632 1632 }
1633 1633
/*
 * KMEM_WALK_ALL(name, wsp) -- global-walk helper for the *_walk_init
 * routines.  Stores the walker name in wsp->walk_data and applies that
 * walker to every cache via kmem_walk_all().  The macro always returns
 * from the enclosing function: WALK_ERR if the "kmem_cache" walk fails,
 * WALK_DONE otherwise.
 */
#define KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1640 1640
1641 1641 int
1642 1642 kmem_walk_init(mdb_walk_state_t *wsp)
1643 1643 {
1644 1644 if (wsp->walk_arg != NULL)
1645 1645 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646 1646
1647 1647 if (wsp->walk_addr == NULL)
1648 1648 KMEM_WALK_ALL("kmem", wsp);
1649 1649 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 1650 }
1651 1651
1652 1652 int
1653 1653 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 1654 {
1655 1655 if (wsp->walk_addr == NULL)
1656 1656 KMEM_WALK_ALL("bufctl", wsp);
1657 1657 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 1658 }
1659 1659
1660 1660 int
1661 1661 freemem_walk_init(mdb_walk_state_t *wsp)
1662 1662 {
1663 1663 if (wsp->walk_addr == NULL)
1664 1664 KMEM_WALK_ALL("freemem", wsp);
1665 1665 return (kmem_walk_init_common(wsp, KM_FREE));
1666 1666 }
1667 1667
1668 1668 int
1669 1669 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 1670 {
1671 1671 if (wsp->walk_addr == NULL)
1672 1672 KMEM_WALK_ALL("freemem_constructed", wsp);
1673 1673 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 1674 }
1675 1675
1676 1676 int
1677 1677 freectl_walk_init(mdb_walk_state_t *wsp)
1678 1678 {
1679 1679 if (wsp->walk_addr == NULL)
1680 1680 KMEM_WALK_ALL("freectl", wsp);
1681 1681 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 1682 }
1683 1683
1684 1684 int
1685 1685 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 1686 {
1687 1687 if (wsp->walk_addr == NULL)
1688 1688 KMEM_WALK_ALL("freectl_constructed", wsp);
1689 1689 return (kmem_walk_init_common(wsp,
1690 1690 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 1691 }
1692 1692
/*
 * State for the bufctl_history walk, which follows a bufctl's bc_lastlog
 * chain.
 */
typedef struct bufctl_history_walk {
	void *bhw_next;		/* target address of next bufctl to visit */
	kmem_cache_t *bhw_cache;	/* bc_cache every entry must match */
	kmem_slab_t *bhw_slab;	/* bc_slab every entry must match */
	hrtime_t bhw_timestamp;	/* timestamp of last entry reported, or 0 */
} bufctl_history_walk_t;
1699 1699
1700 1700 int
1701 1701 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 1702 {
1703 1703 bufctl_history_walk_t *bhw;
1704 1704 kmem_bufctl_audit_t bc;
1705 1705 kmem_bufctl_audit_t bcn;
1706 1706
1707 1707 if (wsp->walk_addr == NULL) {
1708 1708 mdb_warn("bufctl_history walk doesn't support global walks\n");
1709 1709 return (WALK_ERR);
1710 1710 }
1711 1711
1712 1712 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713 1713 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714 1714 return (WALK_ERR);
1715 1715 }
1716 1716
1717 1717 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718 1718 bhw->bhw_timestamp = 0;
1719 1719 bhw->bhw_cache = bc.bc_cache;
1720 1720 bhw->bhw_slab = bc.bc_slab;
1721 1721
1722 1722 /*
1723 1723 * sometimes the first log entry matches the base bufctl; in that
1724 1724 * case, skip the base bufctl.
1725 1725 */
1726 1726 if (bc.bc_lastlog != NULL &&
1727 1727 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728 1728 bc.bc_addr == bcn.bc_addr &&
1729 1729 bc.bc_cache == bcn.bc_cache &&
1730 1730 bc.bc_slab == bcn.bc_slab &&
1731 1731 bc.bc_timestamp == bcn.bc_timestamp &&
1732 1732 bc.bc_thread == bcn.bc_thread)
1733 1733 bhw->bhw_next = bc.bc_lastlog;
1734 1734 else
1735 1735 bhw->bhw_next = (void *)wsp->walk_addr;
1736 1736
1737 1737 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738 1738 wsp->walk_data = bhw;
1739 1739
1740 1740 return (WALK_NEXT);
1741 1741 }
1742 1742
1743 1743 int
1744 1744 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 1745 {
1746 1746 bufctl_history_walk_t *bhw = wsp->walk_data;
1747 1747 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748 1748 uintptr_t baseaddr = wsp->walk_addr;
1749 1749 kmem_bufctl_audit_t bc;
1750 1750
1751 1751 if (addr == NULL)
1752 1752 return (WALK_DONE);
1753 1753
1754 1754 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755 1755 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756 1756 return (WALK_ERR);
1757 1757 }
1758 1758
1759 1759 /*
1760 1760 * The bufctl is only valid if the address, cache, and slab are
1761 1761 * correct. We also check that the timestamp is decreasing, to
1762 1762 * prevent infinite loops.
1763 1763 */
1764 1764 if ((uintptr_t)bc.bc_addr != baseaddr ||
1765 1765 bc.bc_cache != bhw->bhw_cache ||
1766 1766 bc.bc_slab != bhw->bhw_slab ||
1767 1767 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768 1768 return (WALK_DONE);
1769 1769
1770 1770 bhw->bhw_next = bc.bc_lastlog;
1771 1771 bhw->bhw_timestamp = bc.bc_timestamp;
1772 1772
1773 1773 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 1774 }
1775 1775
1776 1776 void
1777 1777 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 1778 {
1779 1779 bufctl_history_walk_t *bhw = wsp->walk_data;
1780 1780
1781 1781 mdb_free(bhw, sizeof (*bhw));
1782 1782 }
1783 1783
/*
 * State for the kmem_log walk: a local copy of a transaction log plus an
 * array of pointers into that copy, ordered by bufctlcmp().
 */
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;	/* local copy of the log contents */
	kmem_bufctl_audit_t **klw_sorted;	/* sorted pointers into klw_base */
	kmem_log_header_t klw_lh;	/* local copy of the log header */
	size_t klw_size;	/* size of klw_base, in bytes */
	size_t klw_maxndx;	/* number of entries in klw_sorted */
	size_t klw_ndx;		/* next klw_sorted entry to report */
} kmem_log_walk_t;
1792 1792
1793 1793 int
1794 1794 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 1795 {
1796 1796 uintptr_t lp = wsp->walk_addr;
1797 1797 kmem_log_walk_t *klw;
1798 1798 kmem_log_header_t *lhp;
1799 1799 int maxndx, i, j, k;
1800 1800
1801 1801 /*
1802 1802 * By default (global walk), walk the kmem_transaction_log. Otherwise
1803 1803 * read the log whose kmem_log_header_t is stored at walk_addr.
1804 1804 */
1805 1805 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806 1806 mdb_warn("failed to read 'kmem_transaction_log'");
1807 1807 return (WALK_ERR);
1808 1808 }
1809 1809
1810 1810 if (lp == NULL) {
1811 1811 mdb_warn("log is disabled\n");
1812 1812 return (WALK_ERR);
1813 1813 }
1814 1814
1815 1815 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816 1816 lhp = &klw->klw_lh;
1817 1817
1818 1818 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819 1819 mdb_warn("failed to read log header at %p", lp);
1820 1820 mdb_free(klw, sizeof (kmem_log_walk_t));
1821 1821 return (WALK_ERR);
1822 1822 }
1823 1823
1824 1824 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825 1825 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826 1826 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827 1827
1828 1828 if (mdb_vread(klw->klw_base, klw->klw_size,
1829 1829 (uintptr_t)lhp->lh_base) == -1) {
1830 1830 mdb_warn("failed to read log at base %p", lhp->lh_base);
1831 1831 mdb_free(klw->klw_base, klw->klw_size);
1832 1832 mdb_free(klw, sizeof (kmem_log_walk_t));
1833 1833 return (WALK_ERR);
1834 1834 }
1835 1835
1836 1836 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837 1837 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838 1838
1839 1839 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840 1840 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841 1841 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842 1842
1843 1843 for (j = 0; j < maxndx; j++)
1844 1844 klw->klw_sorted[k++] = &chunk[j];
1845 1845 }
1846 1846
1847 1847 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848 1848 (int(*)(const void *, const void *))bufctlcmp);
1849 1849
1850 1850 klw->klw_maxndx = k;
1851 1851 wsp->walk_data = klw;
1852 1852
1853 1853 return (WALK_NEXT);
1854 1854 }
1855 1855
1856 1856 int
1857 1857 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 1858 {
1859 1859 kmem_log_walk_t *klw = wsp->walk_data;
1860 1860 kmem_bufctl_audit_t *bcp;
1861 1861
1862 1862 if (klw->klw_ndx == klw->klw_maxndx)
1863 1863 return (WALK_DONE);
1864 1864
1865 1865 bcp = klw->klw_sorted[klw->klw_ndx++];
1866 1866
1867 1867 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868 1868 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 1869 }
1870 1870
1871 1871 void
1872 1872 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 1873 {
1874 1874 kmem_log_walk_t *klw = wsp->walk_data;
1875 1875
1876 1876 mdb_free(klw->klw_base, klw->klw_size);
1877 1877 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878 1878 sizeof (kmem_bufctl_audit_t *));
1879 1879 mdb_free(klw, sizeof (kmem_log_walk_t));
1880 1880 }
1881 1881
/*
 * One bufctl recorded by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* address of the bufctl in the target */
	hrtime_t abb_ts;	/* its bc_timestamp, used for sorting */
} allocdby_bufctl_t;
1886 1886
/*
 * State for the allocdby/freedby walkers: the bufctls whose bc_thread
 * matches the thread of interest, gathered into a growable array.
 */
typedef struct allocdby_walk {
	const char *abw_walk;	/* per-cache walker used to gather bufctls */
	uintptr_t abw_thread;	/* thread address to match */
	size_t abw_nbufs;	/* entries in use in abw_buf */
	size_t abw_size;	/* entries allocated for abw_buf */
	allocdby_bufctl_t *abw_buf;	/* gathered bufctls */
	size_t abw_ndx;		/* next abw_buf entry to report */
} allocdby_walk_t;
1895 1895
1896 1896 int
1897 1897 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898 1898 allocdby_walk_t *abw)
1899 1899 {
1900 1900 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901 1901 return (WALK_NEXT);
1902 1902
1903 1903 if (abw->abw_nbufs == abw->abw_size) {
1904 1904 allocdby_bufctl_t *buf;
1905 1905 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906 1906
1907 1907 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908 1908
1909 1909 bcopy(abw->abw_buf, buf, oldsize);
1910 1910 mdb_free(abw->abw_buf, oldsize);
1911 1911
1912 1912 abw->abw_size <<= 1;
1913 1913 abw->abw_buf = buf;
1914 1914 }
1915 1915
1916 1916 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917 1917 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918 1918 abw->abw_nbufs++;
1919 1919
1920 1920 return (WALK_NEXT);
1921 1921 }
1922 1922
1923 1923 /*ARGSUSED*/
1924 1924 int
1925 1925 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 1926 {
1927 1927 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928 1928 abw, addr) == -1) {
1929 1929 mdb_warn("couldn't walk bufctl for cache %p", addr);
1930 1930 return (WALK_DONE);
1931 1931 }
1932 1932
1933 1933 return (WALK_NEXT);
1934 1934 }
1935 1935
1936 1936 static int
1937 1937 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 1938 {
1939 1939 if (lhs->abb_ts < rhs->abb_ts)
1940 1940 return (1);
1941 1941 if (lhs->abb_ts > rhs->abb_ts)
1942 1942 return (-1);
1943 1943 return (0);
1944 1944 }
1945 1945
1946 1946 static int
1947 1947 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 1948 {
1949 1949 allocdby_walk_t *abw;
1950 1950
1951 1951 if (wsp->walk_addr == NULL) {
1952 1952 mdb_warn("allocdby walk doesn't support global walks\n");
1953 1953 return (WALK_ERR);
1954 1954 }
1955 1955
1956 1956 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957 1957
1958 1958 abw->abw_thread = wsp->walk_addr;
1959 1959 abw->abw_walk = walk;
1960 1960 abw->abw_size = 128; /* something reasonable */
1961 1961 abw->abw_buf =
1962 1962 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963 1963
1964 1964 wsp->walk_data = abw;
1965 1965
1966 1966 if (mdb_walk("kmem_cache",
1967 1967 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968 1968 mdb_warn("couldn't walk kmem_cache");
1969 1969 allocdby_walk_fini(wsp);
1970 1970 return (WALK_ERR);
1971 1971 }
1972 1972
1973 1973 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974 1974 (int(*)(const void *, const void *))allocdby_cmp);
1975 1975
1976 1976 return (WALK_NEXT);
1977 1977 }
1978 1978
1979 1979 int
1980 1980 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 1981 {
1982 1982 return (allocdby_walk_init_common(wsp, "bufctl"));
1983 1983 }
1984 1984
1985 1985 int
1986 1986 freedby_walk_init(mdb_walk_state_t *wsp)
1987 1987 {
1988 1988 return (allocdby_walk_init_common(wsp, "freectl"));
1989 1989 }
1990 1990
1991 1991 int
1992 1992 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 1993 {
1994 1994 allocdby_walk_t *abw = wsp->walk_data;
1995 1995 kmem_bufctl_audit_t bc;
1996 1996 uintptr_t addr;
1997 1997
1998 1998 if (abw->abw_ndx == abw->abw_nbufs)
1999 1999 return (WALK_DONE);
2000 2000
2001 2001 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002 2002
2003 2003 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004 2004 mdb_warn("couldn't read bufctl at %p", addr);
2005 2005 return (WALK_DONE);
2006 2006 }
2007 2007
2008 2008 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 2009 }
2010 2010
2011 2011 void
2012 2012 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 2013 {
2014 2014 allocdby_walk_t *abw = wsp->walk_data;
2015 2015
2016 2016 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017 2017 mdb_free(abw, sizeof (allocdby_walk_t));
2018 2018 }
2019 2019
2020 2020 /*ARGSUSED*/
2021 2021 int
2022 2022 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 2023 {
2024 2024 char c[MDB_SYM_NAMLEN];
2025 2025 GElf_Sym sym;
2026 2026 int i;
2027 2027
2028 2028 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029 2029 for (i = 0; i < bcp->bc_depth; i++) {
2030 2030 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031 2031 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032 2032 continue;
2033 2033 if (strncmp(c, "kmem_", 5) == 0)
2034 2034 continue;
2035 2035 mdb_printf("%s+0x%lx",
2036 2036 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037 2037 break;
2038 2038 }
2039 2039 mdb_printf("\n");
2040 2040
2041 2041 return (WALK_NEXT);
2042 2042 }
2043 2043
2044 2044 static int
2045 2045 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 2046 {
2047 2047 if (!(flags & DCMD_ADDRSPEC))
2048 2048 return (DCMD_USAGE);
2049 2049
2050 2050 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051 2051
2052 2052 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053 2053 mdb_warn("can't walk '%s' for %p", w, addr);
2054 2054 return (DCMD_ERR);
2055 2055 }
2056 2056
2057 2057 return (DCMD_OK);
2058 2058 }
2059 2059
2060 2060 /*ARGSUSED*/
2061 2061 int
2062 2062 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 2063 {
2064 2064 return (allocdby_common(addr, flags, "allocdby"));
2065 2065 }
2066 2066
2067 2067 /*ARGSUSED*/
2068 2068 int
2069 2069 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 2070 {
2071 2071 return (allocdby_common(addr, flags, "freedby"));
2072 2072 }
2073 2073
2074 2074 /*
2075 2075 * Return a string describing the address in relation to the given thread's
2076 2076 * stack.
2077 2077 *
2078 2078 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079 2079 *
2080 2080 * - If the address is above the stack pointer, return an empty string
2081 2081 * signifying that the address is active.
2082 2082 *
2083 2083 * - If the address is below the stack pointer, and the thread is not on proc,
2084 2084 * return " (below sp)".
2085 2085 *
2086 2086 * - If the address is below the stack pointer, and the thread is on proc,
2087 2087 * return " (possibly below sp)". Depending on context, we may or may not
2088 2088 * have an accurate t_sp.
2089 2089 */
2090 2090 static const char *
2091 2091 stack_active(const kthread_t *t, uintptr_t addr)
2092 2092 {
2093 2093 uintptr_t panicstk;
2094 2094 GElf_Sym sym;
2095 2095
2096 2096 if (t->t_state == TS_FREE)
2097 2097 return (" (inactive interrupt thread)");
2098 2098
2099 2099 /*
2100 2100 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2101 2101 * no longer relates to the thread's real stack.
2102 2102 */
2103 2103 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104 2104 panicstk = (uintptr_t)sym.st_value;
2105 2105
2106 2106 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107 2107 return ("");
2108 2108 }
2109 2109
2110 2110 if (addr >= t->t_sp + STACK_BIAS)
2111 2111 return ("");
2112 2112
2113 2113 if (t->t_state == TS_ONPROC)
2114 2114 return (" (possibly below sp)");
2115 2115
2116 2116 return (" (below sp)");
2117 2117 }
2118 2118
/*
 * Additional state for the kmem and vmem ::whatis handlers
 */
typedef struct whatis_info {
	/* whatis handle handed to every mdb_whatis_*() call */
	mdb_whatis_t *wi_w;
	/* cache currently being searched; set by whatis_walk_cache() */
	const kmem_cache_t *wi_cache;
	/* arena currently being searched; set by whatis_walk_vmem() */
	const vmem_t *wi_vmem;
	/* value of the kernel's kmem_msb_arena; read by whatis_run_kmem() */
	vmem_t *wi_msb_arena;
	/* size passed to the slab overlap check; set by whatis_walk_cache() */
	size_t wi_slab_size;
	/* bumped by whatis_walk_slab() when a slab overlaps the search */
	uint_t wi_slab_found;
	/* kernel's kmem_lite_count, or 0 if unreadable or greater than 16 */
	uint_t wi_kmem_lite_count;
	/* TRUE while walking freed buffers, FALSE while walking allocated */
	uint_t wi_freemem;
} whatis_info_t;
2132 2132
2133 2133 /* call one of our dcmd functions with "-v" and the provided address */
2134 2134 static void
2135 2135 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 2136 {
2137 2137 mdb_arg_t a;
2138 2138 a.a_type = MDB_TYPE_STRING;
2139 2139 a.a_un.a_str = "-v";
2140 2140
2141 2141 mdb_printf(":\n");
2142 2142 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 2143 }
2144 2144
2145 2145 static void
2146 2146 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 2147 {
2148 2148 #define KMEM_LITE_MAX 16
2149 2149 pc_t callers[KMEM_LITE_MAX];
2150 2150 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151 2151
2152 2152 kmem_buftag_t bt;
2153 2153 intptr_t stat;
2154 2154 const char *plural = "";
2155 2155 int i;
2156 2156
2157 2157 /* validate our arguments and read in the buftag */
2158 2158 if (count == 0 || count > KMEM_LITE_MAX ||
2159 2159 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160 2160 return;
2161 2161
2162 2162 /* validate the buffer state and read in the callers */
2163 2163 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164 2164
2165 2165 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166 2166 return;
2167 2167
2168 2168 if (mdb_vread(callers, count * sizeof (pc_t),
2169 2169 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170 2170 return;
2171 2171
2172 2172 /* If there aren't any filled in callers, bail */
2173 2173 if (callers[0] == uninit)
2174 2174 return;
2175 2175
2176 2176 plural = (callers[1] == uninit) ? "" : "s";
2177 2177
2178 2178 /* Everything's done and checked; print them out */
2179 2179 mdb_printf(":\n");
2180 2180
2181 2181 mdb_inc_indent(8);
2182 2182 mdb_printf("recent caller%s: %a", plural, callers[0]);
2183 2183 for (i = 1; i < count; i++) {
2184 2184 if (callers[i] == uninit)
2185 2185 break;
2186 2186 mdb_printf(", %a", callers[i]);
2187 2187 }
2188 2188 mdb_dec_indent(8);
2189 2189 }
2190 2190
2191 2191 static void
2192 2192 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193 2193 uintptr_t baddr)
2194 2194 {
2195 2195 mdb_whatis_t *w = wi->wi_w;
2196 2196
2197 2197 const kmem_cache_t *cp = wi->wi_cache;
2198 2198 /* LINTED pointer cast may result in improper alignment */
2199 2199 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200 2200 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201 2201 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202 2202
2203 2203 mdb_whatis_report_object(w, maddr, addr, "");
2204 2204
2205 2205 if (baddr != 0 && !call_printer)
2206 2206 mdb_printf("bufctl %p ", baddr);
2207 2207
2208 2208 mdb_printf("%s from %s",
2209 2209 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210 2210
2211 2211 if (baddr != 0 && call_printer) {
2212 2212 whatis_call_printer(bufctl, baddr);
2213 2213 return;
2214 2214 }
2215 2215
2216 2216 /* for KMF_LITE caches, try to print out the previous callers */
2217 2217 if (!quiet && (cp->cache_flags & KMF_LITE))
2218 2218 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219 2219
2220 2220 mdb_printf("\n");
2221 2221 }
2222 2222
2223 2223 /*ARGSUSED*/
2224 2224 static int
2225 2225 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 2226 {
2227 2227 mdb_whatis_t *w = wi->wi_w;
2228 2228
2229 2229 uintptr_t cur;
2230 2230 size_t size = wi->wi_cache->cache_bufsize;
2231 2231
2232 2232 while (mdb_whatis_match(w, addr, size, &cur))
2233 2233 whatis_print_kmem(wi, cur, addr, NULL);
2234 2234
2235 2235 return (WHATIS_WALKRET(w));
2236 2236 }
2237 2237
2238 2238 /*ARGSUSED*/
2239 2239 static int
2240 2240 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 2241 {
2242 2242 mdb_whatis_t *w = wi->wi_w;
2243 2243
2244 2244 uintptr_t cur;
2245 2245 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246 2246 size_t size = wi->wi_cache->cache_bufsize;
2247 2247
2248 2248 while (mdb_whatis_match(w, addr, size, &cur))
2249 2249 whatis_print_kmem(wi, cur, addr, baddr);
2250 2250
2251 2251 return (WHATIS_WALKRET(w));
2252 2252 }
2253 2253
2254 2254 static int
2255 2255 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 2256 {
2257 2257 mdb_whatis_t *w = wi->wi_w;
2258 2258
2259 2259 size_t size = vs->vs_end - vs->vs_start;
2260 2260 uintptr_t cur;
2261 2261
2262 2262 /* We're not interested in anything but alloc and free segments */
2263 2263 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264 2264 return (WALK_NEXT);
2265 2265
2266 2266 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267 2267 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268 2268
2269 2269 /*
2270 2270 * If we're not printing it seperately, provide the vmem_seg
2271 2271 * pointer if it has a stack trace.
2272 2272 */
2273 2273 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274 2274 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275 2275 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276 2276 mdb_printf("vmem_seg %p ", addr);
2277 2277 }
2278 2278
2279 2279 mdb_printf("%s from the %s vmem arena",
2280 2280 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281 2281 wi->wi_vmem->vm_name);
2282 2282
2283 2283 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284 2284 whatis_call_printer(vmem_seg, addr);
2285 2285 else
2286 2286 mdb_printf("\n");
2287 2287 }
2288 2288
2289 2289 return (WHATIS_WALKRET(w));
2290 2290 }
2291 2291
2292 2292 static int
2293 2293 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 2294 {
2295 2295 mdb_whatis_t *w = wi->wi_w;
2296 2296 const char *nm = vmem->vm_name;
2297 2297
2298 2298 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299 2299 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300 2300
2301 2301 if (identifier != idspace)
2302 2302 return (WALK_NEXT);
2303 2303
2304 2304 wi->wi_vmem = vmem;
2305 2305
2306 2306 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307 2307 mdb_printf("Searching vmem arena %s...\n", nm);
2308 2308
2309 2309 if (mdb_pwalk("vmem_seg",
2310 2310 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311 2311 mdb_warn("can't walk vmem_seg for %p", addr);
2312 2312 return (WALK_NEXT);
2313 2313 }
2314 2314
2315 2315 return (WHATIS_WALKRET(w));
2316 2316 }
2317 2317
2318 2318 /*ARGSUSED*/
2319 2319 static int
2320 2320 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 2321 {
2322 2322 mdb_whatis_t *w = wi->wi_w;
2323 2323
2324 2324 /* It must overlap with the slab data, or it's not interesting */
2325 2325 if (mdb_whatis_overlaps(w,
2326 2326 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327 2327 wi->wi_slab_found++;
2328 2328 return (WALK_DONE);
2329 2329 }
2330 2330 return (WALK_NEXT);
2331 2331 }
2332 2332
2333 2333 static int
2334 2334 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 2335 {
2336 2336 mdb_whatis_t *w = wi->wi_w;
2337 2337
2338 2338 char *walk, *freewalk;
2339 2339 mdb_walk_cb_t func;
2340 2340 int do_bufctl;
2341 2341
2342 2342 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343 2343 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344 2344
2345 2345 if (identifier != idspace)
2346 2346 return (WALK_NEXT);
2347 2347
2348 2348 /* Override the '-b' flag as necessary */
2349 2349 if (!(c->cache_flags & KMF_HASH))
2350 2350 do_bufctl = FALSE; /* no bufctls to walk */
2351 2351 else if (c->cache_flags & KMF_AUDIT)
2352 2352 do_bufctl = TRUE; /* we always want debugging info */
2353 2353 else
2354 2354 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355 2355
2356 2356 if (do_bufctl) {
2357 2357 walk = "bufctl";
2358 2358 freewalk = "freectl";
2359 2359 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360 2360 } else {
2361 2361 walk = "kmem";
2362 2362 freewalk = "freemem";
2363 2363 func = (mdb_walk_cb_t)whatis_walk_kmem;
2364 2364 }
2365 2365
2366 2366 wi->wi_cache = c;
2367 2367
2368 2368 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369 2369 mdb_printf("Searching %s...\n", c->cache_name);
2370 2370
2371 2371 /*
2372 2372 * If more then two buffers live on each slab, figure out if we're
2373 2373 * interested in anything in any slab before doing the more expensive
2374 2374 * kmem/freemem (bufctl/freectl) walkers.
2375 2375 */
2376 2376 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377 2377 if (!(c->cache_flags & KMF_HASH))
2378 2378 wi->wi_slab_size -= sizeof (kmem_slab_t);
2379 2379
2380 2380 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381 2381 wi->wi_slab_found = 0;
2382 2382 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383 2383 addr) == -1) {
2384 2384 mdb_warn("can't find kmem_slab walker");
2385 2385 return (WALK_DONE);
2386 2386 }
2387 2387 if (wi->wi_slab_found == 0)
2388 2388 return (WALK_NEXT);
2389 2389 }
2390 2390
2391 2391 wi->wi_freemem = FALSE;
2392 2392 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393 2393 mdb_warn("can't find %s walker", walk);
2394 2394 return (WALK_DONE);
2395 2395 }
2396 2396
2397 2397 if (mdb_whatis_done(w))
2398 2398 return (WALK_DONE);
2399 2399
2400 2400 /*
2401 2401 * We have searched for allocated memory; now search for freed memory.
2402 2402 */
2403 2403 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404 2404 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405 2405
2406 2406 wi->wi_freemem = TRUE;
2407 2407 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408 2408 mdb_warn("can't find %s walker", freewalk);
2409 2409 return (WALK_DONE);
2410 2410 }
2411 2411
2412 2412 return (WHATIS_WALKRET(w));
2413 2413 }
2414 2414
2415 2415 static int
2416 2416 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 2417 {
2418 2418 if (c->cache_arena == wi->wi_msb_arena ||
2419 2419 (c->cache_cflags & KMC_NOTOUCH))
2420 2420 return (WALK_NEXT);
2421 2421
2422 2422 return (whatis_walk_cache(addr, c, wi));
2423 2423 }
2424 2424
2425 2425 static int
2426 2426 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 2427 {
2428 2428 if (c->cache_arena != wi->wi_msb_arena)
2429 2429 return (WALK_NEXT);
2430 2430
2431 2431 return (whatis_walk_cache(addr, c, wi));
2432 2432 }
2433 2433
2434 2434 static int
2435 2435 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 2436 {
2437 2437 if (c->cache_arena == wi->wi_msb_arena ||
2438 2438 !(c->cache_cflags & KMC_NOTOUCH))
2439 2439 return (WALK_NEXT);
2440 2440
2441 2441 return (whatis_walk_cache(addr, c, wi));
2442 2442 }
2443 2443
2444 2444 static int
2445 2445 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 2446 {
2447 2447 uintptr_t cur;
2448 2448 uintptr_t saddr;
2449 2449 size_t size;
2450 2450
2451 2451 /*
2452 2452 * Often, one calls ::whatis on an address from a thread structure.
2453 2453 * We use this opportunity to short circuit this case...
2454 2454 */
2455 2455 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456 2456 mdb_whatis_report_object(w, cur, addr,
2457 2457 "allocated as a thread structure\n");
2458 2458
2459 2459 /*
2460 2460 * Now check the stack
2461 2461 */
2462 2462 if (t->t_stkbase == NULL)
2463 2463 return (WALK_NEXT);
2464 2464
2465 2465 /*
2466 2466 * This assumes that t_stk is the end of the stack, but it's really
2467 2467 * only the initial stack pointer for the thread. Arguments to the
2468 2468 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2469 2469 * that 't->t_stk::whatis' reports "part of t's stack", we include
2470 2470 * t_stk in the range (the "+ 1", below), but the kernel should
2471 2471 * really include the full stack bounds where we can find it.
2472 2472 */
2473 2473 saddr = (uintptr_t)t->t_stkbase;
2474 2474 size = (uintptr_t)t->t_stk - saddr + 1;
2475 2475 while (mdb_whatis_match(w, saddr, size, &cur))
2476 2476 mdb_whatis_report_object(w, cur, cur,
2477 2477 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478 2478
2479 2479 return (WHATIS_WALKRET(w));
2480 2480 }
2481 2481
2482 2482 static void
2483 2483 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484 2484 uintptr_t base, size_t size, const char *where)
2485 2485 {
2486 2486 uintptr_t cur;
2487 2487
2488 2488 /*
2489 2489 * Since we're searching for addresses inside a module, we report
2490 2490 * them as symbols.
2491 2491 */
2492 2492 while (mdb_whatis_match(w, base, size, &cur))
2493 2493 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 2494 }
2495 2495
2496 2496 static int
2497 2497 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2498 2498 {
2499 2499 char name[MODMAXNAMELEN];
2500 2500 struct module mod;
2501 2501 Shdr shdr;
2502 2502
2503 2503 if (m->mod_mp == NULL)
2504 2504 return (WALK_NEXT);
2505 2505
2506 2506 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2507 2507 mdb_warn("couldn't read modctl %p's module", addr);
2508 2508 return (WALK_NEXT);
2509 2509 }
2510 2510
2511 2511 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2512 2512 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2513 2513
2514 2514 whatis_modctl_match(w, name,
2515 2515 (uintptr_t)mod.text, mod.text_size, "text segment");
2516 2516 whatis_modctl_match(w, name,
2517 2517 (uintptr_t)mod.data, mod.data_size, "data segment");
2518 2518 whatis_modctl_match(w, name,
2519 2519 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2520 2520
2521 2521 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2522 2522 mdb_warn("couldn't read symbol header for %p's module", addr);
2523 2523 return (WALK_NEXT);
2524 2524 }
2525 2525
2526 2526 whatis_modctl_match(w, name,
2527 2527 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2528 2528 whatis_modctl_match(w, name,
2529 2529 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2530 2530
2531 2531 return (WHATIS_WALKRET(w));
2532 2532 }
2533 2533
2534 2534 /*ARGSUSED*/
2535 2535 static int
2536 2536 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2537 2537 {
2538 2538 uintptr_t cur;
2539 2539
2540 2540 uintptr_t base = (uintptr_t)seg->pages;
2541 2541 size_t size = (uintptr_t)seg->epages - base;
2542 2542
2543 2543 while (mdb_whatis_match(w, base, size, &cur)) {
2544 2544 /* round our found pointer down to the page_t base. */
2545 2545 size_t offset = (cur - base) % sizeof (page_t);
2546 2546
2547 2547 mdb_whatis_report_object(w, cur, cur - offset,
2548 2548 "allocated as a page structure\n");
2549 2549 }
2550 2550
2551 2551 return (WHATIS_WALKRET(w));
2552 2552 }
2553 2553
2554 2554 /*ARGSUSED*/
2555 2555 static int
2556 2556 whatis_run_modules(mdb_whatis_t *w, void *arg)
2557 2557 {
2558 2558 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2559 2559 mdb_warn("couldn't find modctl walker");
2560 2560 return (1);
2561 2561 }
2562 2562 return (0);
2563 2563 }
2564 2564
2565 2565 /*ARGSUSED*/
2566 2566 static int
2567 2567 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2568 2568 {
2569 2569 /*
2570 2570 * Now search all thread stacks. Yes, this is a little weak; we
2571 2571 * can save a lot of work by first checking to see if the
2572 2572 * address is in segkp vs. segkmem. But hey, computers are
2573 2573 * fast.
2574 2574 */
2575 2575 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2576 2576 mdb_warn("couldn't find thread walker");
2577 2577 return (1);
2578 2578 }
2579 2579 return (0);
2580 2580 }
2581 2581
2582 2582 /*ARGSUSED*/
2583 2583 static int
2584 2584 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2585 2585 {
2586 2586 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2587 2587 mdb_warn("couldn't find memseg walker");
2588 2588 return (1);
2589 2589 }
2590 2590 return (0);
2591 2591 }
2592 2592
2593 2593 /*ARGSUSED*/
2594 2594 static int
2595 2595 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2596 2596 {
2597 2597 whatis_info_t wi;
2598 2598
2599 2599 bzero(&wi, sizeof (wi));
2600 2600 wi.wi_w = w;
2601 2601
2602 2602 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2603 2603 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2604 2604
2605 2605 if (mdb_readvar(&wi.wi_kmem_lite_count,
2606 2606 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2607 2607 wi.wi_kmem_lite_count = 0;
2608 2608
2609 2609 /*
2610 2610 * We process kmem caches in the following order:
2611 2611 *
2612 2612 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2613 2613 * metadata (can be huge with KMF_AUDIT)
2614 2614 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2615 2615 */
2616 2616 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2617 2617 &wi) == -1 ||
2618 2618 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2619 2619 &wi) == -1 ||
2620 2620 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2621 2621 &wi) == -1) {
2622 2622 mdb_warn("couldn't find kmem_cache walker");
2623 2623 return (1);
2624 2624 }
2625 2625 return (0);
2626 2626 }
2627 2627
2628 2628 /*ARGSUSED*/
2629 2629 static int
2630 2630 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2631 2631 {
2632 2632 whatis_info_t wi;
2633 2633
2634 2634 bzero(&wi, sizeof (wi));
2635 2635 wi.wi_w = w;
2636 2636
2637 2637 if (mdb_walk("vmem_postfix",
2638 2638 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2639 2639 mdb_warn("couldn't find vmem_postfix walker");
2640 2640 return (1);
2641 2641 }
2642 2642 return (0);
2643 2643 }
2644 2644
/*
 * Address range of one CPU's transaction log chunk, as computed by
 * kmem_log().  kmem_log_walk() attributes a log entry to the CPU whose
 * range satisfies kmc_low <= entry address < kmc_high.
 */
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;	/* clh_chunk * lh_chunksize + lh_base */
	uintptr_t kmc_high;	/* clh_current */
} kmem_log_cpu_t;
2649 2649
/*
 * Callback state handed to kmem_log_walk().
 */
typedef struct kmem_log_data {
	/* if nonzero, show only entries whose buffer contains this address */
	uintptr_t kmd_addr;
	/* per-CPU log ranges; kmem_log() allocates NCPU entries */
	kmem_log_cpu_t *kmd_cpu;
} kmem_log_data_t;
2654 2654
2655 2655 int
2656 2656 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2657 2657 kmem_log_data_t *kmd)
2658 2658 {
2659 2659 int i;
2660 2660 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2661 2661 size_t bufsize;
2662 2662
2663 2663 for (i = 0; i < NCPU; i++) {
2664 2664 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2665 2665 break;
2666 2666 }
2667 2667
2668 2668 if (kmd->kmd_addr) {
2669 2669 if (b->bc_cache == NULL)
2670 2670 return (WALK_NEXT);
2671 2671
2672 2672 if (mdb_vread(&bufsize, sizeof (bufsize),
2673 2673 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2674 2674 mdb_warn(
2675 2675 "failed to read cache_bufsize for cache at %p",
2676 2676 b->bc_cache);
2677 2677 return (WALK_ERR);
2678 2678 }
2679 2679
2680 2680 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2681 2681 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2682 2682 return (WALK_NEXT);
2683 2683 }
2684 2684
2685 2685 if (i == NCPU)
2686 2686 mdb_printf(" ");
2687 2687 else
2688 2688 mdb_printf("%3d", i);
2689 2689
2690 2690 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2691 2691 b->bc_timestamp, b->bc_thread);
2692 2692
2693 2693 return (WALK_NEXT);
2694 2694 }
2695 2695
2696 2696 /*ARGSUSED*/
2697 2697 int
2698 2698 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2699 2699 {
2700 2700 kmem_log_header_t lh;
2701 2701 kmem_cpu_log_header_t clh;
2702 2702 uintptr_t lhp, clhp;
2703 2703 int ncpus;
2704 2704 uintptr_t *cpu;
2705 2705 GElf_Sym sym;
2706 2706 kmem_log_cpu_t *kmc;
2707 2707 int i;
2708 2708 kmem_log_data_t kmd;
2709 2709 uint_t opt_b = FALSE;
2710 2710
2711 2711 if (mdb_getopts(argc, argv,
2712 2712 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2713 2713 return (DCMD_USAGE);
2714 2714
2715 2715 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2716 2716 mdb_warn("failed to read 'kmem_transaction_log'");
2717 2717 return (DCMD_ERR);
2718 2718 }
2719 2719
2720 2720 if (lhp == NULL) {
2721 2721 mdb_warn("no kmem transaction log\n");
2722 2722 return (DCMD_ERR);
2723 2723 }
2724 2724
2725 2725 mdb_readvar(&ncpus, "ncpus");
2726 2726
2727 2727 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2728 2728 mdb_warn("failed to read log header at %p", lhp);
2729 2729 return (DCMD_ERR);
2730 2730 }
2731 2731
2732 2732 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2733 2733
2734 2734 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2735 2735
2736 2736 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2737 2737 mdb_warn("couldn't find 'cpu' array");
2738 2738 return (DCMD_ERR);
2739 2739 }
2740 2740
2741 2741 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2742 2742 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2743 2743 NCPU * sizeof (uintptr_t), sym.st_size);
2744 2744 return (DCMD_ERR);
2745 2745 }
2746 2746
2747 2747 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2748 2748 mdb_warn("failed to read cpu array at %p", sym.st_value);
2749 2749 return (DCMD_ERR);
2750 2750 }
2751 2751
2752 2752 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2753 2753 kmd.kmd_addr = NULL;
2754 2754 kmd.kmd_cpu = kmc;
2755 2755
2756 2756 for (i = 0; i < NCPU; i++) {
2757 2757
2758 2758 if (cpu[i] == NULL)
2759 2759 continue;
2760 2760
2761 2761 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2762 2762 mdb_warn("cannot read cpu %d's log header at %p",
2763 2763 i, clhp);
2764 2764 return (DCMD_ERR);
2765 2765 }
2766 2766
2767 2767 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2768 2768 (uintptr_t)lh.lh_base;
2769 2769 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2770 2770
2771 2771 clhp += sizeof (kmem_cpu_log_header_t);
2772 2772 }
2773 2773
2774 2774 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2775 2775 "TIMESTAMP", "THREAD");
2776 2776
2777 2777 /*
2778 2778 * If we have been passed an address, print out only log entries
2779 2779 * corresponding to that address. If opt_b is specified, then interpret
2780 2780 * the address as a bufctl.
2781 2781 */
2782 2782 if (flags & DCMD_ADDRSPEC) {
2783 2783 kmem_bufctl_audit_t b;
2784 2784
2785 2785 if (opt_b) {
2786 2786 kmd.kmd_addr = addr;
2787 2787 } else {
2788 2788 if (mdb_vread(&b,
2789 2789 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2790 2790 mdb_warn("failed to read bufctl at %p", addr);
2791 2791 return (DCMD_ERR);
2792 2792 }
2793 2793
2794 2794 (void) kmem_log_walk(addr, &b, &kmd);
2795 2795
2796 2796 return (DCMD_OK);
2797 2797 }
2798 2798 }
2799 2799
2800 2800 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2801 2801 mdb_warn("can't find kmem log walker");
2802 2802 return (DCMD_ERR);
2803 2803 }
2804 2804
2805 2805 return (DCMD_OK);
2806 2806 }
2807 2807
/*
 * State shared between bufctl() in history mode and
 * bufctl_history_callback().
 */
typedef struct bufctl_history_cb {
	/* dcmd flags for bufctl(); DCMD_LOOPFIRST is cleared after each call */
	int bhc_flags;
	/* argument count and vector passed to each bufctl() call */
	int bhc_argc;
	const mdb_arg_t *bhc_argv;
	/* return value of the most recent bufctl() call */
	int bhc_ret;
} bufctl_history_cb_t;
2814 2814
2815 2815 /*ARGSUSED*/
2816 2816 static int
2817 2817 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2818 2818 {
2819 2819 bufctl_history_cb_t *bhc = arg;
2820 2820
2821 2821 bhc->bhc_ret =
2822 2822 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2823 2823
2824 2824 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2825 2825
2826 2826 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2827 2827 }
2828 2828
/*
 * Help text for the ::bufctl dcmd: a one-line description followed by the
 * list of supported options.
 */
void
bufctl_help(void)
{
	mdb_printf("%s",
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
	/* pull the OPTIONS heading out by two columns, then restore */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -v Display the full content of the bufctl, including its stack trace\n"
" -h retrieve the bufctl's transaction history, if available\n"
" -a addr\n"
" filter out bufctls not involving the buffer at addr\n"
" -c caller\n"
" filter out bufctls without the function/PC in their stack trace\n"
" -e earliest\n"
" filter out bufctls timestamped before earliest\n"
" -l latest\n"
" filter out bufctls timestamped after latest\n"
" -t thread\n"
" filter out bufctls not involving thread\n");
}
2851 2851
2852 2852 int
2853 2853 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2854 2854 {
2855 2855 kmem_bufctl_audit_t bc;
2856 2856 uint_t verbose = FALSE;
2857 2857 uint_t history = FALSE;
2858 2858 uint_t in_history = FALSE;
2859 2859 uintptr_t caller = NULL, thread = NULL;
2860 2860 uintptr_t laddr, haddr, baddr = NULL;
2861 2861 hrtime_t earliest = 0, latest = 0;
2862 2862 int i, depth;
2863 2863 char c[MDB_SYM_NAMLEN];
2864 2864 GElf_Sym sym;
2865 2865
2866 2866 if (mdb_getopts(argc, argv,
2867 2867 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2868 2868 'h', MDB_OPT_SETBITS, TRUE, &history,
2869 2869 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2870 2870 'c', MDB_OPT_UINTPTR, &caller,
2871 2871 't', MDB_OPT_UINTPTR, &thread,
2872 2872 'e', MDB_OPT_UINT64, &earliest,
2873 2873 'l', MDB_OPT_UINT64, &latest,
2874 2874 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2875 2875 return (DCMD_USAGE);
2876 2876
2877 2877 if (!(flags & DCMD_ADDRSPEC))
2878 2878 return (DCMD_USAGE);
2879 2879
2880 2880 if (in_history && !history)
2881 2881 return (DCMD_USAGE);
2882 2882
2883 2883 if (history && !in_history) {
2884 2884 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2885 2885 UM_SLEEP | UM_GC);
2886 2886 bufctl_history_cb_t bhc;
2887 2887
2888 2888 nargv[0].a_type = MDB_TYPE_STRING;
2889 2889 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2890 2890
2891 2891 for (i = 0; i < argc; i++)
2892 2892 nargv[i + 1] = argv[i];
2893 2893
2894 2894 /*
2895 2895 * When in history mode, we treat each element as if it
2896 2896 * were in a seperate loop, so that the headers group
2897 2897 * bufctls with similar histories.
2898 2898 */
2899 2899 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2900 2900 bhc.bhc_argc = argc + 1;
2901 2901 bhc.bhc_argv = nargv;
2902 2902 bhc.bhc_ret = DCMD_OK;
2903 2903
2904 2904 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2905 2905 addr) == -1) {
2906 2906 mdb_warn("unable to walk bufctl_history");
2907 2907 return (DCMD_ERR);
2908 2908 }
2909 2909
2910 2910 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2911 2911 mdb_printf("\n");
2912 2912
2913 2913 return (bhc.bhc_ret);
2914 2914 }
2915 2915
2916 2916 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2917 2917 if (verbose) {
2918 2918 mdb_printf("%16s %16s %16s %16s\n"
2919 2919 "%<u>%16s %16s %16s %16s%</u>\n",
2920 2920 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2921 2921 "", "CACHE", "LASTLOG", "CONTENTS");
2922 2922 } else {
2923 2923 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2924 2924 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925 2925 }
2926 2926 }
2927 2927
2928 2928 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2929 2929 mdb_warn("couldn't read bufctl at %p", addr);
2930 2930 return (DCMD_ERR);
2931 2931 }
2932 2932
2933 2933 /*
2934 2934 * Guard against bogus bc_depth in case the bufctl is corrupt or
2935 2935 * the address does not really refer to a bufctl.
2936 2936 */
2937 2937 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2938 2938
2939 2939 if (caller != NULL) {
2940 2940 laddr = caller;
2941 2941 haddr = caller + sizeof (caller);
2942 2942
2943 2943 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2944 2944 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2945 2945 /*
2946 2946 * We were provided an exact symbol value; any
2947 2947 * address in the function is valid.
2948 2948 */
2949 2949 laddr = (uintptr_t)sym.st_value;
2950 2950 haddr = (uintptr_t)sym.st_value + sym.st_size;
2951 2951 }
2952 2952
2953 2953 for (i = 0; i < depth; i++)
2954 2954 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2955 2955 break;
2956 2956
2957 2957 if (i == depth)
2958 2958 return (DCMD_OK);
2959 2959 }
2960 2960
2961 2961 if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2962 2962 return (DCMD_OK);
2963 2963
2964 2964 if (earliest != 0 && bc.bc_timestamp < earliest)
2965 2965 return (DCMD_OK);
2966 2966
2967 2967 if (latest != 0 && bc.bc_timestamp > latest)
2968 2968 return (DCMD_OK);
2969 2969
2970 2970 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2971 2971 return (DCMD_OK);
2972 2972
2973 2973 if (flags & DCMD_PIPE_OUT) {
2974 2974 mdb_printf("%#lr\n", addr);
2975 2975 return (DCMD_OK);
2976 2976 }
2977 2977
2978 2978 if (verbose) {
2979 2979 mdb_printf(
2980 2980 "%<b>%16p%</b> %16p %16llx %16p\n"
2981 2981 "%16s %16p %16p %16p\n",
2982 2982 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2983 2983 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2984 2984
2985 2985 mdb_inc_indent(17);
2986 2986 for (i = 0; i < depth; i++)
2987 2987 mdb_printf("%a\n", bc.bc_stack[i]);
2988 2988 mdb_dec_indent(17);
2989 2989 mdb_printf("\n");
2990 2990 } else {
2991 2991 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2992 2992 bc.bc_timestamp, bc.bc_thread);
2993 2993
2994 2994 for (i = 0; i < depth; i++) {
2995 2995 if (mdb_lookup_by_addr(bc.bc_stack[i],
2996 2996 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2997 2997 continue;
2998 2998 if (strncmp(c, "kmem_", 5) == 0)
2999 2999 continue;
3000 3000 mdb_printf(" %a\n", bc.bc_stack[i]);
3001 3001 break;
3002 3002 }
3003 3003
3004 3004 if (i >= depth)
3005 3005 mdb_printf("\n");
3006 3006 }
3007 3007
3008 3008 return (DCMD_OK);
3009 3009 }
3010 3010
/*
 * Per-cache state handed to the verify_alloc() and verify_free() walk
 * callbacks by the kmem_verify() dcmd.
 */
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	/*
	 * When non-zero the callbacks only count corrupt buffers; when zero
	 * they also print a message for each corruption site they find.
	 */
	int kmv_besilent;
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;
3018 3018
/*
 * verify_pattern()
 *	Check that the `size' bytes starting at buf_arg consist solely of
 *	repetitions of the 64-bit pattern `pat'.  Returns the byte offset of
 *	the first 64-bit word that differs, or -1 when every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	uint64_t *limit = (uint64_t *)((char *)buf_arg + size);
	uint64_t *wp = buf_arg;

	while (wp < limit) {
		if (*wp != pat)
			return ((uintptr_t)wp - (uintptr_t)buf_arg);
		wp++;
	}

	return (-1);
}
3035 3035
3036 3036 /*
3037 3037 * verify_buftag()
3038 3038 * verify that btp->bt_bxstat == (bcp ^ pat)
3039 3039 */
3040 3040 static int
3041 3041 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3042 3042 {
3043 3043 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 3044 }
3045 3045
3046 3046 /*
3047 3047 * verify_free()
3048 3048 * verify the integrity of a free block of memory by checking
3049 3049 * that it is filled with 0xdeadbeef and that its buftag is sane.
3050 3050 */
3051 3051 /*ARGSUSED1*/
3052 3052 static int
3053 3053 verify_free(uintptr_t addr, const void *data, void *private)
3054 3054 {
3055 3055 kmem_verify_t *kmv = (kmem_verify_t *)private;
3056 3056 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3057 3057 int64_t corrupt; /* corruption offset */
3058 3058 kmem_buftag_t *buftagp; /* ptr to buftag */
3059 3059 kmem_cache_t *cp = &kmv->kmv_cache;
3060 3060 int besilent = kmv->kmv_besilent;
3061 3061
3062 3062 /*LINTED*/
3063 3063 buftagp = KMEM_BUFTAG(cp, buf);
3064 3064
3065 3065 /*
3066 3066 * Read the buffer to check.
3067 3067 */
3068 3068 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3069 3069 if (!besilent)
3070 3070 mdb_warn("couldn't read %p", addr);
3071 3071 return (WALK_NEXT);
3072 3072 }
3073 3073
3074 3074 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3075 3075 KMEM_FREE_PATTERN)) >= 0) {
3076 3076 if (!besilent)
3077 3077 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3078 3078 addr, (uintptr_t)addr + corrupt);
3079 3079 goto corrupt;
3080 3080 }
3081 3081 /*
3082 3082 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3083 3083 * the first bytes of the buffer, hence we cannot check for red
3084 3084 * zone corruption.
3085 3085 */
3086 3086 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3087 3087 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3088 3088 if (!besilent)
3089 3089 mdb_printf("buffer %p (free) seems to "
3090 3090 "have a corrupt redzone pattern\n", addr);
3091 3091 goto corrupt;
3092 3092 }
3093 3093
3094 3094 /*
3095 3095 * confirm bufctl pointer integrity.
3096 3096 */
3097 3097 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3098 3098 if (!besilent)
3099 3099 mdb_printf("buffer %p (free) has a corrupt "
3100 3100 "buftag\n", addr);
3101 3101 goto corrupt;
3102 3102 }
3103 3103
3104 3104 return (WALK_NEXT);
3105 3105 corrupt:
3106 3106 kmv->kmv_corruption++;
3107 3107 return (WALK_NEXT);
3108 3108 }
3109 3109
3110 3110 /*
3111 3111 * verify_alloc()
3112 3112 * Verify that the buftag of an allocated buffer makes sense with respect
3113 3113 * to the buffer.
3114 3114 */
3115 3115 /*ARGSUSED1*/
3116 3116 static int
3117 3117 verify_alloc(uintptr_t addr, const void *data, void *private)
3118 3118 {
3119 3119 kmem_verify_t *kmv = (kmem_verify_t *)private;
3120 3120 kmem_cache_t *cp = &kmv->kmv_cache;
3121 3121 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3122 3122 /*LINTED*/
3123 3123 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3124 3124 uint32_t *ip = (uint32_t *)buftagp;
3125 3125 uint8_t *bp = (uint8_t *)buf;
3126 3126 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3127 3127 int besilent = kmv->kmv_besilent;
3128 3128
3129 3129 /*
3130 3130 * Read the buffer to check.
3131 3131 */
3132 3132 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3133 3133 if (!besilent)
3134 3134 mdb_warn("couldn't read %p", addr);
3135 3135 return (WALK_NEXT);
3136 3136 }
3137 3137
3138 3138 /*
3139 3139 * There are two cases to handle:
3140 3140 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3141 3141 * 0xfeedfacefeedface at the end of it
3142 3142 * 2. If the buf was alloc'd using kmem_alloc, it will have
3143 3143 * 0xbb just past the end of the region in use. At the buftag,
3144 3144 * it will have 0xfeedface (or, if the whole buffer is in use,
3145 3145 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3146 3146 * endianness), followed by 32 bits containing the offset of the
3147 3147 * 0xbb byte in the buffer.
3148 3148 *
3149 3149 * Finally, the two 32-bit words that comprise the second half of the
3150 3150 * buftag should xor to KMEM_BUFTAG_ALLOC
3151 3151 */
3152 3152
3153 3153 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3154 3154 looks_ok = 1;
3155 3155 else if (!KMEM_SIZE_VALID(ip[1]))
3156 3156 size_ok = 0;
3157 3157 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3158 3158 looks_ok = 1;
3159 3159 else
3160 3160 size_ok = 0;
3161 3161
3162 3162 if (!size_ok) {
3163 3163 if (!besilent)
3164 3164 mdb_printf("buffer %p (allocated) has a corrupt "
3165 3165 "redzone size encoding\n", addr);
3166 3166 goto corrupt;
3167 3167 }
3168 3168
3169 3169 if (!looks_ok) {
3170 3170 if (!besilent)
3171 3171 mdb_printf("buffer %p (allocated) has a corrupt "
3172 3172 "redzone signature\n", addr);
3173 3173 goto corrupt;
3174 3174 }
3175 3175
3176 3176 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3177 3177 if (!besilent)
3178 3178 mdb_printf("buffer %p (allocated) has a "
3179 3179 "corrupt buftag\n", addr);
3180 3180 goto corrupt;
3181 3181 }
3182 3182
3183 3183 return (WALK_NEXT);
3184 3184 corrupt:
3185 3185 kmv->kmv_corruption++;
3186 3186 return (WALK_NEXT);
3187 3187 }
3188 3188
3189 3189 /*ARGSUSED2*/
3190 3190 int
3191 3191 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3192 3192 {
3193 3193 if (flags & DCMD_ADDRSPEC) {
3194 3194 int check_alloc = 0, check_free = 0;
3195 3195 kmem_verify_t kmv;
3196 3196
3197 3197 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3198 3198 addr) == -1) {
3199 3199 mdb_warn("couldn't read kmem_cache %p", addr);
3200 3200 return (DCMD_ERR);
3201 3201 }
3202 3202
3203 3203 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3204 3204 sizeof (kmem_buftag_t);
3205 3205 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3206 3206 kmv.kmv_corruption = 0;
3207 3207
3208 3208 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3209 3209 check_alloc = 1;
3210 3210 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3211 3211 check_free = 1;
3212 3212 } else {
3213 3213 if (!(flags & DCMD_LOOP)) {
3214 3214 mdb_warn("cache %p (%s) does not have "
3215 3215 "redzone checking enabled\n", addr,
3216 3216 kmv.kmv_cache.cache_name);
3217 3217 }
3218 3218 return (DCMD_ERR);
3219 3219 }
3220 3220
3221 3221 if (flags & DCMD_LOOP) {
3222 3222 /*
3223 3223 * table mode, don't print out every corrupt buffer
3224 3224 */
3225 3225 kmv.kmv_besilent = 1;
3226 3226 } else {
3227 3227 mdb_printf("Summary for cache '%s'\n",
3228 3228 kmv.kmv_cache.cache_name);
3229 3229 mdb_inc_indent(2);
3230 3230 kmv.kmv_besilent = 0;
3231 3231 }
3232 3232
3233 3233 if (check_alloc)
3234 3234 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3235 3235 if (check_free)
3236 3236 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3237 3237
3238 3238 if (flags & DCMD_LOOP) {
3239 3239 if (kmv.kmv_corruption == 0) {
3240 3240 mdb_printf("%-*s %?p clean\n",
3241 3241 KMEM_CACHE_NAMELEN,
3242 3242 kmv.kmv_cache.cache_name, addr);
3243 3243 } else {
3244 3244 char *s = ""; /* optional s in "buffer[s]" */
3245 3245 if (kmv.kmv_corruption > 1)
3246 3246 s = "s";
3247 3247
3248 3248 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3249 3249 KMEM_CACHE_NAMELEN,
3250 3250 kmv.kmv_cache.cache_name, addr,
3251 3251 kmv.kmv_corruption, s);
3252 3252 }
3253 3253 } else {
3254 3254 /*
3255 3255 * This is the more verbose mode, when the user has
3256 3256 * type addr::kmem_verify. If the cache was clean,
3257 3257 * nothing will have yet been printed. So say something.
3258 3258 */
3259 3259 if (kmv.kmv_corruption == 0)
3260 3260 mdb_printf("clean\n");
3261 3261
3262 3262 mdb_dec_indent(2);
3263 3263 }
3264 3264 } else {
3265 3265 /*
3266 3266 * If the user didn't specify a cache to verify, we'll walk all
3267 3267 * kmem_cache's, specifying ourself as a callback for each...
3268 3268 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3269 3269 */
3270 3270 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3271 3271 "Cache Name", "Addr", "Cache Integrity");
3272 3272 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3273 3273 }
3274 3274
3275 3275 return (DCMD_OK);
3276 3276 }
3277 3277
/*
 * Debugger-side node describing one vmem arena.  vmem_walk_init() builds
 * one node per arena on the target's vmem_list and links the nodes into a
 * tree according to each arena's vm_source.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node on list of all nodes */
	struct vmem_node *vn_parent;	/* node for vm_source, or NULL */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* first node sourced from us */
	uintptr_t vn_addr;		/* address of vmem_t in the target */
	int vn_marked;			/* set once visited by postfix walk */
	vmem_t vn_vmem;			/* local copy of the vmem_t */
} vmem_node_t;
3287 3287
/*
 * Walk state shared by the vmem arena walkers; stored in wsp->walk_data.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* first arena without a source */
	vmem_node_t *vw_current;	/* node the step function is at */
} vmem_walk_t;
3292 3292
3293 3293 int
3294 3294 vmem_walk_init(mdb_walk_state_t *wsp)
3295 3295 {
3296 3296 uintptr_t vaddr, paddr;
3297 3297 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3298 3298 vmem_walk_t *vw;
3299 3299
3300 3300 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3301 3301 mdb_warn("couldn't read 'vmem_list'");
3302 3302 return (WALK_ERR);
3303 3303 }
3304 3304
3305 3305 while (vaddr != NULL) {
3306 3306 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3307 3307 vp->vn_addr = vaddr;
3308 3308 vp->vn_next = head;
3309 3309 head = vp;
3310 3310
3311 3311 if (vaddr == wsp->walk_addr)
3312 3312 current = vp;
3313 3313
3314 3314 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3315 3315 mdb_warn("couldn't read vmem_t at %p", vaddr);
3316 3316 goto err;
3317 3317 }
3318 3318
3319 3319 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3320 3320 }
3321 3321
3322 3322 for (vp = head; vp != NULL; vp = vp->vn_next) {
3323 3323
3324 3324 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3325 3325 vp->vn_sibling = root;
3326 3326 root = vp;
3327 3327 continue;
3328 3328 }
3329 3329
3330 3330 for (parent = head; parent != NULL; parent = parent->vn_next) {
3331 3331 if (parent->vn_addr != paddr)
3332 3332 continue;
3333 3333 vp->vn_sibling = parent->vn_children;
3334 3334 parent->vn_children = vp;
3335 3335 vp->vn_parent = parent;
3336 3336 break;
3337 3337 }
3338 3338
3339 3339 if (parent == NULL) {
3340 3340 mdb_warn("couldn't find %p's parent (%p)\n",
3341 3341 vp->vn_addr, paddr);
3342 3342 goto err;
3343 3343 }
3344 3344 }
3345 3345
3346 3346 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3347 3347 vw->vw_root = root;
3348 3348
3349 3349 if (current != NULL)
3350 3350 vw->vw_current = current;
3351 3351 else
3352 3352 vw->vw_current = root;
3353 3353
3354 3354 wsp->walk_data = vw;
3355 3355 return (WALK_NEXT);
3356 3356 err:
3357 3357 for (vp = head; head != NULL; vp = head) {
3358 3358 head = vp->vn_next;
3359 3359 mdb_free(vp, sizeof (vmem_node_t));
3360 3360 }
3361 3361
3362 3362 return (WALK_ERR);
3363 3363 }
3364 3364
3365 3365 int
3366 3366 vmem_walk_step(mdb_walk_state_t *wsp)
3367 3367 {
3368 3368 vmem_walk_t *vw = wsp->walk_data;
3369 3369 vmem_node_t *vp;
3370 3370 int rval;
3371 3371
3372 3372 if ((vp = vw->vw_current) == NULL)
3373 3373 return (WALK_DONE);
3374 3374
3375 3375 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3376 3376
3377 3377 if (vp->vn_children != NULL) {
3378 3378 vw->vw_current = vp->vn_children;
3379 3379 return (rval);
3380 3380 }
3381 3381
3382 3382 do {
3383 3383 vw->vw_current = vp->vn_sibling;
3384 3384 vp = vp->vn_parent;
3385 3385 } while (vw->vw_current == NULL && vp != NULL);
3386 3386
3387 3387 return (rval);
3388 3388 }
3389 3389
3390 3390 /*
3391 3391 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3392 3392 * children are visited before their parent. We perform the postfix walk
3393 3393 * iteratively (rather than recursively) to allow mdb to regain control
3394 3394 * after each callback.
3395 3395 */
3396 3396 int
3397 3397 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3398 3398 {
3399 3399 vmem_walk_t *vw = wsp->walk_data;
3400 3400 vmem_node_t *vp = vw->vw_current;
3401 3401 int rval;
3402 3402
3403 3403 /*
3404 3404 * If this node is marked, then we know that we have already visited
3405 3405 * all of its children. If the node has any siblings, they need to
3406 3406 * be visited next; otherwise, we need to visit the parent. Note
3407 3407 * that vp->vn_marked will only be zero on the first invocation of
3408 3408 * the step function.
3409 3409 */
3410 3410 if (vp->vn_marked) {
3411 3411 if (vp->vn_sibling != NULL)
3412 3412 vp = vp->vn_sibling;
3413 3413 else if (vp->vn_parent != NULL)
3414 3414 vp = vp->vn_parent;
3415 3415 else {
3416 3416 /*
3417 3417 * We have neither a parent, nor a sibling, and we
3418 3418 * have already been visited; we're done.
3419 3419 */
3420 3420 return (WALK_DONE);
3421 3421 }
3422 3422 }
3423 3423
3424 3424 /*
3425 3425 * Before we visit this node, visit its children.
3426 3426 */
3427 3427 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3428 3428 vp = vp->vn_children;
3429 3429
3430 3430 vp->vn_marked = 1;
3431 3431 vw->vw_current = vp;
3432 3432 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3433 3433
3434 3434 return (rval);
3435 3435 }
3436 3436
3437 3437 void
3438 3438 vmem_walk_fini(mdb_walk_state_t *wsp)
3439 3439 {
3440 3440 vmem_walk_t *vw = wsp->walk_data;
3441 3441 vmem_node_t *root = vw->vw_root;
3442 3442 int done;
3443 3443
3444 3444 if (root == NULL)
3445 3445 return;
3446 3446
3447 3447 if ((vw->vw_root = root->vn_children) != NULL)
3448 3448 vmem_walk_fini(wsp);
3449 3449
3450 3450 vw->vw_root = root->vn_sibling;
3451 3451 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3452 3452 mdb_free(root, sizeof (vmem_node_t));
3453 3453
3454 3454 if (done) {
3455 3455 mdb_free(vw, sizeof (vmem_walk_t));
3456 3456 } else {
3457 3457 vmem_walk_fini(wsp);
3458 3458 }
3459 3459 }
3460 3460
/*
 * State for the vmem segment walkers; stored in wsp->walk_data.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report, or VMEM_NONE */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
} vmem_seg_walk_t;
3466 3466
3467 3467 /*ARGSUSED*/
3468 3468 int
3469 3469 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3470 3470 {
3471 3471 vmem_seg_walk_t *vsw;
3472 3472
3473 3473 if (wsp->walk_addr == NULL) {
3474 3474 mdb_warn("vmem_%s does not support global walks\n", name);
3475 3475 return (WALK_ERR);
3476 3476 }
3477 3477
3478 3478 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3479 3479
3480 3480 vsw->vsw_type = type;
3481 3481 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3482 3482 vsw->vsw_current = vsw->vsw_start;
3483 3483
3484 3484 return (WALK_NEXT);
3485 3485 }
3486 3486
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
 * The segment walkers use this value to mean "report segments of every
 * type".
 */
#define	VMEM_NONE	0
3491 3491
3492 3492 int
3493 3493 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3494 3494 {
3495 3495 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3496 3496 }
3497 3497
3498 3498 int
3499 3499 vmem_free_walk_init(mdb_walk_state_t *wsp)
3500 3500 {
3501 3501 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3502 3502 }
3503 3503
3504 3504 int
3505 3505 vmem_span_walk_init(mdb_walk_state_t *wsp)
3506 3506 {
3507 3507 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3508 3508 }
3509 3509
3510 3510 int
3511 3511 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3512 3512 {
3513 3513 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3514 3514 }
3515 3515
3516 3516 int
3517 3517 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3518 3518 {
3519 3519 vmem_seg_t seg;
3520 3520 vmem_seg_walk_t *vsw = wsp->walk_data;
3521 3521 uintptr_t addr = vsw->vsw_current;
3522 3522 static size_t seg_size = 0;
3523 3523 int rval;
3524 3524
3525 3525 if (!seg_size) {
3526 3526 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3527 3527 mdb_warn("failed to read 'vmem_seg_size'");
3528 3528 seg_size = sizeof (vmem_seg_t);
3529 3529 }
3530 3530 }
3531 3531
3532 3532 if (seg_size < sizeof (seg))
3533 3533 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3534 3534
3535 3535 if (mdb_vread(&seg, seg_size, addr) == -1) {
3536 3536 mdb_warn("couldn't read vmem_seg at %p", addr);
3537 3537 return (WALK_ERR);
3538 3538 }
3539 3539
3540 3540 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3541 3541 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3542 3542 rval = WALK_NEXT;
3543 3543 } else {
3544 3544 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3545 3545 }
3546 3546
3547 3547 if (vsw->vsw_current == vsw->vsw_start)
3548 3548 return (WALK_DONE);
3549 3549
3550 3550 return (rval);
3551 3551 }
3552 3552
3553 3553 void
3554 3554 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3555 3555 {
3556 3556 vmem_seg_walk_t *vsw = wsp->walk_data;
3557 3557
3558 3558 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3559 3559 }
3560 3560
/*
 * Width of the NAME column printed by the vmem dcmd; also the size of the
 * buffer in which the (indented) arena name is assembled.
 */
#define	VMEM_NAMEWIDTH	22
3562 3562
3563 3563 int
3564 3564 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3565 3565 {
3566 3566 vmem_t v, parent;
3567 3567 vmem_kstat_t *vkp = &v.vm_kstat;
3568 3568 uintptr_t paddr;
3569 3569 int ident = 0;
3570 3570 char c[VMEM_NAMEWIDTH];
3571 3571
3572 3572 if (!(flags & DCMD_ADDRSPEC)) {
3573 3573 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3574 3574 mdb_warn("can't walk vmem");
3575 3575 return (DCMD_ERR);
3576 3576 }
3577 3577 return (DCMD_OK);
3578 3578 }
3579 3579
3580 3580 if (DCMD_HDRSPEC(flags))
3581 3581 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3582 3582 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3583 3583 "TOTAL", "SUCCEED", "FAIL");
3584 3584
3585 3585 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3586 3586 mdb_warn("couldn't read vmem at %p", addr);
3587 3587 return (DCMD_ERR);
3588 3588 }
3589 3589
3590 3590 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3591 3591 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3592 3592 mdb_warn("couldn't trace %p's ancestry", addr);
3593 3593 ident = 0;
3594 3594 break;
3595 3595 }
3596 3596 paddr = (uintptr_t)parent.vm_source;
3597 3597 }
3598 3598
3599 3599 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3600 3600
3601 3601 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3602 3602 addr, VMEM_NAMEWIDTH, c,
3603 3603 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3604 3604 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3605 3605
3606 3606 return (DCMD_OK);
3607 3607 }
3608 3608
/*
 * Print the help text for the vmem_seg dcmd: a short description followed
 * by the list of supported options.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data. Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -v Display the full content of the vmem_seg, including its stack trace\n"
" -s report the size of the segment, instead of the end address\n"
" -c caller\n"
" filter out segments without the function/PC in their stack trace\n"
" -e earliest\n"
" filter out segments timestamped before earliest\n"
" -l latest\n"
" filter out segments timestamped after latest\n"
" -m minsize\n"
" filter out segments smaller than minsize\n"
" -M maxsize\n"
" filter out segments larger than maxsize\n"
" -t thread\n"
" filter out segments not involving thread\n"
" -T type\n"
" filter out segments not of type 'type'\n"
" type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3640 3640
3641 3641 /*ARGSUSED*/
3642 3642 int
3643 3643 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3644 3644 {
3645 3645 vmem_seg_t vs;
3646 3646 pc_t *stk = vs.vs_stack;
3647 3647 uintptr_t sz;
3648 3648 uint8_t t;
3649 3649 const char *type = NULL;
3650 3650 GElf_Sym sym;
3651 3651 char c[MDB_SYM_NAMLEN];
3652 3652 int no_debug;
3653 3653 int i;
3654 3654 int depth;
3655 3655 uintptr_t laddr, haddr;
3656 3656
3657 3657 uintptr_t caller = NULL, thread = NULL;
3658 3658 uintptr_t minsize = 0, maxsize = 0;
3659 3659
3660 3660 hrtime_t earliest = 0, latest = 0;
3661 3661
3662 3662 uint_t size = 0;
3663 3663 uint_t verbose = 0;
3664 3664
3665 3665 if (!(flags & DCMD_ADDRSPEC))
3666 3666 return (DCMD_USAGE);
3667 3667
3668 3668 if (mdb_getopts(argc, argv,
3669 3669 'c', MDB_OPT_UINTPTR, &caller,
3670 3670 'e', MDB_OPT_UINT64, &earliest,
3671 3671 'l', MDB_OPT_UINT64, &latest,
3672 3672 's', MDB_OPT_SETBITS, TRUE, &size,
3673 3673 'm', MDB_OPT_UINTPTR, &minsize,
3674 3674 'M', MDB_OPT_UINTPTR, &maxsize,
3675 3675 't', MDB_OPT_UINTPTR, &thread,
3676 3676 'T', MDB_OPT_STR, &type,
3677 3677 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3678 3678 NULL) != argc)
3679 3679 return (DCMD_USAGE);
3680 3680
3681 3681 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3682 3682 if (verbose) {
3683 3683 mdb_printf("%16s %4s %16s %16s %16s\n"
3684 3684 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3685 3685 "ADDR", "TYPE", "START", "END", "SIZE",
3686 3686 "", "", "THREAD", "TIMESTAMP", "");
3687 3687 } else {
3688 3688 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3689 3689 "START", size? "SIZE" : "END", "WHO");
3690 3690 }
3691 3691 }
3692 3692
3693 3693 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3694 3694 mdb_warn("couldn't read vmem_seg at %p", addr);
3695 3695 return (DCMD_ERR);
3696 3696 }
3697 3697
3698 3698 if (type != NULL) {
3699 3699 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3700 3700 t = VMEM_ALLOC;
3701 3701 else if (strcmp(type, "FREE") == 0)
3702 3702 t = VMEM_FREE;
3703 3703 else if (strcmp(type, "SPAN") == 0)
3704 3704 t = VMEM_SPAN;
3705 3705 else if (strcmp(type, "ROTR") == 0 ||
3706 3706 strcmp(type, "ROTOR") == 0)
3707 3707 t = VMEM_ROTOR;
3708 3708 else if (strcmp(type, "WLKR") == 0 ||
3709 3709 strcmp(type, "WALKER") == 0)
3710 3710 t = VMEM_WALKER;
3711 3711 else {
3712 3712 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3713 3713 type);
3714 3714 return (DCMD_ERR);
3715 3715 }
3716 3716
3717 3717 if (vs.vs_type != t)
3718 3718 return (DCMD_OK);
3719 3719 }
3720 3720
3721 3721 sz = vs.vs_end - vs.vs_start;
3722 3722
3723 3723 if (minsize != 0 && sz < minsize)
3724 3724 return (DCMD_OK);
3725 3725
3726 3726 if (maxsize != 0 && sz > maxsize)
3727 3727 return (DCMD_OK);
3728 3728
3729 3729 t = vs.vs_type;
3730 3730 depth = vs.vs_depth;
3731 3731
3732 3732 /*
3733 3733 * debug info, when present, is only accurate for VMEM_ALLOC segments
3734 3734 */
3735 3735 no_debug = (t != VMEM_ALLOC) ||
3736 3736 (depth == 0 || depth > VMEM_STACK_DEPTH);
3737 3737
3738 3738 if (no_debug) {
3739 3739 if (caller != NULL || thread != NULL || earliest != 0 ||
3740 3740 latest != 0)
3741 3741 return (DCMD_OK); /* not enough info */
3742 3742 } else {
3743 3743 if (caller != NULL) {
3744 3744 laddr = caller;
3745 3745 haddr = caller + sizeof (caller);
3746 3746
3747 3747 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3748 3748 sizeof (c), &sym) != -1 &&
3749 3749 caller == (uintptr_t)sym.st_value) {
3750 3750 /*
3751 3751 * We were provided an exact symbol value; any
3752 3752 * address in the function is valid.
3753 3753 */
3754 3754 laddr = (uintptr_t)sym.st_value;
3755 3755 haddr = (uintptr_t)sym.st_value + sym.st_size;
3756 3756 }
3757 3757
3758 3758 for (i = 0; i < depth; i++)
3759 3759 if (vs.vs_stack[i] >= laddr &&
3760 3760 vs.vs_stack[i] < haddr)
3761 3761 break;
3762 3762
3763 3763 if (i == depth)
3764 3764 return (DCMD_OK);
3765 3765 }
3766 3766
3767 3767 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3768 3768 return (DCMD_OK);
3769 3769
3770 3770 if (earliest != 0 && vs.vs_timestamp < earliest)
3771 3771 return (DCMD_OK);
3772 3772
3773 3773 if (latest != 0 && vs.vs_timestamp > latest)
3774 3774 return (DCMD_OK);
3775 3775 }
3776 3776
3777 3777 type = (t == VMEM_ALLOC ? "ALLC" :
3778 3778 t == VMEM_FREE ? "FREE" :
3779 3779 t == VMEM_SPAN ? "SPAN" :
3780 3780 t == VMEM_ROTOR ? "ROTR" :
3781 3781 t == VMEM_WALKER ? "WLKR" :
3782 3782 "????");
3783 3783
3784 3784 if (flags & DCMD_PIPE_OUT) {
3785 3785 mdb_printf("%#lr\n", addr);
3786 3786 return (DCMD_OK);
3787 3787 }
3788 3788
3789 3789 if (verbose) {
3790 3790 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3791 3791 addr, type, vs.vs_start, vs.vs_end, sz);
3792 3792
3793 3793 if (no_debug)
3794 3794 return (DCMD_OK);
3795 3795
3796 3796 mdb_printf("%16s %4s %16p %16llx\n",
3797 3797 "", "", vs.vs_thread, vs.vs_timestamp);
3798 3798
3799 3799 mdb_inc_indent(17);
3800 3800 for (i = 0; i < depth; i++) {
3801 3801 mdb_printf("%a\n", stk[i]);
3802 3802 }
3803 3803 mdb_dec_indent(17);
3804 3804 mdb_printf("\n");
3805 3805 } else {
3806 3806 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3807 3807 vs.vs_start, size? sz : vs.vs_end);
3808 3808
3809 3809 if (no_debug) {
3810 3810 mdb_printf("\n");
3811 3811 return (DCMD_OK);
3812 3812 }
3813 3813
3814 3814 for (i = 0; i < depth; i++) {
3815 3815 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3816 3816 c, sizeof (c), &sym) == -1)
3817 3817 continue;
3818 3818 if (strncmp(c, "vmem_", 5) == 0)
3819 3819 continue;
3820 3820 break;
3821 3821 }
3822 3822 mdb_printf(" %a\n", stk[i]);
3823 3823 }
3824 3824 return (DCMD_OK);
3825 3825 }
3826 3826
/*
 * State passed from the kmalog() dcmd to its showbc() walk callback.
 */
typedef struct kmalog_data {
	uintptr_t kma_addr;	/* only show buffers containing this; 0: all */
	hrtime_t kma_newest;	/* timestamp of the first entry shown */
} kmalog_data_t;
3831 3831
3832 3832 /*ARGSUSED*/
3833 3833 static int
3834 3834 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3835 3835 {
3836 3836 char name[KMEM_CACHE_NAMELEN + 1];
3837 3837 hrtime_t delta;
3838 3838 int i, depth;
3839 3839 size_t bufsize;
3840 3840
3841 3841 if (bcp->bc_timestamp == 0)
3842 3842 return (WALK_DONE);
3843 3843
3844 3844 if (kma->kma_newest == 0)
3845 3845 kma->kma_newest = bcp->bc_timestamp;
3846 3846
3847 3847 if (kma->kma_addr) {
3848 3848 if (mdb_vread(&bufsize, sizeof (bufsize),
3849 3849 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3850 3850 mdb_warn(
3851 3851 "failed to read cache_bufsize for cache at %p",
3852 3852 bcp->bc_cache);
3853 3853 return (WALK_ERR);
3854 3854 }
3855 3855
3856 3856 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3857 3857 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3858 3858 return (WALK_NEXT);
3859 3859 }
3860 3860
3861 3861 delta = kma->kma_newest - bcp->bc_timestamp;
3862 3862 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3863 3863
3864 3864 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3865 3865 &bcp->bc_cache->cache_name) <= 0)
3866 3866 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3867 3867
3868 3868 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3869 3869 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3870 3870
3871 3871 for (i = 0; i < depth; i++)
3872 3872 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3873 3873
3874 3874 return (WALK_NEXT);
3875 3875 }
3876 3876
3877 3877 int
3878 3878 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3879 3879 {
3880 3880 const char *logname = "kmem_transaction_log";
3881 3881 kmalog_data_t kma;
3882 3882
3883 3883 if (argc > 1)
3884 3884 return (DCMD_USAGE);
3885 3885
3886 3886 kma.kma_newest = 0;
3887 3887 if (flags & DCMD_ADDRSPEC)
3888 3888 kma.kma_addr = addr;
3889 3889 else
3890 3890 kma.kma_addr = NULL;
3891 3891
3892 3892 if (argc > 0) {
3893 3893 if (argv->a_type != MDB_TYPE_STRING)
3894 3894 return (DCMD_USAGE);
3895 3895 if (strcmp(argv->a_un.a_str, "fail") == 0)
3896 3896 logname = "kmem_failure_log";
3897 3897 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3898 3898 logname = "kmem_slab_log";
3899 3899 else
3900 3900 return (DCMD_USAGE);
3901 3901 }
3902 3902
3903 3903 if (mdb_readvar(&addr, logname) == -1) {
3904 3904 mdb_warn("failed to read %s log header pointer");
3905 3905 return (DCMD_ERR);
3906 3906 }
3907 3907
3908 3908 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3909 3909 mdb_warn("failed to walk kmem log");
3910 3910 return (DCMD_ERR);
3911 3911 }
3912 3912
3913 3913 return (DCMD_OK);
3914 3914 }
3915 3915
3916 3916 /*
3917 3917 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3918 3918 * The first piece is a structure which we use to accumulate kmem_cache_t
3919 3919 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3920 3920 * walker; we either add all caches, or ones named explicitly as arguments.
3921 3921 */
3922 3922
/*
 * Growable list of kmem_cache_t addresses, filled in by kmc_add().
 */
typedef struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
} kmclist_t;
3929 3929
3930 3930 static int
3931 3931 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3932 3932 {
3933 3933 void *p;
3934 3934 int s;
3935 3935
3936 3936 if (kmc->kmc_name == NULL ||
3937 3937 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3938 3938 /*
3939 3939 * If we have a match, grow our array (if necessary), and then
3940 3940 * add the virtual address of the matching cache to our list.
3941 3941 */
3942 3942 if (kmc->kmc_nelems >= kmc->kmc_size) {
3943 3943 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3944 3944 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3945 3945
3946 3946 bcopy(kmc->kmc_caches, p,
3947 3947 sizeof (uintptr_t) * kmc->kmc_size);
3948 3948
3949 3949 kmc->kmc_caches = p;
3950 3950 kmc->kmc_size = s;
3951 3951 }
3952 3952
3953 3953 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3954 3954 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3955 3955 }
3956 3956
3957 3957 return (WALK_NEXT);
3958 3958 }
3959 3959
3960 3960 /*
3961 3961 * The second piece of ::kmausers is a hash table of allocations. Each
3962 3962 * allocation owner is identified by its stack trace and data_size. We then
3963 3963 * track the total bytes of all such allocations, and the number of allocations
3964 3964 * to report at the end. Once we have a list of caches, we walk through the
3965 3965 * allocated bufctls of each, and update our hash table accordingly.
3966 3966 */
3967 3967
3968 3968 typedef struct kmowner {
3969 3969 struct kmowner *kmo_head; /* First hash elt in bucket */
3970 3970 struct kmowner *kmo_next; /* Next hash elt in chain */
3971 3971 size_t kmo_signature; /* Hash table signature */
3972 3972 uint_t kmo_num; /* Number of allocations */
3973 3973 size_t kmo_data_size; /* Size of each allocation */
3974 3974 size_t kmo_total_size; /* Total bytes of allocation */
3975 3975 int kmo_depth; /* Depth of stack trace */
3976 3976 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
3977 3977 } kmowner_t;
3978 3978
3979 3979 typedef struct kmusers {
3980 3980 uintptr_t kmu_addr; /* address of interest */
3981 3981 const kmem_cache_t *kmu_cache; /* Current kmem cache */
3982 3982 kmowner_t *kmu_hash; /* Hash table of owners */
3983 3983 int kmu_nelems; /* Number of entries in use */
3984 3984 int kmu_size; /* Total number of entries */
3985 3985 } kmusers_t;
3986 3986
3987 3987 static void
3988 3988 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3989 3989 size_t size, size_t data_size)
3990 3990 {
3991 3991 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3992 3992 size_t bucket, signature = data_size;
3993 3993 kmowner_t *kmo, *kmoend;
3994 3994
3995 3995 /*
3996 3996 * If the hash table is full, double its size and rehash everything.
3997 3997 */
3998 3998 if (kmu->kmu_nelems >= kmu->kmu_size) {
3999 3999 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4000 4000
4001 4001 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4002 4002 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4003 4003 kmu->kmu_hash = kmo;
4004 4004 kmu->kmu_size = s;
4005 4005
4006 4006 kmoend = kmu->kmu_hash + kmu->kmu_size;
4007 4007 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4008 4008 kmo->kmo_head = NULL;
4009 4009
4010 4010 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4011 4011 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4012 4012 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4013 4013 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4014 4014 kmu->kmu_hash[bucket].kmo_head = kmo;
4015 4015 }
4016 4016 }
4017 4017
4018 4018 /*
4019 4019 * Finish computing the hash signature from the stack trace, and then
4020 4020 * see if the owner is in the hash table. If so, update our stats.
4021 4021 */
4022 4022 for (i = 0; i < depth; i++)
4023 4023 signature += bcp->bc_stack[i];
4024 4024
4025 4025 bucket = signature & (kmu->kmu_size - 1);
4026 4026
4027 4027 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4028 4028 if (kmo->kmo_signature == signature) {
4029 4029 size_t difference = 0;
4030 4030
4031 4031 difference |= kmo->kmo_data_size - data_size;
4032 4032 difference |= kmo->kmo_depth - depth;
4033 4033
4034 4034 for (i = 0; i < depth; i++) {
4035 4035 difference |= kmo->kmo_stack[i] -
4036 4036 bcp->bc_stack[i];
4037 4037 }
4038 4038
4039 4039 if (difference == 0) {
4040 4040 kmo->kmo_total_size += size;
4041 4041 kmo->kmo_num++;
4042 4042 return;
4043 4043 }
4044 4044 }
4045 4045 }
4046 4046
4047 4047 /*
4048 4048 * If the owner is not yet hashed, grab the next element and fill it
4049 4049 * in based on the allocation information.
4050 4050 */
4051 4051 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4052 4052 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4053 4053 kmu->kmu_hash[bucket].kmo_head = kmo;
4054 4054
4055 4055 kmo->kmo_signature = signature;
4056 4056 kmo->kmo_num = 1;
4057 4057 kmo->kmo_data_size = data_size;
4058 4058 kmo->kmo_total_size = size;
4059 4059 kmo->kmo_depth = depth;
4060 4060
4061 4061 for (i = 0; i < depth; i++)
4062 4062 kmo->kmo_stack[i] = bcp->bc_stack[i];
4063 4063 }
4064 4064
4065 4065 /*
4066 4066 * When ::kmausers is invoked without the -f flag, we simply update our hash
4067 4067 * table with the information from each allocated bufctl.
4068 4068 */
4069 4069 /*ARGSUSED*/
4070 4070 static int
4071 4071 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4072 4072 {
4073 4073 const kmem_cache_t *cp = kmu->kmu_cache;
4074 4074
4075 4075 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4076 4076 return (WALK_NEXT);
4077 4077 }
4078 4078
4079 4079 /*
4080 4080 * When ::kmausers is invoked with the -f flag, we print out the information
4081 4081 * for each bufctl as well as updating the hash table.
4082 4082 */
4083 4083 static int
4084 4084 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4085 4085 {
4086 4086 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4087 4087 const kmem_cache_t *cp = kmu->kmu_cache;
4088 4088 kmem_bufctl_t bufctl;
4089 4089
4090 4090 if (kmu->kmu_addr) {
4091 4091 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4092 4092 mdb_warn("couldn't read bufctl at %p", addr);
4093 4093 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4094 4094 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4095 4095 cp->cache_bufsize)
4096 4096 return (WALK_NEXT);
4097 4097 }
4098 4098
4099 4099 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4100 4100 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4101 4101
4102 4102 for (i = 0; i < depth; i++)
4103 4103 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4104 4104
4105 4105 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4106 4106 return (WALK_NEXT);
4107 4107 }
4108 4108
4109 4109 /*
4110 4110 * We sort our results by allocation size before printing them.
4111 4111 */
4112 4112 static int
4113 4113 kmownercmp(const void *lp, const void *rp)
4114 4114 {
4115 4115 const kmowner_t *lhs = lp;
4116 4116 const kmowner_t *rhs = rp;
4117 4117
4118 4118 return (rhs->kmo_total_size - lhs->kmo_total_size);
4119 4119 }
4120 4120
4121 4121 /*
4122 4122 * The main engine of ::kmausers is relatively straightforward: First we
4123 4123 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4124 4124 * iterate over the allocated bufctls of each cache in the list. Finally,
4125 4125 * we sort and print our results.
4126 4126 */
4127 4127 /*ARGSUSED*/
4128 4128 int
4129 4129 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4130 4130 {
4131 4131 int mem_threshold = 8192; /* Minimum # bytes for printing */
4132 4132 int cnt_threshold = 100; /* Minimum # blocks for printing */
4133 4133 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4134 4134 int do_all_caches = 1; /* Do all caches (no arguments) */
4135 4135 int opt_e = FALSE; /* Include "small" users */
4136 4136 int opt_f = FALSE; /* Print stack traces */
4137 4137
4138 4138 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4139 4139 kmowner_t *kmo, *kmoend;
4140 4140 int i, oelems;
4141 4141
4142 4142 kmclist_t kmc;
4143 4143 kmusers_t kmu;
4144 4144
4145 4145 bzero(&kmc, sizeof (kmc));
4146 4146 bzero(&kmu, sizeof (kmu));
4147 4147
4148 4148 while ((i = mdb_getopts(argc, argv,
4149 4149 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4150 4150 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4151 4151
4152 4152 argv += i; /* skip past options we just processed */
4153 4153 argc -= i; /* adjust argc */
4154 4154
4155 4155 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4156 4156 return (DCMD_USAGE);
4157 4157
4158 4158 oelems = kmc.kmc_nelems;
4159 4159 kmc.kmc_name = argv->a_un.a_str;
4160 4160 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4161 4161
4162 4162 if (kmc.kmc_nelems == oelems) {
4163 4163 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4164 4164 return (DCMD_ERR);
4165 4165 }
4166 4166
4167 4167 do_all_caches = 0;
4168 4168 argv++;
4169 4169 argc--;
4170 4170 }
4171 4171
4172 4172 if (flags & DCMD_ADDRSPEC) {
4173 4173 opt_f = TRUE;
4174 4174 kmu.kmu_addr = addr;
4175 4175 } else {
4176 4176 kmu.kmu_addr = NULL;
4177 4177 }
4178 4178
4179 4179 if (opt_e)
4180 4180 mem_threshold = cnt_threshold = 0;
4181 4181
4182 4182 if (opt_f)
4183 4183 callback = (mdb_walk_cb_t)kmause2;
4184 4184
4185 4185 if (do_all_caches) {
4186 4186 kmc.kmc_name = NULL; /* match all cache names */
4187 4187 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4188 4188 }
4189 4189
4190 4190 for (i = 0; i < kmc.kmc_nelems; i++) {
4191 4191 uintptr_t cp = kmc.kmc_caches[i];
4192 4192 kmem_cache_t c;
4193 4193
4194 4194 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4195 4195 mdb_warn("failed to read cache at %p", cp);
4196 4196 continue;
4197 4197 }
4198 4198
4199 4199 if (!(c.cache_flags & KMF_AUDIT)) {
4200 4200 if (!do_all_caches) {
4201 4201 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4202 4202 c.cache_name);
4203 4203 }
4204 4204 continue;
4205 4205 }
4206 4206
4207 4207 kmu.kmu_cache = &c;
4208 4208 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4209 4209 audited_caches++;
4210 4210 }
4211 4211
4212 4212 if (audited_caches == 0 && do_all_caches) {
4213 4213 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4214 4214 return (DCMD_ERR);
4215 4215 }
4216 4216
4217 4217 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4218 4218 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4219 4219
4220 4220 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4221 4221 if (kmo->kmo_total_size < mem_threshold &&
4222 4222 kmo->kmo_num < cnt_threshold)
4223 4223 continue;
4224 4224 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4225 4225 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4226 4226 for (i = 0; i < kmo->kmo_depth; i++)
4227 4227 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4228 4228 }
4229 4229
4230 4230 return (DCMD_OK);
4231 4231 }
4232 4232
/*
 * Print the help text for ::kmausers: a description of the dcmd followed by
 * a summary of its options.
 */
void
kmausers_help(void)
{
	/* What the dcmd does and how cache/address arguments narrow it. */
	mdb_printf(
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n");

	/* The options. */
	mdb_printf(
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}
4248 4248
/*
 * Read the target's "kmem_ready" variable.
 *
 * Returns its value, or -1 if the variable could not be read.
 */
static int
kmem_ready_check(void)
{
	int kmem_is_ready;

	if (mdb_readvar(&kmem_is_ready, "kmem_ready") >= 0)
		return (kmem_is_ready);

	return (-1);		/* errno is set for us */
}
4259 4259
4260 4260 void
4261 4261 kmem_statechange(void)
4262 4262 {
4263 4263 static int been_ready = 0;
4264 4264
4265 4265 if (been_ready)
4266 4266 return;
4267 4267
4268 4268 if (kmem_ready_check() <= 0)
4269 4269 return;
4270 4270
4271 4271 been_ready = 1;
4272 4272 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4273 4273 }
4274 4274
4275 4275 void
4276 4276 kmem_init(void)
4277 4277 {
4278 4278 mdb_walker_t w = {
4279 4279 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4280 4280 list_walk_step, list_walk_fini
4281 4281 };
4282 4282
4283 4283 /*
4284 4284 * If kmem is ready, we'll need to invoke the kmem_cache walker
4285 4285 * immediately. Walkers in the linkage structure won't be ready until
4286 4286 * _mdb_init returns, so we'll need to add this one manually. If kmem
4287 4287 * is ready, we'll use the walker to initialize the caches. If kmem
4288 4288 * isn't ready, we'll register a callback that will allow us to defer
4289 4289 * cache walking until it is.
4290 4290 */
4291 4291 if (mdb_add_walker(&w) != 0) {
4292 4292 mdb_warn("failed to add kmem_cache walker");
4293 4293 return;
4294 4294 }
4295 4295
4296 4296 kmem_statechange();
4297 4297
4298 4298 /* register our ::whatis handlers */
4299 4299 mdb_whatis_register("modules", whatis_run_modules, NULL,
4300 4300 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4301 4301 mdb_whatis_register("threads", whatis_run_threads, NULL,
4302 4302 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4303 4303 mdb_whatis_register("pages", whatis_run_pages, NULL,
4304 4304 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4305 4305 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4306 4306 WHATIS_PRIO_ALLOCATOR, 0);
4307 4307 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4308 4308 WHATIS_PRIO_ALLOCATOR, 0);
4309 4309 }
4310 4310
/*
 * Arguments handed to whatthread_walk_thread for each thread.
 */
struct whatthread {
	uintptr_t wt_target;	/* value to search the thread stacks for */
	int wt_verbose;		/* nonzero: report each matching stack slot */
};

typedef struct whatthread whatthread_t;
↓ open down ↓ |
4314 lines elided |
↑ open up ↑ |
4315 4315
4316 4316 static int
4317 4317 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4318 4318 {
4319 4319 uintptr_t current, data;
4320 4320
4321 4321 if (t->t_stkbase == NULL)
4322 4322 return (WALK_NEXT);
4323 4323
4324 4324 /*
4325 - * Warn about swapped out threads, but drive on anyway
4326 - */
4327 - if (!(t->t_schedflag & TS_LOAD)) {
4328 - mdb_warn("thread %p's stack swapped out\n", addr);
4329 - return (WALK_NEXT);
4330 - }
4331 -
4332 - /*
4333 4325 * Search the thread's stack for the given pointer. Note that it would
4334 4326 * be more efficient to follow ::kgrep's lead and read in page-sized
4335 4327 * chunks, but this routine is already fast and simple.
4336 4328 */
4337 4329 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4338 4330 current += sizeof (uintptr_t)) {
4339 4331 if (mdb_vread(&data, sizeof (data), current) == -1) {
4340 4332 mdb_warn("couldn't read thread %p's stack at %p",
4341 4333 addr, current);
4342 4334 return (WALK_ERR);
4343 4335 }
4344 4336
4345 4337 if (data == w->wt_target) {
4346 4338 if (w->wt_verbose) {
4347 4339 mdb_printf("%p in thread %p's stack%s\n",
4348 4340 current, addr, stack_active(t, current));
4349 4341 } else {
4350 4342 mdb_printf("%#lr\n", addr);
4351 4343 return (WALK_NEXT);
4352 4344 }
4353 4345 }
4354 4346 }
4355 4347
4356 4348 return (WALK_NEXT);
4357 4349 }
4358 4350
4359 4351 int
4360 4352 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4361 4353 {
4362 4354 whatthread_t w;
4363 4355
4364 4356 if (!(flags & DCMD_ADDRSPEC))
4365 4357 return (DCMD_USAGE);
4366 4358
4367 4359 w.wt_verbose = FALSE;
4368 4360 w.wt_target = addr;
4369 4361
4370 4362 if (mdb_getopts(argc, argv,
4371 4363 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4372 4364 return (DCMD_USAGE);
4373 4365
4374 4366 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4375 4367 == -1) {
4376 4368 mdb_warn("couldn't walk threads");
4377 4369 return (DCMD_ERR);
4378 4370 }
4379 4371
4380 4372 return (DCMD_OK);
4381 4373 }
↓ open down ↓ |
39 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX