/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Joyent, Inc.  All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}
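
/*
 * Note: dprintf() takes its mdb_printf() arguments wrapped in an extra
 * set of parentheses so that an arbitrary argument list can pass through
 * the single macro parameter, e.g.:
 *
 *	dprintf(("reading %d loaded rounds\n", rounds));
 */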

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10
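
/*
 * These walk types are bitmasks: the walker variants combine them, and
 * kmem_walk_init_common() below tests KM_FREE, KM_BUFCTL, and
 * KM_CONSTRUCTED independently (e.g. a constructed-free walk passes
 * KM_FREE | KM_CONSTRUCTED) and ORs KM_HASH into the walk type itself
 * when it decides to walk the cache's hash table instead of its slabs.
 */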

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");

	return (DCMD_OK);
}
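
/*
 * Illustrative usage, assuming this dcmd is registered under the name
 * "kmem_debug":
 *
 *	> ::kmem_debug
 *	kmem: debugging is now on
 */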

int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	GElf_Sym sym;

	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
		mdb_warn("couldn't find kmem_caches");
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)sym.st_value;

	return (list_walk_init_named(wsp, "cache list", "cache"));
}

int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;
	kmem_cpu_cache_t cc;

	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

static int
kmem_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;
	uintptr_t caddr = (uintptr_t)arg;
	if ((uintptr_t)sp->slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    saddr, caddr, sp->slab_cache);
		return (-1);
	}

	return (0);
}

static int
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
		mdb_warn("slab %p is not a partial slab\n", saddr);
		return (-1);
	}

	return (0);
}

static int
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
		mdb_warn("slab %p is not completely allocated\n", saddr);
		return (-1);
	}

	return (0);
}

typedef struct {
	uintptr_t kns_cache_addr;
	int kns_nslabs;
} kmem_nth_slab_t;

static int
kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_nth_slab_t *chkp = arg;

	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
	if (rc != 0) {
		return (rc);
	}

	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
}
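
/*
 * Note on kmem_nth_slab_check(): kns_nslabs is post-decremented, so with
 * kns_nslabs = 1 the first slab returns 0 (keep walking) and the second
 * call returns 1, stopping the checked walk after exactly one slab has
 * been visited.
 */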

static int
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_complete_slab_check, (void *)caddr));
}

static int
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_partial_slabs));

	return (avl_walk_init_checked(wsp, "slab list", "slab",
	    kmem_partial_slab_check, (void *)caddr));
}

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	if (caddr == NULL) {
		mdb_warn("kmem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	combined_walk_init(wsp);
	combined_walk_add(wsp,
	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
	combined_walk_add(wsp,
	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);

	return (WALK_NEXT);
}

static int
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_nth_slab_t *chk;

	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
	    UM_SLEEP | UM_GC);
	chk->kns_cache_addr = caddr;
	chk->kns_nslabs = 1;
	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_nth_slab_check, chk));
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	combined_walk_init(wsp);

	/*
	 * Some consumers (kmem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache.  So
	 * if there are *no* partial slabs, report the first full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if (c.cache_partial_slabs.avl_numnodes == 0) {
		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
		    list_walk_step, list_walk_fini);
	} else {
		combined_walk_add(wsp, kmem_partial_slab_walk_init,
		    avl_walk_step, avl_walk_fini);
	}

	return (WALK_NEXT);
}

int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	const char *filter = NULL;

	if (mdb_getopts(ac, argv,
	    'n', MDB_OPT_STR, &filter,
	    NULL) != ac) {
		return (DCMD_USAGE);
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
		return (DCMD_OK);

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}

void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}
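
/*
 * Illustrative usage (the cache name is just an example): print every
 * cache whose name contains the substring "kmem_alloc":
 *
 *	> ::kmem_cache -n kmem_alloc
 */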

#define	LABEL_WIDTH	11
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	uint64_t total;
	int buckets;
	int i;
	const int *distarray;
	int complete[2];

	buckets = buffers_per_slab;

	total = 0;
	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	if (maxbuckets > 1)
		buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			buckets = 2;
			minbucketsize = (buffers_per_slab - 1);
		}
	}

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output. Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
	}
	mdb_printf("\n");
}
#undef LABEL_WIDTH

/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_slab_partial" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at
	 * least one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
	return (WALK_DONE);
}

typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;

typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;

/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
	kmem_slab_usage_t *ksu;
	long unused;

	ks->ks_slabs++;
	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);
	if (unused == 0) {
		return (WALK_NEXT);
	}

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
		}
		ks->ks_usage = usage;
		ks->ks_usage_len = len;
	}

	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
	return (WALK_NEXT);
}

static void
kmem_slabs_header()
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}

int
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	kmem_slab_stats_t stats;
	mdb_walk_cb_t cb;
	int pct;
	int tenths_pct;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	if ((maxbuckets != 1) || (minbucketsize != 0)) {
		buckets = B_TRUE;
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
		    argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if (name == NULL) {
		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
	} else {
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));
	}

	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
		kmem_slabs_header();
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {
			kmem_slabs_header();
		} else {
			boolean_t is_slab = B_FALSE;
			const char *walker_name;
			if (opt_v) {
				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
				walker_name = "kmem_slab_partial";
			} else {
				cb = (mdb_walk_cb_t)kmem_first_slab;
				walker_name = "kmem_slab";
			}
			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
			if (is_slab) {
				kmem_slabs_header();
			}
		}
	}

	if (skip) {
		return (DCMD_OK);
	}

	bzero(&stats, sizeof (kmem_slab_stats_t));
	stats.ks_cp = &c;
	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

	if (c.cache_buftotal == 0) {
		pct = 0;
		tenths_pct = 0;
	} else {
		uint64_t n = stats.ks_unused_buffers * 10000;
		pct = (int)(n / c.cache_buftotal);
		tenths_pct = pct - ((pct / 100) * 100);
		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
		if (tenths_pct == 10) {
			pct += 100;
			tenths_pct = 0;
		}
	}

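	/*
	 * Worked example with hypothetical numbers: 123 unused buffers out
	 * of 1000 total gives n = 1230000, so pct = 1230 (hundredths of a
	 * percent); tenths_pct = 30, rounded to 3. After the division
	 * below, pct = 12, and the dcmd prints "12.3%".
	 */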
	pct /= 100;
	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);

	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;
	}

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		mdb_printf("\n");
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
	}

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (!opt_v) {
		return (DCMD_OK);
	}

	if (opt_v && (stats.ks_partial_slabs > 0)) {
		int i;
		kmem_slab_usage_t *ksu;

		mdb_printf("  %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));
		}
		mdb_printf("\n\n");
	}

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
	}

	return (DCMD_OK);
}

void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}

static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	size_t kmhw_nelems;
	size_t kmhw_pos;
	kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;

int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}

int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
				break;
		}
	}
	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;

	if (kmhw == NULL)
		return;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}
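
/*
 * Illustrative usage (the address is hypothetical): walk the bufctls in a
 * hashed cache's hash table, given a kmem_cache_t address:
 *
 *	> ffffff01d8699008::walk kmem_hash
 */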

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
	kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
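
/*
 * Note: READMAG_ROUNDS is deliberately not self-contained.  It expects the
 * caller (kmem_read_magazines() below) to have kmp, mp, magbsize, maglist,
 * magcnt, magmax, and i in scope, along with a "fail" label that releases
 * whatever has been allocated so far.
 */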

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 *
	 * In order to prevent inconsistent dumps, rounds and prounds
	 * are copied aside before dumping begins.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
		short rounds, prounds;

		if (KMEM_DUMPCC(ccp)) {
			rounds = ccp->cc_dump_rounds;
			prounds = ccp->cc_dump_prounds;
		} else {
			rounds = ccp->cc_rounds;
			prounds = ccp->cc_prounds;
		}

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", rounds));
			READMAG_ROUNDS(rounds);
		}

		if (prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    prounds));
			READMAG_ROUNDS(prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 * kmem_bufctl_t.
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
	int kmw_type;

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	size_t kmw_csize;

	/*
	 * magazine layer
	 */
	void **kmw_maglist;
	size_t kmw_max;
	size_t kmw_count;
	size_t kmw_pos;

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;

static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	kmem_walk_t *kmw;
	int ncpus, csize;
	kmem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~KM_HASH;

	if (addr == NULL) {
		mdb_warn("kmem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines;  if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_cp = cp;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;
	kmw->kmw_pos = 0;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";

		kmw->kmw_type |= KM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & KM_ALLOCATED)
			layered = "kmem_slab";
		else
			layered = "kmem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & KMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			kmw->kmw_ubase = mdb_alloc(slabsize +
			    sizeof (kmem_bufctl_t), UM_SLEEP);

			if (type & KM_ALLOCATED)
				kmw->kmw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

out1:
	if (status == WALK_ERR) {
		if (kmw->kmw_valid)
			mdb_free(kmw->kmw_valid, slabsize / chunksize);

		if (kmw->kmw_ubase)
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		if (kmw->kmw_maglist)
			mdb_free(kmw->kmw_maglist,
			    kmw->kmw_max * sizeof (uintptr_t));

		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}

int
kmem_walk_step(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;

	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & KM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & KMF_BUFTAG) {
					kmem_buftag_t *btp;
					kmem_buftag_t tag;

					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (kmem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is (or should be) in the slab
			 * that we've read in; determine its offset in the
			 * slab, validate that it's not corrupt, and add to
			 * our base address to find the kmem_bufctl_t.  (Note
			 * that we don't need to add the size of the bufctl
			 * to our offset calculation because of the slop that's
			 * allocated for the buffer at ubase.)
			 */
			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;

			if (offs > chunks * chunksize) {
				mdb_warn("found corrupt bufctl ptr %p"
				    " in slab %p in cache %p\n", bcp,
				    wsp->walk_addr, addr);
				break;
			}

			bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;
1604 
1605         if (kmw->kmw_valid != NULL)
1606                 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607         if (kmw->kmw_ubase != NULL)
1608                 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609 
1610         mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611         mdb_free(kmw, sizeof (kmem_walk_t));
1612 }
1613 
1614 /*ARGSUSED*/
1615 static int
1616 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 {
1618         /*
1619          * Buffers allocated from NOTOUCH caches can also show up as freed
1620          * memory in other caches.  This can be a little confusing, so we
1621          * don't walk NOTOUCH caches when walking all caches (thereby assuring
1622          * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623          */
1624         if (c->cache_cflags & KMC_NOTOUCH)
1625                 return (WALK_NEXT);
1626 
1627         if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628             wsp->walk_cbdata, addr) == -1)
1629                 return (WALK_DONE);
1630 
1631         return (WALK_NEXT);
1632 }
1633 
1634 #define KMEM_WALK_ALL(name, wsp) { \
1635         wsp->walk_data = (name); \
1636         if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1637                 return (WALK_ERR); \
1638         return (WALK_DONE); \
1639 }
1640 
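     /*
      * When no cache address is given, the KMEM_WALK_ALL macro performs
      * the entire walk during initialization, dispatching to each cache
      * via the kmem_cache walker, and returns WALK_DONE (the walk
      * completes without stepping).  For example, from a kernel target,
      * "::walk kmem" visits every allocated buffer in every cache, while
      * "<cache addr>::walk kmem" visits only a single cache's buffers.
      */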
1641 int
1642 kmem_walk_init(mdb_walk_state_t *wsp)
1643 {
1644         if (wsp->walk_arg != NULL)
1645                 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646 
1647         if (wsp->walk_addr == NULL)
1648                 KMEM_WALK_ALL("kmem", wsp);
1649         return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 }
1651 
1652 int
1653 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 {
1655         if (wsp->walk_addr == NULL)
1656                 KMEM_WALK_ALL("bufctl", wsp);
1657         return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 }
1659 
1660 int
1661 freemem_walk_init(mdb_walk_state_t *wsp)
1662 {
1663         if (wsp->walk_addr == NULL)
1664                 KMEM_WALK_ALL("freemem", wsp);
1665         return (kmem_walk_init_common(wsp, KM_FREE));
1666 }
1667 
1668 int
1669 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 {
1671         if (wsp->walk_addr == NULL)
1672                 KMEM_WALK_ALL("freemem_constructed", wsp);
1673         return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 }
1675 
1676 int
1677 freectl_walk_init(mdb_walk_state_t *wsp)
1678 {
1679         if (wsp->walk_addr == NULL)
1680                 KMEM_WALK_ALL("freectl", wsp);
1681         return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 }
1683 
1684 int
1685 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 {
1687         if (wsp->walk_addr == NULL)
1688                 KMEM_WALK_ALL("freectl_constructed", wsp);
1689         return (kmem_walk_init_common(wsp,
1690             KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 }
1692 
1693 typedef struct bufctl_history_walk {
1694         void            *bhw_next;
1695         kmem_cache_t    *bhw_cache;
1696         kmem_slab_t     *bhw_slab;
1697         hrtime_t        bhw_timestamp;
1698 } bufctl_history_walk_t;
1699 
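     /*
      * The bufctl_history walker follows bc_lastlog from a given bufctl
      * back through successively older transaction log records for the
      * same buffer; ::bufctl -h uses it internally.  For example,
      * "<bufctl addr>::walk bufctl_history" yields each recorded
      * transaction for that buffer, newest first.
      */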
1700 int
1701 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 {
1703         bufctl_history_walk_t *bhw;
1704         kmem_bufctl_audit_t bc;
1705         kmem_bufctl_audit_t bcn;
1706 
1707         if (wsp->walk_addr == NULL) {
1708                 mdb_warn("bufctl_history walk doesn't support global walks\n");
1709                 return (WALK_ERR);
1710         }
1711 
1712         if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713                 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714                 return (WALK_ERR);
1715         }
1716 
1717         bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718         bhw->bhw_timestamp = 0;
1719         bhw->bhw_cache = bc.bc_cache;
1720         bhw->bhw_slab = bc.bc_slab;
1721 
1722         /*
1723          * sometimes the first log entry matches the base bufctl;  in that
1724          * case, skip the base bufctl.
1725          */
1726         if (bc.bc_lastlog != NULL &&
1727             mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728             bc.bc_addr == bcn.bc_addr &&
1729             bc.bc_cache == bcn.bc_cache &&
1730             bc.bc_slab == bcn.bc_slab &&
1731             bc.bc_timestamp == bcn.bc_timestamp &&
1732             bc.bc_thread == bcn.bc_thread)
1733                 bhw->bhw_next = bc.bc_lastlog;
1734         else
1735                 bhw->bhw_next = (void *)wsp->walk_addr;
1736 
1737         wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738         wsp->walk_data = bhw;
1739 
1740         return (WALK_NEXT);
1741 }
1742 
1743 int
1744 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 {
1746         bufctl_history_walk_t *bhw = wsp->walk_data;
1747         uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748         uintptr_t baseaddr = wsp->walk_addr;
1749         kmem_bufctl_audit_t bc;
1750 
1751         if (addr == NULL)
1752                 return (WALK_DONE);
1753 
1754         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755                 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756                 return (WALK_ERR);
1757         }
1758 
1759         /*
1760          * The bufctl is only valid if the address, cache, and slab are
1761          * correct.  We also check that the timestamp is decreasing, to
1762          * prevent infinite loops.
1763          */
1764         if ((uintptr_t)bc.bc_addr != baseaddr ||
1765             bc.bc_cache != bhw->bhw_cache ||
1766             bc.bc_slab != bhw->bhw_slab ||
1767             (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768                 return (WALK_DONE);
1769 
1770         bhw->bhw_next = bc.bc_lastlog;
1771         bhw->bhw_timestamp = bc.bc_timestamp;
1772 
1773         return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 }
1775 
1776 void
1777 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 {
1779         bufctl_history_walk_t *bhw = wsp->walk_data;
1780 
1781         mdb_free(bhw, sizeof (*bhw));
1782 }
1783 
1784 typedef struct kmem_log_walk {
1785         kmem_bufctl_audit_t *klw_base;
1786         kmem_bufctl_audit_t **klw_sorted;
1787         kmem_log_header_t klw_lh;
1788         size_t klw_size;
1789         size_t klw_maxndx;
1790         size_t klw_ndx;
1791 } kmem_log_walk_t;
1792 
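     /*
      * The kmem_log walker reads the entire kmem transaction log, sorts
      * its kmem_bufctl_audit_t records by timestamp (via bufctlcmp), and
      * then visits each record in sorted order.
      */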
1793 int
1794 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 {
1796         uintptr_t lp = wsp->walk_addr;
1797         kmem_log_walk_t *klw;
1798         kmem_log_header_t *lhp;
1799         int maxndx, i, j, k;
1800 
1801         /*
1802          * By default (global walk), walk the kmem_transaction_log.  Otherwise
1803          * read the log whose kmem_log_header_t is stored at walk_addr.
1804          */
1805         if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806                 mdb_warn("failed to read 'kmem_transaction_log'");
1807                 return (WALK_ERR);
1808         }
1809 
1810         if (lp == NULL) {
1811                 mdb_warn("log is disabled\n");
1812                 return (WALK_ERR);
1813         }
1814 
1815         klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816         lhp = &klw->klw_lh;
1817 
1818         if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819                 mdb_warn("failed to read log header at %p", lp);
1820                 mdb_free(klw, sizeof (kmem_log_walk_t));
1821                 return (WALK_ERR);
1822         }
1823 
1824         klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825         klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826         maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827 
1828         if (mdb_vread(klw->klw_base, klw->klw_size,
1829             (uintptr_t)lhp->lh_base) == -1) {
1830                 mdb_warn("failed to read log at base %p", lhp->lh_base);
1831                 mdb_free(klw->klw_base, klw->klw_size);
1832                 mdb_free(klw, sizeof (kmem_log_walk_t));
1833                 return (WALK_ERR);
1834         }
1835 
1836         klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837             sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838 
1839         for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840                 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841                     ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842 
1843                 for (j = 0; j < maxndx; j++)
1844                         klw->klw_sorted[k++] = &chunk[j];
1845         }
1846 
1847         qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848             (int(*)(const void *, const void *))bufctlcmp);
1849 
1850         klw->klw_maxndx = k;
1851         wsp->walk_data = klw;
1852 
1853         return (WALK_NEXT);
1854 }
1855 
1856 int
1857 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 {
1859         kmem_log_walk_t *klw = wsp->walk_data;
1860         kmem_bufctl_audit_t *bcp;
1861 
1862         if (klw->klw_ndx == klw->klw_maxndx)
1863                 return (WALK_DONE);
1864 
1865         bcp = klw->klw_sorted[klw->klw_ndx++];
1866 
1867         return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868             (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 }
1870 
1871 void
1872 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 {
1874         kmem_log_walk_t *klw = wsp->walk_data;
1875 
1876         mdb_free(klw->klw_base, klw->klw_size);
1877         mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878             sizeof (kmem_bufctl_audit_t *));
1879         mdb_free(klw, sizeof (kmem_log_walk_t));
1880 }
1881 
1882 typedef struct allocdby_bufctl {
1883         uintptr_t abb_addr;
1884         hrtime_t abb_ts;
1885 } allocdby_bufctl_t;
1886 
1887 typedef struct allocdby_walk {
1888         const char *abw_walk;
1889         uintptr_t abw_thread;
1890         size_t abw_nbufs;
1891         size_t abw_size;
1892         allocdby_bufctl_t *abw_buf;
1893         size_t abw_ndx;
1894 } allocdby_walk_t;
1895 
1896 int
1897 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898     allocdby_walk_t *abw)
1899 {
1900         if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901                 return (WALK_NEXT);
1902 
1903         if (abw->abw_nbufs == abw->abw_size) {
1904                 allocdby_bufctl_t *buf;
1905                 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906 
1907                 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908 
1909                 bcopy(abw->abw_buf, buf, oldsize);
1910                 mdb_free(abw->abw_buf, oldsize);
1911 
1912                 abw->abw_size <<= 1;
1913                 abw->abw_buf = buf;
1914         }
1915 
1916         abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917         abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918         abw->abw_nbufs++;
1919 
1920         return (WALK_NEXT);
1921 }
1922 
1923 /*ARGSUSED*/
1924 int
1925 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 {
1927         if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928             abw, addr) == -1) {
1929                 mdb_warn("couldn't walk bufctl for cache %p", addr);
1930                 return (WALK_DONE);
1931         }
1932 
1933         return (WALK_NEXT);
1934 }
1935 
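     /*
      * Sort bufctls in reverse timestamp order, so that a thread's most
      * recent allocations (or frees) are reported first.
      */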
1936 static int
1937 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 {
1939         if (lhs->abb_ts < rhs->abb_ts)
1940                 return (1);
1941         if (lhs->abb_ts > rhs->abb_ts)
1942                 return (-1);
1943         return (0);
1944 }
1945 
1946 static int
1947 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 {
1949         allocdby_walk_t *abw;
1950 
1951         if (wsp->walk_addr == NULL) {
1952                 mdb_warn("allocdby walk doesn't support global walks\n");
1953                 return (WALK_ERR);
1954         }
1955 
1956         abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957 
1958         abw->abw_thread = wsp->walk_addr;
1959         abw->abw_walk = walk;
1960         abw->abw_size = 128; /* something reasonable */
1961         abw->abw_buf =
1962             mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963 
1964         wsp->walk_data = abw;
1965 
1966         if (mdb_walk("kmem_cache",
1967             (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968                 mdb_warn("couldn't walk kmem_cache");
1969                 allocdby_walk_fini(wsp);
1970                 return (WALK_ERR);
1971         }
1972 
1973         qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974             (int(*)(const void *, const void *))allocdby_cmp);
1975 
1976         return (WALK_NEXT);
1977 }
1978 
1979 int
1980 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 {
1982         return (allocdby_walk_init_common(wsp, "bufctl"));
1983 }
1984 
1985 int
1986 freedby_walk_init(mdb_walk_state_t *wsp)
1987 {
1988         return (allocdby_walk_init_common(wsp, "freectl"));
1989 }
1990 
1991 int
1992 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 {
1994         allocdby_walk_t *abw = wsp->walk_data;
1995         kmem_bufctl_audit_t bc;
1996         uintptr_t addr;
1997 
1998         if (abw->abw_ndx == abw->abw_nbufs)
1999                 return (WALK_DONE);
2000 
2001         addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002 
2003         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004                 mdb_warn("couldn't read bufctl at %p", addr);
2005                 return (WALK_DONE);
2006         }
2007 
2008         return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 }
2010 
2011 void
2012 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 {
2014         allocdby_walk_t *abw = wsp->walk_data;
2015 
2016         mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017         mdb_free(abw, sizeof (allocdby_walk_t));
2018 }
2019 
2020 /*ARGSUSED*/
2021 int
2022 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 {
2024         char c[MDB_SYM_NAMLEN];
2025         GElf_Sym sym;
2026         int i;
2027 
2028         mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029         for (i = 0; i < bcp->bc_depth; i++) {
2030                 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031                     MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032                         continue;
2033                 if (strncmp(c, "kmem_", 5) == 0)
2034                         continue;
2035                 mdb_printf("%s+0x%lx",
2036                     c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037                 break;
2038         }
2039         mdb_printf("\n");
2040 
2041         return (WALK_NEXT);
2042 }
2043 
2044 static int
2045 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 {
2047         if (!(flags & DCMD_ADDRSPEC))
2048                 return (DCMD_USAGE);
2049 
2050         mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051 
2052         if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053                 mdb_warn("can't walk '%s' for %p", w, addr);
2054                 return (DCMD_ERR);
2055         }
2056 
2057         return (DCMD_OK);
2058 }
2059 
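     /*
      * For example, "<thread addr>::allocdby" lists the bufctls of
      * buffers most recently allocated by the given thread, newest
      * first; "<thread addr>::freedby" does the same for frees.
      */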
2060 /*ARGSUSED*/
2061 int
2062 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 {
2064         return (allocdby_common(addr, flags, "allocdby"));
2065 }
2066 
2067 /*ARGSUSED*/
2068 int
2069 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 {
2071         return (allocdby_common(addr, flags, "freedby"));
2072 }
2073 
2074 /*
2075  * Return a string describing the address in relation to the given thread's
2076  * stack.
2077  *
2078  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079  *
2080  * - If the address is above the stack pointer, return an empty string
2081  *   signifying that the address is active.
2082  *
2083  * - If the address is below the stack pointer, and the thread is not on proc,
2084  *   return " (below sp)".
2085  *
2086  * - If the address is below the stack pointer, and the thread is on proc,
2087  *   return " (possibly below sp)".  Depending on context, we may or may not
2088  *   have an accurate t_sp.
2089  */
2090 static const char *
2091 stack_active(const kthread_t *t, uintptr_t addr)
2092 {
2093         uintptr_t panicstk;
2094         GElf_Sym sym;
2095 
2096         if (t->t_state == TS_FREE)
2097                 return (" (inactive interrupt thread)");
2098 
2099         /*
2100          * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2101          * no longer relates to the thread's real stack.
2102          */
2103         if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104                 panicstk = (uintptr_t)sym.st_value;
2105 
2106                 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107                         return ("");
2108         }
2109 
2110         if (addr >= t->t_sp + STACK_BIAS)
2111                 return ("");
2112 
2113         if (t->t_state == TS_ONPROC)
2114                 return (" (possibly below sp)");
2115 
2116         return (" (below sp)");
2117 }
2118 
2119 /*
2120  * Additional state for the kmem and vmem ::whatis handlers
2121  */
2122 typedef struct whatis_info {
2123         mdb_whatis_t *wi_w;
2124         const kmem_cache_t *wi_cache;
2125         const vmem_t *wi_vmem;
2126         vmem_t *wi_msb_arena;
2127         size_t wi_slab_size;
2128         uint_t wi_slab_found;
2129         uint_t wi_kmem_lite_count;
2130         uint_t wi_freemem;
2131 } whatis_info_t;
2132 
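     /*
      * A query such as "<addr>::whatis -v" (a typical invocation) drives
      * the whatis_run_* functions below, which search the kmem caches,
      * vmem arenas, thread stacks, loaded modules, and page structures
      * for the given address.
      */
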
2133 /* call one of our dcmd functions with "-v" and the provided address */
2134 static void
2135 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 {
2137         mdb_arg_t a;
2138         a.a_type = MDB_TYPE_STRING;
2139         a.a_un.a_str = "-v";
2140 
2141         mdb_printf(":\n");
2142         (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 }
2144 
2145 static void
2146 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 {
2148 #define KMEM_LITE_MAX   16
2149         pc_t callers[KMEM_LITE_MAX];
2150         pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151 
2152         kmem_buftag_t bt;
2153         intptr_t stat;
2154         const char *plural = "";
2155         int i;
2156 
2157         /* validate our arguments and read in the buftag */
2158         if (count == 0 || count > KMEM_LITE_MAX ||
2159             mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160                 return;
2161 
2162         /* validate the buffer state and read in the callers */
2163         stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164 
2165         if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166                 return;
2167 
2168         if (mdb_vread(callers, count * sizeof (pc_t),
2169             btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170                 return;
2171 
2172         /* If there aren't any filled-in callers, bail */
2173         if (callers[0] == uninit)
2174                 return;
2175 
2176         plural = (callers[1] == uninit) ? "" : "s";
2177 
2178         /* Everything's done and checked; print them out */
2179         mdb_printf(":\n");
2180 
2181         mdb_inc_indent(8);
2182         mdb_printf("recent caller%s: %a", plural, callers[0]);
2183         for (i = 1; i < count; i++) {
2184                 if (callers[i] == uninit)
2185                         break;
2186                 mdb_printf(", %a", callers[i]);
2187         }
2188         mdb_dec_indent(8);
2189 }
2190 
2191 static void
2192 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193     uintptr_t baddr)
2194 {
2195         mdb_whatis_t *w = wi->wi_w;
2196 
2197         const kmem_cache_t *cp = wi->wi_cache;
2198         /* LINTED pointer cast may result in improper alignment */
2199         uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200         int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201         int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202 
2203         mdb_whatis_report_object(w, maddr, addr, "");
2204 
2205         if (baddr != 0 && !call_printer)
2206                 mdb_printf("bufctl %p ", baddr);
2207 
2208         mdb_printf("%s from %s",
2209             (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210 
2211         if (baddr != 0 && call_printer) {
2212                 whatis_call_printer(bufctl, baddr);
2213                 return;
2214         }
2215 
2216         /* for KMF_LITE caches, try to print out the previous callers */
2217         if (!quiet && (cp->cache_flags & KMF_LITE))
2218                 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219 
2220         mdb_printf("\n");
2221 }
2222 
2223 /*ARGSUSED*/
2224 static int
2225 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 {
2227         mdb_whatis_t *w = wi->wi_w;
2228 
2229         uintptr_t cur;
2230         size_t size = wi->wi_cache->cache_bufsize;
2231 
2232         while (mdb_whatis_match(w, addr, size, &cur))
2233                 whatis_print_kmem(wi, cur, addr, NULL);
2234 
2235         return (WHATIS_WALKRET(w));
2236 }
2237 
2238 /*ARGSUSED*/
2239 static int
2240 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 {
2242         mdb_whatis_t *w = wi->wi_w;
2243 
2244         uintptr_t cur;
2245         uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246         size_t size = wi->wi_cache->cache_bufsize;
2247 
2248         while (mdb_whatis_match(w, addr, size, &cur))
2249                 whatis_print_kmem(wi, cur, addr, baddr);
2250 
2251         return (WHATIS_WALKRET(w));
2252 }
2253 
2254 static int
2255 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 {
2257         mdb_whatis_t *w = wi->wi_w;
2258 
2259         size_t size = vs->vs_end - vs->vs_start;
2260         uintptr_t cur;
2261 
2262         /* We're not interested in anything but alloc and free segments */
2263         if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264                 return (WALK_NEXT);
2265 
2266         while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267                 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268 
2269                 /*
2270                  * If we're not printing it separately, provide the vmem_seg
2271                  * pointer if it has a stack trace.
2272                  */
2273                 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274                     (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275                     (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276                         mdb_printf("vmem_seg %p ", addr);
2277                 }
2278 
2279                 mdb_printf("%s from the %s vmem arena",
2280                     (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281                     wi->wi_vmem->vm_name);
2282 
2283                 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284                         whatis_call_printer(vmem_seg, addr);
2285                 else
2286                         mdb_printf("\n");
2287         }
2288 
2289         return (WHATIS_WALKRET(w));
2290 }
2291 
2292 static int
2293 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 {
2295         mdb_whatis_t *w = wi->wi_w;
2296         const char *nm = vmem->vm_name;
2297 
2298         int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299         int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300 
2301         if (identifier != idspace)
2302                 return (WALK_NEXT);
2303 
2304         wi->wi_vmem = vmem;
2305 
2306         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307                 mdb_printf("Searching vmem arena %s...\n", nm);
2308 
2309         if (mdb_pwalk("vmem_seg",
2310             (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311                 mdb_warn("can't walk vmem_seg for %p", addr);
2312                 return (WALK_NEXT);
2313         }
2314 
2315         return (WHATIS_WALKRET(w));
2316 }
2317 
2318 /*ARGSUSED*/
2319 static int
2320 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 {
2322         mdb_whatis_t *w = wi->wi_w;
2323 
2324         /* It must overlap with the slab data, or it's not interesting */
2325         if (mdb_whatis_overlaps(w,
2326             (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327                 wi->wi_slab_found++;
2328                 return (WALK_DONE);
2329         }
2330         return (WALK_NEXT);
2331 }
2332 
2333 static int
2334 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 {
2336         mdb_whatis_t *w = wi->wi_w;
2337 
2338         char *walk, *freewalk;
2339         mdb_walk_cb_t func;
2340         int do_bufctl;
2341 
2342         int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343         int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344 
2345         if (identifier != idspace)
2346                 return (WALK_NEXT);
2347 
2348         /* Override the '-b' flag as necessary */
2349         if (!(c->cache_flags & KMF_HASH))
2350                 do_bufctl = FALSE;      /* no bufctls to walk */
2351         else if (c->cache_flags & KMF_AUDIT)
2352                 do_bufctl = TRUE;       /* we always want debugging info */
2353         else
2354                 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355 
2356         if (do_bufctl) {
2357                 walk = "bufctl";
2358                 freewalk = "freectl";
2359                 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360         } else {
2361                 walk = "kmem";
2362                 freewalk = "freemem";
2363                 func = (mdb_walk_cb_t)whatis_walk_kmem;
2364         }
2365 
2366         wi->wi_cache = c;
2367 
2368         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369                 mdb_printf("Searching %s...\n", c->cache_name);
2370 
2371         /*
2372          * If more than two buffers live on each slab, figure out if we're
2373          * interested in anything in any slab before doing the more expensive
2374          * kmem/freemem (bufctl/freectl) walkers.
2375          */
2376         wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377         if (!(c->cache_flags & KMF_HASH))
2378                 wi->wi_slab_size -= sizeof (kmem_slab_t);
2379 
2380         if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381                 wi->wi_slab_found = 0;
2382                 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383                     addr) == -1) {
2384                         mdb_warn("can't find kmem_slab walker");
2385                         return (WALK_DONE);
2386                 }
2387                 if (wi->wi_slab_found == 0)
2388                         return (WALK_NEXT);
2389         }
2390 
2391         wi->wi_freemem = FALSE;
2392         if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393                 mdb_warn("can't find %s walker", walk);
2394                 return (WALK_DONE);
2395         }
2396 
2397         if (mdb_whatis_done(w))
2398                 return (WALK_DONE);
2399 
2400         /*
2401          * We have searched for allocated memory; now search for freed memory.
2402          */
2403         if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404                 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405 
2406         wi->wi_freemem = TRUE;
2407         if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408                 mdb_warn("can't find %s walker", freewalk);
2409                 return (WALK_DONE);
2410         }
2411 
2412         return (WHATIS_WALKRET(w));
2413 }
2414 
2415 static int
2416 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 {
2418         if (c->cache_arena == wi->wi_msb_arena ||
2419             (c->cache_cflags & KMC_NOTOUCH))
2420                 return (WALK_NEXT);
2421 
2422         return (whatis_walk_cache(addr, c, wi));
2423 }
2424 
2425 static int
2426 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 {
2428         if (c->cache_arena != wi->wi_msb_arena)
2429                 return (WALK_NEXT);
2430 
2431         return (whatis_walk_cache(addr, c, wi));
2432 }
2433 
2434 static int
2435 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 {
2437         if (c->cache_arena == wi->wi_msb_arena ||
2438             !(c->cache_cflags & KMC_NOTOUCH))
2439                 return (WALK_NEXT);
2440 
2441         return (whatis_walk_cache(addr, c, wi));
2442 }
2443 
2444 static int
2445 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 {
2447         uintptr_t cur;
2448         uintptr_t saddr;
2449         size_t size;
2450 
2451         /*
2452          * Often, one calls ::whatis on an address from a thread structure.
2453          * We use this opportunity to short-circuit this case...
2454          */
2455         while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456                 mdb_whatis_report_object(w, cur, addr,
2457                     "allocated as a thread structure\n");
2458 
2459         /*
2460          * Now check the stack
2461          */
2462         if (t->t_stkbase == NULL)
2463                 return (WALK_NEXT);
2464 
2465         /*
2466          * This assumes that t_stk is the end of the stack, but it's really
2467          * only the initial stack pointer for the thread.  Arguments to the
2468          * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2469          * that 't->t_stk::whatis' reports "part of t's stack", we include
2470          * t_stk in the range (the "+ 1", below), but the kernel should
2471          * really include the full stack bounds where we can find it.
2472          */
2473         saddr = (uintptr_t)t->t_stkbase;
2474         size = (uintptr_t)t->t_stk - saddr + 1;
2475         while (mdb_whatis_match(w, saddr, size, &cur))
2476                 mdb_whatis_report_object(w, cur, cur,
2477                     "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478 
2479         return (WHATIS_WALKRET(w));
2480 }
2481 
2482 static void
2483 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484     uintptr_t base, size_t size, const char *where)
2485 {
2486         uintptr_t cur;
2487 
2488         /*
2489          * Since we're searching for addresses inside a module, we report
2490          * them as symbols.
2491          */
2492         while (mdb_whatis_match(w, base, size, &cur))
2493                 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 }
2495 
2496 static int
2497 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2498 {
2499         char name[MODMAXNAMELEN];
2500         struct module mod;
2501         Shdr shdr;
2502 
2503         if (m->mod_mp == NULL)
2504                 return (WALK_NEXT);
2505 
2506         if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2507                 mdb_warn("couldn't read modctl %p's module", addr);
2508                 return (WALK_NEXT);
2509         }
2510 
2511         if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2512                 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2513 
2514         whatis_modctl_match(w, name,
2515             (uintptr_t)mod.text, mod.text_size, "text segment");
2516         whatis_modctl_match(w, name,
2517             (uintptr_t)mod.data, mod.data_size, "data segment");
2518         whatis_modctl_match(w, name,
2519             (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2520 
2521         if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2522                 mdb_warn("couldn't read symbol header for %p's module", addr);
2523                 return (WALK_NEXT);
2524         }
2525 
2526         whatis_modctl_match(w, name,
2527             (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2528         whatis_modctl_match(w, name,
2529             (uintptr_t)mod.symspace, mod.symsize, "symspace");
2530 
2531         return (WHATIS_WALKRET(w));
2532 }
2533 
2534 /*ARGSUSED*/
2535 static int
2536 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2537 {
2538         uintptr_t cur;
2539 
2540         uintptr_t base = (uintptr_t)seg->pages;
2541         size_t size = (uintptr_t)seg->epages - base;
2542 
2543         while (mdb_whatis_match(w, base, size, &cur)) {
2544                 /* round our found pointer down to the page_t base. */
2545                 size_t offset = (cur - base) % sizeof (page_t);
2546 
2547                 mdb_whatis_report_object(w, cur, cur - offset,
2548                     "allocated as a page structure\n");
2549         }
2550 
2551         return (WHATIS_WALKRET(w));
2552 }
2553 
2554 /*ARGSUSED*/
2555 static int
2556 whatis_run_modules(mdb_whatis_t *w, void *arg)
2557 {
2558         if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2559                 mdb_warn("couldn't find modctl walker");
2560                 return (1);
2561         }
2562         return (0);
2563 }
2564 
2565 /*ARGSUSED*/
2566 static int
2567 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2568 {
2569         /*
2570          * Now search all thread stacks.  Yes, this is a little weak; we
2571          * can save a lot of work by first checking to see if the
2572          * address is in segkp vs. segkmem.  But hey, computers are
2573          * fast.
2574          */
2575         if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2576                 mdb_warn("couldn't find thread walker");
2577                 return (1);
2578         }
2579         return (0);
2580 }
2581 
2582 /*ARGSUSED*/
2583 static int
2584 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2585 {
2586         if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2587                 mdb_warn("couldn't find memseg walker");
2588                 return (1);
2589         }
2590         return (0);
2591 }
2592 
2593 /*ARGSUSED*/
2594 static int
2595 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2596 {
2597         whatis_info_t wi;
2598 
2599         bzero(&wi, sizeof (wi));
2600         wi.wi_w = w;
2601 
2602         if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2603                 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2604 
2605         if (mdb_readvar(&wi.wi_kmem_lite_count,
2606             "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2607                 wi.wi_kmem_lite_count = 0;
2608 
2609         /*
2610          * We process kmem caches in the following order:
2611          *
2612          *      non-KMC_NOTOUCH, non-metadata   (typically the most interesting)
2613          *      metadata                        (can be huge with KMF_AUDIT)
2614          *      KMC_NOTOUCH, non-metadata       (see kmem_walk_all())
2615          */
2616         if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2617             &wi) == -1 ||
2618             mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2619             &wi) == -1 ||
2620             mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2621             &wi) == -1) {
2622                 mdb_warn("couldn't find kmem_cache walker");
2623                 return (1);
2624         }
2625         return (0);
2626 }
2627 
2628 /*ARGSUSED*/
2629 static int
2630 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2631 {
2632         whatis_info_t wi;
2633 
2634         bzero(&wi, sizeof (wi));
2635         wi.wi_w = w;
2636 
2637         if (mdb_walk("vmem_postfix",
2638             (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2639                 mdb_warn("couldn't find vmem_postfix walker");
2640                 return (1);
2641         }
2642         return (0);
2643 }
2644 
2645 typedef struct kmem_log_cpu {
2646         uintptr_t kmc_low;
2647         uintptr_t kmc_high;
2648 } kmem_log_cpu_t;
2649 
2650 typedef struct kmem_log_data {
2651         uintptr_t kmd_addr;
2652         kmem_log_cpu_t *kmd_cpu;
2653 } kmem_log_data_t;
2654 
2655 int
2656 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2657     kmem_log_data_t *kmd)
2658 {
2659         int i;
2660         kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2661         size_t bufsize;
2662 
2663         for (i = 0; i < NCPU; i++) {
2664                 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2665                         break;
2666         }
2667 
2668         if (kmd->kmd_addr) {
2669                 if (b->bc_cache == NULL)
2670                         return (WALK_NEXT);
2671 
2672                 if (mdb_vread(&bufsize, sizeof (bufsize),
2673                     (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2674                         mdb_warn(
2675                             "failed to read cache_bufsize for cache at %p",
2676                             b->bc_cache);
2677                         return (WALK_ERR);
2678                 }
2679 
2680                 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2681                     kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2682                         return (WALK_NEXT);
2683         }
2684 
2685         if (i == NCPU)
2686                 mdb_printf("   ");
2687         else
2688                 mdb_printf("%3d", i);
2689 
2690         mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2691             b->bc_timestamp, b->bc_thread);
2692 
2693         return (WALK_NEXT);
2694 }
2695 
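     /*
      * ::kmem_log dcmd: display entries from the kmem transaction log.
      * For example, "::kmem_log" dumps the entire log;
      * "<buf addr>::kmem_log -b" shows only entries whose buffer contains
      * the given address; and "<bufctl addr>::kmem_log" displays the
      * single corresponding entry.
      */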
2696 /*ARGSUSED*/
2697 int
2698 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2699 {
2700         kmem_log_header_t lh;
2701         kmem_cpu_log_header_t clh;
2702         uintptr_t lhp, clhp;
2703         int ncpus;
2704         uintptr_t *cpu;
2705         GElf_Sym sym;
2706         kmem_log_cpu_t *kmc;
2707         int i;
2708         kmem_log_data_t kmd;
2709         uint_t opt_b = FALSE;
2710 
2711         if (mdb_getopts(argc, argv,
2712             'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2713                 return (DCMD_USAGE);
2714 
2715         if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2716                 mdb_warn("failed to read 'kmem_transaction_log'");
2717                 return (DCMD_ERR);
2718         }
2719 
2720         if (lhp == NULL) {
2721                 mdb_warn("no kmem transaction log\n");
2722                 return (DCMD_ERR);
2723         }
2724 
2725         mdb_readvar(&ncpus, "ncpus");
2726 
2727         if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2728                 mdb_warn("failed to read log header at %p", lhp);
2729                 return (DCMD_ERR);
2730         }
2731 
2732         clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2733 
2734         cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2735 
2736         if (mdb_lookup_by_name("cpu", &sym) == -1) {
2737                 mdb_warn("couldn't find 'cpu' array");
2738                 return (DCMD_ERR);
2739         }
2740 
2741         if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2742                 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2743                     NCPU * sizeof (uintptr_t), sym.st_size);
2744                 return (DCMD_ERR);
2745         }
2746 
2747         if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2748                 mdb_warn("failed to read cpu array at %p", sym.st_value);
2749                 return (DCMD_ERR);
2750         }
2751 
2752         kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2753         kmd.kmd_addr = NULL;
2754         kmd.kmd_cpu = kmc;
2755 
2756         for (i = 0; i < NCPU; i++) {
2757 
2758                 if (cpu[i] == NULL)
2759                         continue;
2760 
2761                 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2762                         mdb_warn("cannot read cpu %d's log header at %p",
2763                             i, clhp);
2764                         return (DCMD_ERR);
2765                 }
2766 
2767                 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2768                     (uintptr_t)lh.lh_base;
2769                 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2770 
2771                 clhp += sizeof (kmem_cpu_log_header_t);
2772         }
2773 
2774         mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2775             "TIMESTAMP", "THREAD");
2776 
2777         /*
2778          * If we have been passed an address, print out only log entries
2779          * corresponding to that address.  With opt_b, the address is treated
2780          * as a buffer address; otherwise, as the address of a bufctl in the log.
2781          */
2782         if (flags & DCMD_ADDRSPEC) {
2783                 kmem_bufctl_audit_t b;
2784 
2785                 if (opt_b) {
2786                         kmd.kmd_addr = addr;
2787                 } else {
2788                         if (mdb_vread(&b,
2789                             sizeof (kmem_bufctl_audit_t), addr) == -1) {
2790                                 mdb_warn("failed to read bufctl at %p", addr);
2791                                 return (DCMD_ERR);
2792                         }
2793 
2794                         (void) kmem_log_walk(addr, &b, &kmd);
2795 
2796                         return (DCMD_OK);
2797                 }
2798         }
2799 
2800         if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2801                 mdb_warn("can't find kmem log walker");
2802                 return (DCMD_ERR);
2803         }
2804 
2805         return (DCMD_OK);
2806 }
2807 
2808 typedef struct bufctl_history_cb {
2809         int             bhc_flags;
2810         int             bhc_argc;
2811         const mdb_arg_t *bhc_argv;
2812         int             bhc_ret;
2813 } bufctl_history_cb_t;
2814 
2815 /*ARGSUSED*/
2816 static int
2817 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2818 {
2819         bufctl_history_cb_t *bhc = arg;
2820 
2821         bhc->bhc_ret =
2822             bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2823 
2824         bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2825 
2826         return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2827 }
2828 
2829 void
2830 bufctl_help(void)
2831 {
2832         mdb_printf("%s",
2833 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2834         mdb_dec_indent(2);
2835         mdb_printf("%<b>OPTIONS%</b>\n");
2836         mdb_inc_indent(2);
2837         mdb_printf("%s",
2838 "  -v    Display the full content of the bufctl, including its stack trace\n"
2839 "  -h    Retrieve the bufctl's transaction history, if available\n"
2840 "  -a addr\n"
2841 "        filter out bufctls not involving the buffer at addr\n"
2842 "  -c caller\n"
2843 "        filter out bufctls without the function/PC in their stack trace\n"
2844 "  -e earliest\n"
2845 "        filter out bufctls timestamped before earliest\n"
2846 "  -l latest\n"
2847 "        filter out bufctls timestamped after latest\n"
2848 "  -t thread\n"
2849 "        filter out bufctls not involving thread\n");
2850 }
2851 
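     /*
      * For example, "<bufctl addr>::bufctl -v" prints the full audit
      * record, including its stack trace, while a pipeline such as
      * "::walk bufctl | ::bufctl -t <thread addr>" narrows the walk to
      * a single thread's transactions.
      */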
2852 int
2853 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2854 {
2855         kmem_bufctl_audit_t bc;
2856         uint_t verbose = FALSE;
2857         uint_t history = FALSE;
2858         uint_t in_history = FALSE;
2859         uintptr_t caller = NULL, thread = NULL;
2860         uintptr_t laddr, haddr, baddr = NULL;
2861         hrtime_t earliest = 0, latest = 0;
2862         int i, depth;
2863         char c[MDB_SYM_NAMLEN];
2864         GElf_Sym sym;
2865 
2866         if (mdb_getopts(argc, argv,
2867             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2868             'h', MDB_OPT_SETBITS, TRUE, &history,
2869             'H', MDB_OPT_SETBITS, TRUE, &in_history,                /* internal */
2870             'c', MDB_OPT_UINTPTR, &caller,
2871             't', MDB_OPT_UINTPTR, &thread,
2872             'e', MDB_OPT_UINT64, &earliest,
2873             'l', MDB_OPT_UINT64, &latest,
2874             'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2875                 return (DCMD_USAGE);
2876 
2877         if (!(flags & DCMD_ADDRSPEC))
2878                 return (DCMD_USAGE);
2879 
2880         if (in_history && !history)
2881                 return (DCMD_USAGE);
2882 
2883         if (history && !in_history) {
2884                 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2885                     UM_SLEEP | UM_GC);
2886                 bufctl_history_cb_t bhc;
2887 
2888                 nargv[0].a_type = MDB_TYPE_STRING;
2889                 nargv[0].a_un.a_str = "-H";             /* prevent recursion */
2890 
2891                 for (i = 0; i < argc; i++)
2892                         nargv[i + 1] = argv[i];
2893 
2894                 /*
2895                  * When in history mode, we treat each element as if it
2896                  * were in a separate loop, so that the headers group
2897                  * bufctls with similar histories.
2898                  */
2899                 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2900                 bhc.bhc_argc = argc + 1;
2901                 bhc.bhc_argv = nargv;
2902                 bhc.bhc_ret = DCMD_OK;
2903 
2904                 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2905                     addr) == -1) {
2906                         mdb_warn("unable to walk bufctl_history");
2907                         return (DCMD_ERR);
2908                 }
2909 
2910                 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2911                         mdb_printf("\n");
2912 
2913                 return (bhc.bhc_ret);
2914         }
2915 
2916         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2917                 if (verbose) {
2918                         mdb_printf("%16s %16s %16s %16s\n"
2919                             "%<u>%16s %16s %16s %16s%</u>\n",
2920                             "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2921                             "", "CACHE", "LASTLOG", "CONTENTS");
2922                 } else {
2923                         mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2924                             "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925                 }
2926         }
2927 
2928         if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2929                 mdb_warn("couldn't read bufctl at %p", addr);
2930                 return (DCMD_ERR);
2931         }
2932 
2933         /*
2934          * Guard against bogus bc_depth in case the bufctl is corrupt or
2935          * the address does not really refer to a bufctl.
2936          */
2937         depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2938 
2939         if (caller != NULL) {
2940                 laddr = caller;
2941                 haddr = caller + sizeof (caller);
2942 
2943                 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2944                     &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2945                         /*
2946                          * We were provided an exact symbol value; any
2947                          * address in the function is valid.
2948                          */
2949                         laddr = (uintptr_t)sym.st_value;
2950                         haddr = (uintptr_t)sym.st_value + sym.st_size;
2951                 }
2952 
2953                 for (i = 0; i < depth; i++)
2954                         if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2955                                 break;
2956 
2957                 if (i == depth)
2958                         return (DCMD_OK);
2959         }
2960 
2961         if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2962                 return (DCMD_OK);
2963 
2964         if (earliest != 0 && bc.bc_timestamp < earliest)
2965                 return (DCMD_OK);
2966 
2967         if (latest != 0 && bc.bc_timestamp > latest)
2968                 return (DCMD_OK);
2969 
2970         if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2971                 return (DCMD_OK);
2972 
2973         if (flags & DCMD_PIPE_OUT) {
2974                 mdb_printf("%#lr\n", addr);
2975                 return (DCMD_OK);
2976         }
2977 
2978         if (verbose) {
2979                 mdb_printf(
2980                     "%<b>%16p%</b> %16p %16llx %16p\n"
2981                     "%16s %16p %16p %16p\n",
2982                     addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2983                     "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2984 
2985                 mdb_inc_indent(17);
2986                 for (i = 0; i < depth; i++)
2987                         mdb_printf("%a\n", bc.bc_stack[i]);
2988                 mdb_dec_indent(17);
2989                 mdb_printf("\n");
2990         } else {
2991                 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2992                     bc.bc_timestamp, bc.bc_thread);
2993 
2994                 for (i = 0; i < depth; i++) {
2995                         if (mdb_lookup_by_addr(bc.bc_stack[i],
2996                             MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2997                                 continue;
2998                         if (strncmp(c, "kmem_", 5) == 0)
2999                                 continue;
3000                         mdb_printf(" %a\n", bc.bc_stack[i]);
3001                         break;
3002                 }
3003 
3004                 if (i >= depth)
3005                         mdb_printf("\n");
3006         }
3007 
3008         return (DCMD_OK);
3009 }
3010 
3011 typedef struct kmem_verify {
3012         uint64_t *kmv_buf;              /* buffer to read cache contents into */
3013         size_t kmv_size;                /* number of bytes in kmv_buf */
3014         int kmv_corruption;             /* > 0 if corruption found. */
3015         int kmv_besilent;               /* if set, suppress corruption messages */
3016         struct kmem_cache kmv_cache;    /* the cache we're operating on */
3017 } kmem_verify_t;
3018 
3019 /*
3020  * verify_pattern()
3021  *      verify that buf is filled with the pattern pat.  Returns the byte
      *      offset of the first mismatch, or -1 if the entire buffer matches.
3022  */
3023 static int64_t
3024 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3025 {
3026         /*LINTED*/
3027         uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3028         uint64_t *buf;
3029 
3030         for (buf = buf_arg; buf < bufend; buf++)
3031                 if (*buf != pat)
3032                         return ((uintptr_t)buf - (uintptr_t)buf_arg);
3033         return (-1);
3034 }
3035 
3036 /*
3037  * verify_buftag()
3038  *      verify that btp->bt_bxstat == (bcp ^ pat)
3039  */
3040 static int
3041 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3042 {
3043         return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 }
3045 
3046 /*
3047  * verify_free()
3048  *      verify the integrity of a free block of memory by checking
3049  *      that it is filled with 0xdeadbeef and that its buftag is sane.
3050  */
3051 /*ARGSUSED1*/
3052 static int
3053 verify_free(uintptr_t addr, const void *data, void *private)
3054 {
3055         kmem_verify_t *kmv = (kmem_verify_t *)private;
3056         uint64_t *buf = kmv->kmv_buf;        /* buf to validate */
3057         int64_t corrupt;                /* corruption offset */
3058         kmem_buftag_t *buftagp;         /* ptr to buftag */
3059         kmem_cache_t *cp = &kmv->kmv_cache;
3060         int besilent = kmv->kmv_besilent;
3061 
3062         /*LINTED*/
3063         buftagp = KMEM_BUFTAG(cp, buf);
3064 
3065         /*
3066          * Read the buffer to check.
3067          */
3068         if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3069                 if (!besilent)
3070                         mdb_warn("couldn't read %p", addr);
3071                 return (WALK_NEXT);
3072         }
3073 
3074         if ((corrupt = verify_pattern(buf, cp->cache_verify,
3075             KMEM_FREE_PATTERN)) >= 0) {
3076                 if (!besilent)
3077                         mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3078                             addr, (uintptr_t)addr + corrupt);
3079                 goto corrupt;
3080         }
3081         /*
3082          * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3083          * the first bytes of the buffer, hence we cannot check for red
3084          * zone corruption.
3085          */
3086         if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3087             buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3088                 if (!besilent)
3089                         mdb_printf("buffer %p (free) seems to "
3090                             "have a corrupt redzone pattern\n", addr);
3091                 goto corrupt;
3092         }
3093 
3094         /*
3095          * confirm bufctl pointer integrity.
3096          */
3097         if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3098                 if (!besilent)
3099                         mdb_printf("buffer %p (free) has a corrupt "
3100                             "buftag\n", addr);
3101                 goto corrupt;
3102         }
3103 
3104         return (WALK_NEXT);
3105 corrupt:
3106         kmv->kmv_corruption++;
3107         return (WALK_NEXT);
3108 }
3109 
3110 /*
3111  * verify_alloc()
3112  *      Verify that the buftag of an allocated buffer makes sense with respect
3113  *      to the buffer.
3114  */
3115 /*ARGSUSED1*/
3116 static int
3117 verify_alloc(uintptr_t addr, const void *data, void *private)
3118 {
3119         kmem_verify_t *kmv = (kmem_verify_t *)private;
3120         kmem_cache_t *cp = &kmv->kmv_cache;
3121         uint64_t *buf = kmv->kmv_buf;        /* buf to validate */
3122         /*LINTED*/
3123         kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3124         uint32_t *ip = (uint32_t *)buftagp;
3125         uint8_t *bp = (uint8_t *)buf;
3126         int looks_ok = 0, size_ok = 1;  /* flags for finding corruption */
3127         int besilent = kmv->kmv_besilent;
3128 
3129         /*
3130          * Read the buffer to check.
3131          */
3132         if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3133                 if (!besilent)
3134                         mdb_warn("couldn't read %p", addr);
3135                 return (WALK_NEXT);
3136         }
3137 
3138         /*
3139          * There are two cases to handle:
3140          * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3141          *    0xfeedfacefeedface at the end of it
3142          * 2. If the buf was alloc'd using kmem_alloc, it will have
3143          *    0xbb just past the end of the region in use.  At the buftag,
3144          *    it will have 0xfeedface (or, if the region in use runs all
3145          *    the way up to the buftag, 0xfeedface with one byte
3146          *    overwritten by 0xbb, yielding 0xfeedfabb or 0xbbedface
3147          *    depending on endianness), followed by 32 bits encoding the
                   offset of the 0xbb byte in the buffer.
3148          *
3149          * Finally, the two 32-bit words that comprise the second half of the
3150          * buftag should xor to KMEM_BUFTAG_ALLOC
3151          */
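        /*
         * An illustrative instance of case 2: for kmem_alloc(10) satisfied
         * from a larger cache, ip[1] holds the encoded size such that
         * KMEM_SIZE_DECODE(ip[1]) == 10, and bp[10] == KMEM_REDZONE_BYTE
         * (0xbb).  The checks below accept either that layout or the full
         * KMEM_REDZONE_PATTERN of case 1.
         */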
3152 
3153         if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3154                 looks_ok = 1;
3155         else if (!KMEM_SIZE_VALID(ip[1]))
3156                 size_ok = 0;
3157         else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3158                 looks_ok = 1;
3159         else
3160                 size_ok = 0;
3161 
3162         if (!size_ok) {
3163                 if (!besilent)
3164                         mdb_printf("buffer %p (allocated) has a corrupt "
3165                             "redzone size encoding\n", addr);
3166                 goto corrupt;
3167         }
3168 
3169         if (!looks_ok) {
3170                 if (!besilent)
3171                         mdb_printf("buffer %p (allocated) has a corrupt "
3172                             "redzone signature\n", addr);
3173                 goto corrupt;
3174         }
3175 
3176         if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3177                 if (!besilent)
3178                         mdb_printf("buffer %p (allocated) has a "
3179                             "corrupt buftag\n", addr);
3180                 goto corrupt;
3181         }
3182 
3183         return (WALK_NEXT);
3184 corrupt:
3185         kmv->kmv_corruption++;
3186         return (WALK_NEXT);
3187 }
3188 
3189 /*ARGSUSED2*/
3190 int
3191 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3192 {
3193         if (flags & DCMD_ADDRSPEC) {
3194                 int check_alloc = 0, check_free = 0;
3195                 kmem_verify_t kmv;
3196 
3197                 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3198                     addr) == -1) {
3199                         mdb_warn("couldn't read kmem_cache %p", addr);
3200                         return (DCMD_ERR);
3201                 }
3202 
3203                 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3204                     sizeof (kmem_buftag_t);
3205                 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3206                 kmv.kmv_corruption = 0;
3207 
3208                 if (kmv.kmv_cache.cache_flags & KMF_REDZONE) {
3209                         check_alloc = 1;
3210                         if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3211                                 check_free = 1;
3212                 } else {
3213                         if (!(flags & DCMD_LOOP)) {
3214                                 mdb_warn("cache %p (%s) does not have "
3215                                     "redzone checking enabled\n", addr,
3216                                     kmv.kmv_cache.cache_name);
3217                         }
3218                         return (DCMD_ERR);
3219                 }
3220 
3221                 if (flags & DCMD_LOOP) {
3222                         /*
3223                          * table mode, don't print out every corrupt buffer
3224                          */
3225                         kmv.kmv_besilent = 1;
3226                 } else {
3227                         mdb_printf("Summary for cache '%s'\n",
3228                             kmv.kmv_cache.cache_name);
3229                         mdb_inc_indent(2);
3230                         kmv.kmv_besilent = 0;
3231                 }
3232 
3233                 if (check_alloc)
3234                         (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3235                 if (check_free)
3236                         (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3237 
3238                 if (flags & DCMD_LOOP) {
3239                         if (kmv.kmv_corruption == 0) {
3240                                 mdb_printf("%-*s %?p clean\n",
3241                                     KMEM_CACHE_NAMELEN,
3242                                     kmv.kmv_cache.cache_name, addr);
3243                         } else {
3244                                 char *s = "";   /* optional s in "buffer[s]" */
3245                                 if (kmv.kmv_corruption > 1)
3246                                         s = "s";
3247 
3248                                 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3249                                     KMEM_CACHE_NAMELEN,
3250                                     kmv.kmv_cache.cache_name, addr,
3251                                     kmv.kmv_corruption, s);
3252                         }
3253                 } else {
3254                         /*
3255                          * This is the more verbose mode, when the user has
3256                          * typed addr::kmem_verify.  If the cache was clean,
3257                          * nothing will have yet been printed. So say something.
3258                          */
3259                         if (kmv.kmv_corruption == 0)
3260                                 mdb_printf("clean\n");
3261 
3262                         mdb_dec_indent(2);
3263                 }
3264         } else {
3265                 /*
3266                  * If the user didn't specify a cache to verify, we'll walk all
3267                  * kmem_cache's, specifying ourself as a callback for each...
3268                  * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3269                  */
3270                 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN,
3271                     "Cache Name", "Addr", "Cache Integrity");
3272                 (void) mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL);
3273         }
3274 
3275         return (DCMD_OK);
3276 }
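
/*
 * Example usage (output shape illustrative, not from a real dump):
 *
 *	> ::kmem_verify
 *	Cache Name                     Addr             Cache Integrity
 *	kmem_alloc_8                   fffff00012345678 clean
 *	kmem_alloc_16                  fffff00012345a40 1 corrupt buffer
 *
 * whereas 'addr::kmem_verify' on a single cache prints a per-buffer
 * report beneath a "Summary for cache ..." banner, as implemented above.
 */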
3277 
3278 typedef struct vmem_node {
3279         struct vmem_node *vn_next;
3280         struct vmem_node *vn_parent;
3281         struct vmem_node *vn_sibling;
3282         struct vmem_node *vn_children;
3283         uintptr_t vn_addr;
3284         int vn_marked;
3285         vmem_t vn_vmem;
3286 } vmem_node_t;
3287 
3288 typedef struct vmem_walk {
3289         vmem_node_t *vw_root;
3290         vmem_node_t *vw_current;
3291 } vmem_walk_t;
3292 
3293 int
3294 vmem_walk_init(mdb_walk_state_t *wsp)
3295 {
3296         uintptr_t vaddr, paddr;
3297         vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3298         vmem_walk_t *vw;
3299 
3300         if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3301                 mdb_warn("couldn't read 'vmem_list'");
3302                 return (WALK_ERR);
3303         }
3304 
3305         while (vaddr != NULL) {
3306                 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3307                 vp->vn_addr = vaddr;
3308                 vp->vn_next = head;
3309                 head = vp;
3310 
3311                 if (vaddr == wsp->walk_addr)
3312                         current = vp;
3313 
3314                 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3315                         mdb_warn("couldn't read vmem_t at %p", vaddr);
3316                         goto err;
3317                 }
3318 
3319                 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3320         }
3321 
3322         for (vp = head; vp != NULL; vp = vp->vn_next) {
3323 
3324                 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3325                         vp->vn_sibling = root;
3326                         root = vp;
3327                         continue;
3328                 }
3329 
3330                 for (parent = head; parent != NULL; parent = parent->vn_next) {
3331                         if (parent->vn_addr != paddr)
3332                                 continue;
3333                         vp->vn_sibling = parent->vn_children;
3334                         parent->vn_children = vp;
3335                         vp->vn_parent = parent;
3336                         break;
3337                 }
3338 
3339                 if (parent == NULL) {
3340                         mdb_warn("couldn't find %p's parent (%p)\n",
3341                             vp->vn_addr, paddr);
3342                         goto err;
3343                 }
3344         }
3345 
3346         vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3347         vw->vw_root = root;
3348 
3349         if (current != NULL)
3350                 vw->vw_current = current;
3351         else
3352                 vw->vw_current = root;
3353 
3354         wsp->walk_data = vw;
3355         return (WALK_NEXT);
3356 err:
3357         for (vp = head; head != NULL; vp = head) {
3358                 head = vp->vn_next;
3359                 mdb_free(vp, sizeof (vmem_node_t));
3360         }
3361 
3362         return (WALK_ERR);
3363 }
3364 
3365 int
3366 vmem_walk_step(mdb_walk_state_t *wsp)
3367 {
3368         vmem_walk_t *vw = wsp->walk_data;
3369         vmem_node_t *vp;
3370         int rval;
3371 
3372         if ((vp = vw->vw_current) == NULL)
3373                 return (WALK_DONE);
3374 
3375         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3376 
3377         if (vp->vn_children != NULL) {
3378                 vw->vw_current = vp->vn_children;
3379                 return (rval);
3380         }
3381 
3382         do {
3383                 vw->vw_current = vp->vn_sibling;
3384                 vp = vp->vn_parent;
3385         } while (vw->vw_current == NULL && vp != NULL);
3386 
3387         return (rval);
3388 }
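
/*
 * vmem_walk_step() therefore visits arenas in pre-order: an arena is
 * passed to the callback before any of its children.  For an
 * illustrative hierarchy
 *
 *	heap
 *	    heap_core
 *	    kmem_va
 *	        kmem_default
 *
 * heap is visited first, then each child subtree in turn (sibling order
 * depends on how the lists were threaded in vmem_walk_init() above).
 */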
3389 
3390 /*
3391  * The "vmem_postfix" walk walks the vmem arenas in postfix order: all
3392  * children are visited before their parent.  We perform the postfix walk
3393  * iteratively (rather than recursively) to allow mdb to regain control
3394  * after each callback.
3395  */
3396 int
3397 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3398 {
3399         vmem_walk_t *vw = wsp->walk_data;
3400         vmem_node_t *vp = vw->vw_current;
3401         int rval;
3402 
3403         /*
3404          * If this node is marked, then we know that we have already visited
3405          * all of its children.  If the node has any siblings, they need to
3406          * be visited next; otherwise, we need to visit the parent.  Note
3407          * that vp->vn_marked will only be zero on the first invocation of
3408          * the step function.
3409          */
3410         if (vp->vn_marked) {
3411                 if (vp->vn_sibling != NULL)
3412                         vp = vp->vn_sibling;
3413                 else if (vp->vn_parent != NULL)
3414                         vp = vp->vn_parent;
3415                 else {
3416                         /*
3417                          * We have neither a parent, nor a sibling, and we
3418                          * have already been visited; we're done.
3419                          */
3420                         return (WALK_DONE);
3421                 }
3422         }
3423 
3424         /*
3425          * Before we visit this node, visit its children.
3426          */
3427         while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3428                 vp = vp->vn_children;
3429 
3430         vp->vn_marked = 1;
3431         vw->vw_current = vp;
3432         rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3433 
3434         return (rval);
3435 }
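
/*
 * For the same illustrative hierarchy shown above, the postfix walk
 * visits heap_core, kmem_va, and kmem_default (each child subtree in
 * full) before finally visiting heap itself: an importing arena is
 * always reported before the arena it imports from.
 */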
3436 
3437 void
3438 vmem_walk_fini(mdb_walk_state_t *wsp)
3439 {
3440         vmem_walk_t *vw = wsp->walk_data;
3441         vmem_node_t *root = vw->vw_root;
3442         int done;
3443 
3444         if (root == NULL)
3445                 return;
3446 
3447         if ((vw->vw_root = root->vn_children) != NULL)
3448                 vmem_walk_fini(wsp);
3449 
3450         vw->vw_root = root->vn_sibling;
3451         done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3452         mdb_free(root, sizeof (vmem_node_t));
3453 
3454         if (done) {
3455                 mdb_free(vw, sizeof (vmem_walk_t));
3456         } else {
3457                 vmem_walk_fini(wsp);
3458         }
3459 }
3460 
3461 typedef struct vmem_seg_walk {
3462         uint8_t vsw_type;
3463         uintptr_t vsw_start;
3464         uintptr_t vsw_current;
3465 } vmem_seg_walk_t;
3466 
3467 /*ARGSUSED*/
3468 int
3469 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3470 {
3471         vmem_seg_walk_t *vsw;
3472 
3473         if (wsp->walk_addr == NULL) {
3474                 mdb_warn("vmem_%s does not support global walks\n", name);
3475                 return (WALK_ERR);
3476         }
3477 
3478         wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3479 
3480         vsw->vsw_type = type;
3481         vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3482         vsw->vsw_current = vsw->vsw_start;
3483 
3484         return (WALK_NEXT);
3485 }
3486 
3487 /*
3488  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3489  */
3490 #define VMEM_NONE       0
3491 
3492 int
3493 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3494 {
3495         return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3496 }
3497 
3498 int
3499 vmem_free_walk_init(mdb_walk_state_t *wsp)
3500 {
3501         return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3502 }
3503 
3504 int
3505 vmem_span_walk_init(mdb_walk_state_t *wsp)
3506 {
3507         return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3508 }
3509 
3510 int
3511 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3512 {
3513         return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3514 }
3515 
3516 int
3517 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3518 {
3519         vmem_seg_t seg;
3520         vmem_seg_walk_t *vsw = wsp->walk_data;
3521         uintptr_t addr = vsw->vsw_current;
3522         static size_t seg_size = 0;
3523         int rval;
3524 
3525         if (!seg_size) {
3526                 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3527                         mdb_warn("failed to read 'vmem_seg_size'");
3528                         seg_size = sizeof (vmem_seg_t);
3529                 }
3530         }
3531 
3532         if (seg_size < sizeof (seg))
3533                 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3534 
3535         if (mdb_vread(&seg, seg_size, addr) == -1) {
3536                 mdb_warn("couldn't read vmem_seg at %p", addr);
3537                 return (WALK_ERR);
3538         }
3539 
3540         vsw->vsw_current = (uintptr_t)seg.vs_anext;
3541         if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3542                 rval = WALK_NEXT;
3543         } else {
3544                 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3545         }
3546 
3547         if (vsw->vsw_current == vsw->vsw_start)
3548                 return (WALK_DONE);
3549 
3550         return (rval);
3551 }
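
/*
 * An arena's segment list is circular: vm_seg0 is a sentinel embedded
 * in the vmem_t itself, so the walk starts at &vm_seg0 and stops once
 * vs_anext wraps back around to it.  An illustrative invocation, where
 * addr is a vmem_t address (e.g. one printed by ::vmem):
 *
 *	> addr::walk vmem_alloc | ::vmem_seg
 */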
3552 
3553 void
3554 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3555 {
3556         vmem_seg_walk_t *vsw = wsp->walk_data;
3557 
3558         mdb_free(vsw, sizeof (vmem_seg_walk_t));
3559 }
3560 
3561 #define VMEM_NAMEWIDTH  22
3562 
3563 int
3564 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3565 {
3566         vmem_t v, parent;
3567         vmem_kstat_t *vkp = &v.vm_kstat;
3568         uintptr_t paddr;
3569         int ident = 0;
3570         char c[VMEM_NAMEWIDTH];
3571 
3572         if (!(flags & DCMD_ADDRSPEC)) {
3573                 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3574                         mdb_warn("can't walk vmem");
3575                         return (DCMD_ERR);
3576                 }
3577                 return (DCMD_OK);
3578         }
3579 
3580         if (DCMD_HDRSPEC(flags))
3581                 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3582                     "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3583                     "TOTAL", "SUCCEED", "FAIL");
3584 
3585         if (mdb_vread(&v, sizeof (v), addr) == -1) {
3586                 mdb_warn("couldn't read vmem at %p", addr);
3587                 return (DCMD_ERR);
3588         }
3589 
3590         for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3591                 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3592                         mdb_warn("couldn't trace %p's ancestry", addr);
3593                         ident = 0;
3594                         break;
3595                 }
3596                 paddr = (uintptr_t)parent.vm_source;
3597         }
3598 
3599         (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3600 
3601         mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3602             addr, VMEM_NAMEWIDTH, c,
3603             vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3604             vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3605 
3606         return (DCMD_OK);
3607 }
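
/*
 * Sample ::vmem output (numbers illustrative): the two-space-per-level
 * indentation computed above reflects each arena's vm_source ancestry.
 *
 *	> ::vmem
 *	ADDR             NAME                      INUSE        TOTAL ...
 *	fffff0000c000000 heap                 1073741824   4294967296 ...
 *	fffff0000c001000   kmem_va              67108864    134217728 ...
 *	fffff0000c002000     kmem_default       66060288     67108864 ...
 */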
3608 
3609 void
3610 vmem_seg_help(void)
3611 {
3612         mdb_printf("%s",
3613 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3614 "\n"
3615 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3616 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3617 "information.\n");
3618         mdb_dec_indent(2);
3619         mdb_printf("%<b>OPTIONS%</b>\n");
3620         mdb_inc_indent(2);
3621         mdb_printf("%s",
3622 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3623 "  -s    report the size of the segment, instead of the end address\n"
3624 "  -c caller\n"
3625 "        filter out segments without the function/PC in their stack trace\n"
3626 "  -e earliest\n"
3627 "        filter out segments timestamped before earliest\n"
3628 "  -l latest\n"
3629 "        filter out segments timestamped after latest\n"
3630 "  -m minsize\n"
3631 "        filer out segments smaller than minsize\n"
3632 "  -M maxsize\n"
3633 "        filer out segments larger than maxsize\n"
3634 "  -t thread\n"
3635 "        filter out segments not involving thread\n"
3636 "  -T type\n"
3637 "        filter out segments not of type 'type'\n"
3638 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3639 }
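
/*
 * Illustrative invocations: to display only the sizable allocated
 * segments in an arena, along with who allocated them,
 *
 *	> addr::walk vmem_alloc | ::vmem_seg -m 0x10000
 *
 * while 'addr::vmem_seg -v' prints a single segment in full, including
 * its stack trace, as implemented below.
 */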
3640 
3641 /*ARGSUSED*/
3642 int
3643 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3644 {
3645         vmem_seg_t vs;
3646         pc_t *stk = vs.vs_stack;
3647         uintptr_t sz;
3648         uint8_t t;
3649         const char *type = NULL;
3650         GElf_Sym sym;
3651         char c[MDB_SYM_NAMLEN];
3652         int no_debug;
3653         int i;
3654         int depth;
3655         uintptr_t laddr, haddr;
3656 
3657         uintptr_t caller = NULL, thread = NULL;
3658         uintptr_t minsize = 0, maxsize = 0;
3659 
3660         hrtime_t earliest = 0, latest = 0;
3661 
3662         uint_t size = 0;
3663         uint_t verbose = 0;
3664 
3665         if (!(flags & DCMD_ADDRSPEC))
3666                 return (DCMD_USAGE);
3667 
3668         if (mdb_getopts(argc, argv,
3669             'c', MDB_OPT_UINTPTR, &caller,
3670             'e', MDB_OPT_UINT64, &earliest,
3671             'l', MDB_OPT_UINT64, &latest,
3672             's', MDB_OPT_SETBITS, TRUE, &size,
3673             'm', MDB_OPT_UINTPTR, &minsize,
3674             'M', MDB_OPT_UINTPTR, &maxsize,
3675             't', MDB_OPT_UINTPTR, &thread,
3676             'T', MDB_OPT_STR, &type,
3677             'v', MDB_OPT_SETBITS, TRUE, &verbose,
3678             NULL) != argc)
3679                 return (DCMD_USAGE);
3680 
3681         if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3682                 if (verbose) {
3683                         mdb_printf("%16s %4s %16s %16s %16s\n"
3684                             "%<u>%16s %4s %16s %16s %16s%</u>\n",
3685                             "ADDR", "TYPE", "START", "END", "SIZE",
3686                             "", "", "THREAD", "TIMESTAMP", "");
3687                 } else {
3688                         mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3689                             "START", size? "SIZE" : "END", "WHO");
3690                 }
3691         }
3692 
3693         if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3694                 mdb_warn("couldn't read vmem_seg at %p", addr);
3695                 return (DCMD_ERR);
3696         }
3697 
3698         if (type != NULL) {
3699                 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3700                         t = VMEM_ALLOC;
3701                 else if (strcmp(type, "FREE") == 0)
3702                         t = VMEM_FREE;
3703                 else if (strcmp(type, "SPAN") == 0)
3704                         t = VMEM_SPAN;
3705                 else if (strcmp(type, "ROTR") == 0 ||
3706                     strcmp(type, "ROTOR") == 0)
3707                         t = VMEM_ROTOR;
3708                 else if (strcmp(type, "WLKR") == 0 ||
3709                     strcmp(type, "WALKER") == 0)
3710                         t = VMEM_WALKER;
3711                 else {
3712                         mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3713                             type);
3714                         return (DCMD_ERR);
3715                 }
3716 
3717                 if (vs.vs_type != t)
3718                         return (DCMD_OK);
3719         }
3720 
3721         sz = vs.vs_end - vs.vs_start;
3722 
3723         if (minsize != 0 && sz < minsize)
3724                 return (DCMD_OK);
3725 
3726         if (maxsize != 0 && sz > maxsize)
3727                 return (DCMD_OK);
3728 
3729         t = vs.vs_type;
3730         depth = vs.vs_depth;
3731 
3732         /*
3733          * debug info, when present, is only accurate for VMEM_ALLOC segments
3734          */
3735         no_debug = (t != VMEM_ALLOC) ||
3736             (depth == 0 || depth > VMEM_STACK_DEPTH);
3737 
3738         if (no_debug) {
3739                 if (caller != NULL || thread != NULL || earliest != 0 ||
3740                     latest != 0)
3741                         return (DCMD_OK);               /* not enough info */
3742         } else {
3743                 if (caller != NULL) {
3744                         laddr = caller;
3745                         haddr = caller + sizeof (caller);
3746 
3747                         if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3748                             sizeof (c), &sym) != -1 &&
3749                             caller == (uintptr_t)sym.st_value) {
3750                                 /*
3751                                  * We were provided an exact symbol value; any
3752                                  * address in the function is valid.
3753                                  */
3754                                 laddr = (uintptr_t)sym.st_value;
3755                                 haddr = (uintptr_t)sym.st_value + sym.st_size;
3756                         }
3757 
3758                         for (i = 0; i < depth; i++)
3759                                 if (vs.vs_stack[i] >= laddr &&
3760                                     vs.vs_stack[i] < haddr)
3761                                         break;
3762 
3763                         if (i == depth)
3764                                 return (DCMD_OK);
3765                 }
3766 
3767                 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3768                         return (DCMD_OK);
3769 
3770                 if (earliest != 0 && vs.vs_timestamp < earliest)
3771                         return (DCMD_OK);
3772 
3773                 if (latest != 0 && vs.vs_timestamp > latest)
3774                         return (DCMD_OK);
3775         }
3776 
3777         type = (t == VMEM_ALLOC ? "ALLC" :
3778             t == VMEM_FREE ? "FREE" :
3779             t == VMEM_SPAN ? "SPAN" :
3780             t == VMEM_ROTOR ? "ROTR" :
3781             t == VMEM_WALKER ? "WLKR" :
3782             "????");
3783 
3784         if (flags & DCMD_PIPE_OUT) {
3785                 mdb_printf("%#lr\n", addr);
3786                 return (DCMD_OK);
3787         }
3788 
3789         if (verbose) {
3790                 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3791                     addr, type, vs.vs_start, vs.vs_end, sz);
3792 
3793                 if (no_debug)
3794                         return (DCMD_OK);
3795 
3796                 mdb_printf("%16s %4s %16p %16llx\n",
3797                     "", "", vs.vs_thread, vs.vs_timestamp);
3798 
3799                 mdb_inc_indent(17);
3800                 for (i = 0; i < depth; i++) {
3801                         mdb_printf("%a\n", stk[i]);
3802                 }
3803                 mdb_dec_indent(17);
3804                 mdb_printf("\n");
3805         } else {
3806                 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3807                     vs.vs_start, size? sz : vs.vs_end);
3808 
3809                 if (no_debug) {
3810                         mdb_printf("\n");
3811                         return (DCMD_OK);
3812                 }
3813 
3814                 for (i = 0; i < depth; i++) {
3815                         if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3816                             c, sizeof (c), &sym) == -1)
3817                                 continue;
3818                         if (strncmp(c, "vmem_", 5) == 0)
3819                                 continue;
3820                         break;
3821                 }
3822                 if (i < depth)
                             mdb_printf(" %a\n", stk[i]);
                     else
                             mdb_printf("\n");
3823         }
3824         return (DCMD_OK);
3825 }
3826 
3827 typedef struct kmalog_data {
3828         uintptr_t       kma_addr;
3829         hrtime_t        kma_newest;
3830 } kmalog_data_t;
3831 
3832 /*ARGSUSED*/
3833 static int
3834 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3835 {
3836         char name[KMEM_CACHE_NAMELEN + 1];
3837         hrtime_t delta;
3838         int i, depth;
3839         size_t bufsize;
3840 
3841         if (bcp->bc_timestamp == 0)
3842                 return (WALK_DONE);
3843 
3844         if (kma->kma_newest == 0)
3845                 kma->kma_newest = bcp->bc_timestamp;
3846 
3847         if (kma->kma_addr) {
3848                 if (mdb_vread(&bufsize, sizeof (bufsize),
3849                     (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3850                         mdb_warn(
3851                             "failed to read cache_bufsize for cache at %p",
3852                             bcp->bc_cache);
3853                         return (WALK_ERR);
3854                 }
3855 
3856                 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3857                     kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3858                         return (WALK_NEXT);
3859         }
3860 
3861         delta = kma->kma_newest - bcp->bc_timestamp;
3862         depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3863 
3864         if (mdb_readstr(name, sizeof (name), (uintptr_t)
3865             &bcp->bc_cache->cache_name) <= 0)
3866                 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3867 
3868         mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3869             delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3870 
3871         for (i = 0; i < depth; i++)
3872                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3873 
3874         return (WALK_NEXT);
3875 }
3876 
3877 int
3878 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3879 {
3880         const char *logname = "kmem_transaction_log";
3881         kmalog_data_t kma;
3882 
3883         if (argc > 1)
3884                 return (DCMD_USAGE);
3885 
3886         kma.kma_newest = 0;
3887         if (flags & DCMD_ADDRSPEC)
3888                 kma.kma_addr = addr;
3889         else
3890                 kma.kma_addr = NULL;
3891 
3892         if (argc > 0) {
3893                 if (argv->a_type != MDB_TYPE_STRING)
3894                         return (DCMD_USAGE);
3895                 if (strcmp(argv->a_un.a_str, "fail") == 0)
3896                         logname = "kmem_failure_log";
3897                 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3898                         logname = "kmem_slab_log";
3899                 else
3900                         return (DCMD_USAGE);
3901         }
3902 
3903         if (mdb_readvar(&addr, logname) == -1) {
3904                 mdb_warn("failed to read %s log header pointer", logname);
3905                 return (DCMD_ERR);
3906         }
3907 
3908         if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3909                 mdb_warn("failed to walk kmem log");
3910                 return (DCMD_ERR);
3911         }
3912 
3913         return (DCMD_OK);
3914 }
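
/*
 * Example invocations (the kernel must be running with kmem logging
 * enabled for these logs to be populated):
 *
 *	> ::kmalog			display the transaction log
 *	> ::kmalog fail			display the allocation-failure log
 *	> ::kmalog slab			display the slab-creation log
 *	> addr::kmalog			only transactions touching addr
 */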
3915 
3916 /*
3917  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3918  * The first piece is a structure which we use to accumulate kmem_cache_t
3919  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3920  * walker; we either add all caches, or ones named explicitly as arguments.
3921  */
3922 
3923 typedef struct kmclist {
3924         const char *kmc_name;                   /* Name to match (or NULL) */
3925         uintptr_t *kmc_caches;                  /* List of kmem_cache_t addrs */
3926         int kmc_nelems;                         /* Num entries in kmc_caches */
3927         int kmc_size;                           /* Size of kmc_caches array */
3928 } kmclist_t;
3929 
3930 static int
3931 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3932 {
3933         void *p;
3934         int s;
3935 
3936         if (kmc->kmc_name == NULL ||
3937             strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3938                 /*
3939                  * If we have a match, grow our array (if necessary), and then
3940                  * add the virtual address of the matching cache to our list.
3941                  */
3942                 if (kmc->kmc_nelems >= kmc->kmc_size) {
3943                         s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3944                         p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3945 
3946                         bcopy(kmc->kmc_caches, p,
3947                             sizeof (uintptr_t) * kmc->kmc_size);
3948 
3949                         kmc->kmc_caches = p;
3950                         kmc->kmc_size = s;
3951                 }
3952 
3953                 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3954                 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3955         }
3956 
3957         return (WALK_NEXT);
3958 }
3959 
3960 /*
3961  * The second piece of ::kmausers is a hash table of allocations.  Each
3962  * allocation owner is identified by its stack trace and data_size.  We then
3963  * track the total bytes of all such allocations, and the number of allocations
3964  * to report at the end.  Once we have a list of caches, we walk through the
3965  * allocated bufctls of each, and update our hash table accordingly.
3966  */
3967 
3968 typedef struct kmowner {
3969         struct kmowner *kmo_head;               /* First hash elt in bucket */
3970         struct kmowner *kmo_next;               /* Next hash elt in chain */
3971         size_t kmo_signature;                   /* Hash table signature */
3972         uint_t kmo_num;                         /* Number of allocations */
3973         size_t kmo_data_size;                   /* Size of each allocation */
3974         size_t kmo_total_size;                  /* Total bytes of allocation */
3975         int kmo_depth;                          /* Depth of stack trace */
3976         uintptr_t kmo_stack[KMEM_STACK_DEPTH];  /* Stack trace */
3977 } kmowner_t;
3978 
3979 typedef struct kmusers {
3980         uintptr_t kmu_addr;                     /* address of interest */
3981         const kmem_cache_t *kmu_cache;          /* Current kmem cache */
3982         kmowner_t *kmu_hash;                    /* Hash table of owners */
3983         int kmu_nelems;                         /* Number of entries in use */
3984         int kmu_size;                           /* Total number of entries */
3985 } kmusers_t;
3986 
3987 static void
3988 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3989     size_t size, size_t data_size)
3990 {
3991         int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3992         size_t bucket, signature = data_size;
3993         kmowner_t *kmo, *kmoend;
3994 
3995         /*
3996          * If the hash table is full, double its size and rehash everything.
3997          */
3998         if (kmu->kmu_nelems >= kmu->kmu_size) {
3999                 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4000 
4001                 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4002                 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4003                 kmu->kmu_hash = kmo;
4004                 kmu->kmu_size = s;
4005 
4006                 kmoend = kmu->kmu_hash + kmu->kmu_size;
4007                 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4008                         kmo->kmo_head = NULL;
4009 
4010                 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4011                 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4012                         bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4013                         kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4014                         kmu->kmu_hash[bucket].kmo_head = kmo;
4015                 }
4016         }
4017 
4018         /*
4019          * Finish computing the hash signature from the stack trace, and then
4020          * see if the owner is in the hash table.  If so, update our stats.
4021          */
4022         for (i = 0; i < depth; i++)
4023                 signature += bcp->bc_stack[i];
4024 
4025         bucket = signature & (kmu->kmu_size - 1);
4026 
4027         for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4028                 if (kmo->kmo_signature == signature) {
4029                         size_t difference = 0;
4030 
4031                         difference |= kmo->kmo_data_size - data_size;
4032                         difference |= kmo->kmo_depth - depth;
4033 
4034                         for (i = 0; i < depth; i++) {
4035                                 difference |= kmo->kmo_stack[i] -
4036                                     bcp->bc_stack[i];
4037                         }
4038 
4039                         if (difference == 0) {
4040                                 kmo->kmo_total_size += size;
4041                                 kmo->kmo_num++;
4042                                 return;
4043                         }
4044                 }
4045         }
4046 
4047         /*
4048          * If the owner is not yet hashed, grab the next element and fill it
4049          * in based on the allocation information.
4050          */
4051         kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4052         kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4053         kmu->kmu_hash[bucket].kmo_head = kmo;
4054 
4055         kmo->kmo_signature = signature;
4056         kmo->kmo_num = 1;
4057         kmo->kmo_data_size = data_size;
4058         kmo->kmo_total_size = size;
4059         kmo->kmo_depth = depth;
4060 
4061         for (i = 0; i < depth; i++)
4062                 kmo->kmo_stack[i] = bcp->bc_stack[i];
4063 }
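
/*
 * A worked (illustrative) example of the hashing scheme above: an owner
 * with data_size 64 (0x40) and stack { 0x100, 0x200 } has signature
 * 0x40 + 0x100 + 0x200 == 0x340; with kmu_size == 1024 it lands in
 * bucket 0x340 & 0x3ff == 0x340.  Since kmu_size is always a power of
 * two (1024, then doubling), '& (kmu_size - 1)' is an exact modulo.
 */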
4064 
4065 /*
4066  * When ::kmausers is invoked without the -f flag, we simply update our hash
4067  * table with the information from each allocated bufctl.
4068  */
4069 /*ARGSUSED*/
4070 static int
4071 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4072 {
4073         const kmem_cache_t *cp = kmu->kmu_cache;
4074 
4075         kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4076         return (WALK_NEXT);
4077 }
4078 
4079 /*
4080  * When ::kmausers is invoked with the -f flag, we print out the information
4081  * for each bufctl as well as updating the hash table.
4082  */
4083 static int
4084 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4085 {
4086         int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4087         const kmem_cache_t *cp = kmu->kmu_cache;
4088         kmem_bufctl_t bufctl;
4089 
4090         if (kmu->kmu_addr) {
4091                 if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4092                         mdb_warn("couldn't read bufctl at %p", addr);
4093                 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4094                     kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4095                     cp->cache_bufsize)
4096                         return (WALK_NEXT);
4097         }
4098 
4099         mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4100             cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4101 
4102         for (i = 0; i < depth; i++)
4103                 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4104 
4105         kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4106         return (WALK_NEXT);
4107 }
4108 
4109 /*
4110  * We sort our results in descending order of total size before printing them.
4111  */
4112 static int
4113 kmownercmp(const void *lp, const void *rp)
4114 {
4115         const kmowner_t *lhs = lp;
4116         const kmowner_t *rhs = rp;
4117 
4118         /* explicit compares: subtracting size_t values would truncate to int */
             if (lhs->kmo_total_size < rhs->kmo_total_size)
                     return (1);
             if (lhs->kmo_total_size > rhs->kmo_total_size)
                     return (-1);
             return (0);
4119 }
4120 
4121 /*
4122  * The main engine of ::kmausers is relatively straightforward: First we
4123  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4124  * iterate over the allocated bufctls of each cache in the list.  Finally,
4125  * we sort and print our results.
4126  */
4127 /*ARGSUSED*/
4128 int
4129 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4130 {
4131         int mem_threshold = 8192;       /* Minimum # bytes for printing */
4132         int cnt_threshold = 100;        /* Minimum # blocks for printing */
4133         int audited_caches = 0;         /* Number of KMF_AUDIT caches found */
4134         int do_all_caches = 1;          /* Do all caches (no arguments) */
4135         int opt_e = FALSE;              /* Include "small" users */
4136         int opt_f = FALSE;              /* Print stack traces */
4137 
4138         mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4139         kmowner_t *kmo, *kmoend;
4140         int i, oelems;
4141 
4142         kmclist_t kmc;
4143         kmusers_t kmu;
4144 
4145         bzero(&kmc, sizeof (kmc));
4146         bzero(&kmu, sizeof (kmu));
4147 
4148         while ((i = mdb_getopts(argc, argv,
4149             'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4150             'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4151 
4152                 argv += i;      /* skip past options we just processed */
4153                 argc -= i;      /* adjust argc */
4154 
4155                 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4156                         return (DCMD_USAGE);
4157 
4158                 oelems = kmc.kmc_nelems;
4159                 kmc.kmc_name = argv->a_un.a_str;
4160                 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4161 
4162                 if (kmc.kmc_nelems == oelems) {
4163                         mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4164                         return (DCMD_ERR);
4165                 }
4166 
4167                 do_all_caches = 0;
4168                 argv++;
4169                 argc--;
4170         }
4171 
4172         if (flags & DCMD_ADDRSPEC) {
4173                 opt_f = TRUE;
4174                 kmu.kmu_addr = addr;
4175         } else {
4176                 kmu.kmu_addr = NULL;
4177         }
4178 
4179         if (opt_e)
4180                 mem_threshold = cnt_threshold = 0;
4181 
4182         if (opt_f)
4183                 callback = (mdb_walk_cb_t)kmause2;
4184 
4185         if (do_all_caches) {
4186                 kmc.kmc_name = NULL; /* match all cache names */
4187                 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4188         }
4189 
4190         for (i = 0; i < kmc.kmc_nelems; i++) {
4191                 uintptr_t cp = kmc.kmc_caches[i];
4192                 kmem_cache_t c;
4193 
4194                 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4195                         mdb_warn("failed to read cache at %p", cp);
4196                         continue;
4197                 }
4198 
4199                 if (!(c.cache_flags & KMF_AUDIT)) {
4200                         if (!do_all_caches) {
4201                                 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4202                                     c.cache_name);
4203                         }
4204                         continue;
4205                 }
4206 
4207                 kmu.kmu_cache = &c;
4208                 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4209                 audited_caches++;
4210         }
4211 
4212         if (audited_caches == 0 && do_all_caches) {
4213                 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4214                 return (DCMD_ERR);
4215         }
4216 
4217         qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4218         kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4219 
4220         for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4221                 if (kmo->kmo_total_size < mem_threshold &&
4222                     kmo->kmo_num < cnt_threshold)
4223                         continue;
4224                 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4225                     kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4226                 for (i = 0; i < kmo->kmo_depth; i++)
4227                         mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4228         }
4229 
4230         return (DCMD_OK);
4231 }
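
/*
 * Example invocations (the cache name is illustrative):
 *
 *	> ::kmausers			largest users across all caches
 *	> ::kmausers -e			include the small users, too
 *	> ::kmausers -f zio_cache	per-bufctl detail for one cache
 *	> addr::kmausers		users whose buffers contain addr
 */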
4232 
4233 void
4234 kmausers_help(void)
4235 {
4236         mdb_printf(
4237             "Displays the largest users of the kmem allocator, sorted by \n"
4238             "trace.  If one or more caches is specified, only those caches\n"
4239             "will be searched.  By default, all caches are searched.  If an\n"
4240             "address is specified, then only those allocations which include\n"
4241             "the given address are displayed.  Specifying an address implies\n"
4242             "-f.\n"
4243             "\n"
4244             "\t-e\tInclude all users, not just the largest\n"
4245             "\t-f\tDisplay individual allocations.  By default, users are\n"
4246             "\t\tgrouped by stack\n");
4247 }
4248 
4249 static int
4250 kmem_ready_check(void)
4251 {
4252         int ready;
4253 
4254         if (mdb_readvar(&ready, "kmem_ready") < 0)
4255                 return (-1); /* errno is set for us */
4256 
4257         return (ready);
4258 }
4259 
4260 void
4261 kmem_statechange(void)
4262 {
4263         static int been_ready = 0;
4264 
4265         if (been_ready)
4266                 return;
4267 
4268         if (kmem_ready_check() <= 0)
4269                 return;
4270 
4271         been_ready = 1;
4272         (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4273 }
4274 
4275 void
4276 kmem_init(void)
4277 {
4278         mdb_walker_t w = {
4279                 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4280                 list_walk_step, list_walk_fini
4281         };
4282 
4283         /*
4284          * If kmem is ready, we'll need to invoke the kmem_cache walker
4285          * immediately.  Walkers in the linkage structure won't be ready until
4286          * _mdb_init returns, so we'll need to add this one manually.  If kmem
4287          * is ready, we'll use the walker to initialize the caches.  If kmem
4288          * isn't ready, we'll register a callback that will allow us to defer
4289          * cache walking until it is.
4290          */
4291         if (mdb_add_walker(&w) != 0) {
4292                 mdb_warn("failed to add kmem_cache walker");
4293                 return;
4294         }
4295 
4296         kmem_statechange();
4297 
4298         /* register our ::whatis handlers */
4299         mdb_whatis_register("modules", whatis_run_modules, NULL,
4300             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4301         mdb_whatis_register("threads", whatis_run_threads, NULL,
4302             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4303         mdb_whatis_register("pages", whatis_run_pages, NULL,
4304             WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4305         mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4306             WHATIS_PRIO_ALLOCATOR, 0);
4307         mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4308             WHATIS_PRIO_ALLOCATOR, 0);
4309 }
4310 
4311 typedef struct whatthread {
4312         uintptr_t       wt_target;
4313         int             wt_verbose;
4314 } whatthread_t;
4315 
4316 static int
4317 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4318 {
4319         uintptr_t current, data;
4320 
4321         if (t->t_stkbase == NULL)
4322                 return (WALK_NEXT);
4323 
4324         /*
4325          * Warn about swapped out threads, but drive on anyway
4326          */
4327         if (!(t->t_schedflag & TS_LOAD)) {
4328                 mdb_warn("thread %p's stack swapped out\n", addr);
4329                 return (WALK_NEXT);
4330         }
4331 
4332         /*
4333          * Search the thread's stack for the given pointer.  Note that it would
4334          * be more efficient to follow ::kgrep's lead and read in page-sized
4335          * chunks, but this routine is already fast and simple.
4336          */
4337         for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4338             current += sizeof (uintptr_t)) {
4339                 if (mdb_vread(&data, sizeof (data), current) == -1) {
4340                         mdb_warn("couldn't read thread %p's stack at %p",
4341                             addr, current);
4342                         return (WALK_ERR);
4343                 }
4344 
4345                 if (data == w->wt_target) {
4346                         if (w->wt_verbose) {
4347                                 mdb_printf("%p in thread %p's stack%s\n",
4348                                     current, addr, stack_active(t, current));
4349                         } else {
4350                                 mdb_printf("%#lr\n", addr);
4351                                 return (WALK_NEXT);
4352                         }
4353                 }
4354         }
4355 
4356         return (WALK_NEXT);
4357 }
4358 
4359 int
4360 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4361 {
4362         whatthread_t w;
4363 
4364         if (!(flags & DCMD_ADDRSPEC))
4365                 return (DCMD_USAGE);
4366 
4367         w.wt_verbose = FALSE;
4368         w.wt_target = addr;
4369 
4370         if (mdb_getopts(argc, argv,
4371             'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4372                 return (DCMD_USAGE);
4373 
4374         if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4375             == -1) {
4376                 mdb_warn("couldn't walk threads");
4377                 return (DCMD_ERR);
4378         }
4379 
4380         return (DCMD_OK);
4381 }
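
/*
 * Example usage (address illustrative): 'fffff00012345678::whatthread'
 * prints the address of every kthread_t whose stack contains a word
 * equal to the given value; adding -v also prints where in each stack
 * the match was found.
 */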