1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  25  */
  26 
  27 /* Portions Copyright 2010 Robert Milkowski */
  28 
  29 #include <mdb/mdb_ctf.h>
  30 #include <sys/zfs_context.h>
  31 #include <sys/mdb_modapi.h>
  32 #include <sys/dbuf.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/dsl_pool.h>
  36 #include <sys/metaslab_impl.h>
  37 #include <sys/space_map.h>
  38 #include <sys/list.h>
  39 #include <sys/vdev_impl.h>
  40 #include <sys/zap_leaf.h>
  41 #include <sys/zap_impl.h>
  42 #include <ctype.h>
  43 #include <sys/zfs_acl.h>
  44 #include <sys/sa_impl.h>
  45 
/*
 * Name of the object whose symbols and CTF types the lookups in this file
 * are scoped to: "zfs" when built with _KERNEL defined, "libzpool.so.1"
 * otherwise.
 */
#ifdef _KERNEL
#define ZFS_OBJ_NAME    "zfs"
extern int64_t mdb_gethrtime(void);
#else
#define ZFS_OBJ_NAME    "libzpool.so.1"
#endif

/*
 * String prefix that qualifies a struct tag with ZFS_OBJ_NAME; it is meant to
 * be pasted in front of a string literal, e.g. ZFS_STRUCT "dsl_dir".
 */
#define ZFS_STRUCT      "struct " ZFS_OBJ_NAME "`"

/*
 * Defined only for non-kernel builds.
 * NOTE(review): no user of aok is visible in this part of the file;
 * presumably it satisfies a reference from the headers/libraries -- confirm.
 */
#ifndef _KERNEL
int aok;
#endif
  58 
/*
 * Single-bit option flags that can be OR-ed together.
 * NOTE(review): the consumer of these flags is not visible in this part of
 * the file; the names suggest they select which sections of pool output are
 * printed -- confirm against the users.
 */
enum spa_flags {
        SPA_FLAG_CONFIG                 = 1 << 0,
        SPA_FLAG_VDEVS                  = 1 << 1,
        SPA_FLAG_ERRORS                 = 1 << 2,
        SPA_FLAG_METASLAB_GROUPS        = 1 << 3,
        SPA_FLAG_METASLABS              = 1 << 4,
        SPA_FLAG_HISTOGRAMS             = 1 << 5
};

/* Union of every flag above except SPA_FLAG_CONFIG. */
#define SPA_FLAG_ALL_VDEV       \
        (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \
        SPA_FLAG_METASLABS | SPA_FLAG_HISTOGRAMS)
  71 
  72 static int
  73 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
  74     const char *member, int len, void *buf)
  75 {
  76         mdb_ctf_id_t id;
  77         ulong_t off;
  78         char name[64];
  79 
  80         if (idp == NULL) {
  81                 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
  82                         mdb_warn("couldn't find type %s", type);
  83                         return (DCMD_ERR);
  84                 }
  85                 idp = &id;
  86         } else {
  87                 type = name;
  88                 mdb_ctf_type_name(*idp, name, sizeof (name));
  89         }
  90 
  91         if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
  92                 mdb_warn("couldn't find member %s of type %s\n", member, type);
  93                 return (DCMD_ERR);
  94         }
  95         if (off % 8 != 0) {
  96                 mdb_warn("member %s of type %s is unsupported bitfield",
  97                     member, type);
  98                 return (DCMD_ERR);
  99         }
 100         off /= 8;
 101 
 102         if (mdb_vread(buf, len, addr + off) == -1) {
 103                 mdb_warn("failed to read %s from %s at %p",
 104                     member, type, addr + off);
 105                 return (DCMD_ERR);
 106         }
 107         /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
 108 
 109         return (0);
 110 }
 111 
/*
 * Read `member' of the ZFS structure named by the string literal `structname'
 * (ZFS_STRUCT is prepended) located at target address `addr' into `dest'.
 * sizeof (dest) bytes are read.  Evaluates to getmember()'s return value:
 * 0 on success, DCMD_ERR on failure.
 */
#define GETMEMB(addr, structname, member, dest) \
        getmember(addr, ZFS_STRUCT structname, NULL, #member, \
        sizeof (dest), &(dest))

/*
 * Same as GETMEMB, except that the structure type is given as a pointer to an
 * already looked-up mdb_ctf_id_t (`ctfid') rather than by name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
        getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
 118 
 119 static boolean_t
 120 strisprint(const char *cp)
 121 {
 122         for (; *cp; cp++) {
 123                 if (!isprint(*cp))
 124                         return (B_FALSE);
 125         }
 126         return (B_TRUE);
 127 }
 128 
/* Size in bytes, terminating NUL included, that mdb_nicenum() formats into. */
#define NICENUM_BUFLEN 6
 130 
 131 static int
 132 snprintfrac(char *buf, int len,
 133     uint64_t numerator, uint64_t denom, int frac_digits)
 134 {
 135         int mul = 1;
 136         int whole, frac, i;
 137 
 138         for (i = frac_digits; i; i--)
 139                 mul *= 10;
 140         whole = numerator / denom;
 141         frac = mul * numerator / denom - mul * whole;
 142         return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
 143 }
 144 
 145 static void
 146 mdb_nicenum(uint64_t num, char *buf)
 147 {
 148         uint64_t n = num;
 149         int index = 0;
 150         char *u;
 151 
 152         while (n >= 1024) {
 153                 n = (n + (1024 / 2)) / 1024; /* Round up or down */
 154                 index++;
 155         }
 156 
 157         u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
 158 
 159         if (index == 0) {
 160                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
 161                     (u_longlong_t)n);
 162         } else if (n < 10 && (num & (num - 1)) != 0) {
 163                 (void) snprintfrac(buf, NICENUM_BUFLEN,
 164                     num, 1ULL << 10 * index, 2);
 165                 strcat(buf, u);
 166         } else if (n < 100 && (num & (num - 1)) != 0) {
 167                 (void) snprintfrac(buf, NICENUM_BUFLEN,
 168                     num, 1ULL << 10 * index, 1);
 169                 strcat(buf, u);
 170         } else {
 171                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
 172                     (u_longlong_t)n, u);
 173         }
 174 }
 175 
/*
 * When non-zero, freelist_walk_step() additionally prints the raw value of
 * each non-debug entry.
 * NOTE(review): nothing in this part of the file assigns it -- confirm where
 * (or whether) it is ever set.
 */
static int verbose;
 177 
 178 static int
 179 freelist_walk_init(mdb_walk_state_t *wsp)
 180 {
 181         if (wsp->walk_addr == NULL) {
 182                 mdb_warn("must supply starting address\n");
 183                 return (WALK_ERR);
 184         }
 185 
 186         wsp->walk_data = 0;  /* Index into the freelist */
 187         return (WALK_NEXT);
 188 }
 189 
 190 static int
 191 freelist_walk_step(mdb_walk_state_t *wsp)
 192 {
 193         uint64_t entry;
 194         uintptr_t number = (uintptr_t)wsp->walk_data;
 195         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 196                             "INVALID", "INVALID", "INVALID", "INVALID" };
 197         int mapshift = SPA_MINBLOCKSHIFT;
 198 
 199         if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
 200                 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
 201                 return (WALK_DONE);
 202         }
 203         wsp->walk_addr += sizeof (entry);
 204         wsp->walk_data = (void *)(number + 1);
 205 
 206         if (SM_DEBUG_DECODE(entry)) {
 207                 mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
 208                     number,
 209                     ddata[SM_DEBUG_ACTION_DECODE(entry)],
 210                     SM_DEBUG_TXG_DECODE(entry),
 211                     SM_DEBUG_SYNCPASS_DECODE(entry));
 212         } else {
 213                 mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
 214                     "size=%06llx", number,
 215                     SM_OFFSET_DECODE(entry) << mapshift,
 216                     (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
 217                     mapshift,
 218                     SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 219                     SM_RUN_DECODE(entry) << mapshift);
 220                 if (verbose)
 221                         mdb_printf("      (raw=%012llx)\n", entry);
 222                 mdb_printf("\n");
 223         }
 224         return (WALK_NEXT);
 225 }
 226 
 227 static int
 228 mdb_dsl_dir_name(uintptr_t addr, char *buf)
 229 {
 230         static int gotid;
 231         static mdb_ctf_id_t dd_id;
 232         uintptr_t dd_parent;
 233         char dd_myname[MAXNAMELEN];
 234 
 235         if (!gotid) {
 236                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir",
 237                     &dd_id) == -1) {
 238                         mdb_warn("couldn't find struct dsl_dir");
 239                         return (DCMD_ERR);
 240                 }
 241                 gotid = TRUE;
 242         }
 243         if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
 244             GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
 245                 return (DCMD_ERR);
 246         }
 247 
 248         if (dd_parent) {
 249                 if (mdb_dsl_dir_name(dd_parent, buf))
 250                         return (DCMD_ERR);
 251                 strcat(buf, "/");
 252         }
 253 
 254         if (dd_myname[0])
 255                 strcat(buf, dd_myname);
 256         else
 257                 strcat(buf, "???");
 258 
 259         return (0);
 260 }
 261 
 262 static int
 263 objset_name(uintptr_t addr, char *buf)
 264 {
 265         static int gotid;
 266         static mdb_ctf_id_t os_id, ds_id;
 267         uintptr_t os_dsl_dataset;
 268         char ds_snapname[MAXNAMELEN];
 269         uintptr_t ds_dir;
 270 
 271         buf[0] = '\0';
 272 
 273         if (!gotid) {
 274                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset",
 275                     &os_id) == -1) {
 276                         mdb_warn("couldn't find struct objset");
 277                         return (DCMD_ERR);
 278                 }
 279                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset",
 280                     &ds_id) == -1) {
 281                         mdb_warn("couldn't find struct dsl_dataset");
 282                         return (DCMD_ERR);
 283                 }
 284 
 285                 gotid = TRUE;
 286         }
 287 
 288         if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
 289                 return (DCMD_ERR);
 290 
 291         if (os_dsl_dataset == 0) {
 292                 strcat(buf, "mos");
 293                 return (0);
 294         }
 295 
 296         if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
 297             GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
 298                 return (DCMD_ERR);
 299         }
 300 
 301         if (ds_dir && mdb_dsl_dir_name(ds_dir, buf))
 302                 return (DCMD_ERR);
 303 
 304         if (ds_snapname[0]) {
 305                 strcat(buf, "@");
 306                 strcat(buf, ds_snapname);
 307         }
 308         return (0);
 309 }
 310 
 311 static void
 312 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
 313     const char *prefix)
 314 {
 315         const char *cp;
 316         size_t len = strlen(prefix);
 317 
 318         if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
 319                 if (strncmp(cp, prefix, len) == 0)
 320                         cp += len;
 321                 (void) strncpy(out, cp, size);
 322         } else {
 323                 mdb_snprintf(out, size, "? (%d)", val);
 324         }
 325 }
 326 
 327 /* ARGSUSED */
 328 static int
 329 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 330 {
 331         /*
 332          * This table can be approximately generated by running:
 333          * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
 334          */
 335         static const char *params[] = {
 336                 "arc_reduce_dnlc_percent",
 337                 "arc_lotsfree_percent",
 338                 "zfs_dirty_data_max",
 339                 "zfs_dirty_data_sync",
 340                 "zfs_delay_max_ns",
 341                 "zfs_delay_min_dirty_percent",
 342                 "zfs_delay_scale",
 343                 "zfs_vdev_max_active",
 344                 "zfs_vdev_sync_read_min_active",
 345                 "zfs_vdev_sync_read_max_active",
 346                 "zfs_vdev_sync_write_min_active",
 347                 "zfs_vdev_sync_write_max_active",
 348                 "zfs_vdev_async_read_min_active",
 349                 "zfs_vdev_async_read_max_active",
 350                 "zfs_vdev_async_write_min_active",
 351                 "zfs_vdev_async_write_max_active",
 352                 "zfs_vdev_scrub_min_active",
 353                 "zfs_vdev_scrub_max_active",
 354                 "zfs_vdev_async_write_active_min_dirty_percent",
 355                 "zfs_vdev_async_write_active_max_dirty_percent",
 356                 "spa_asize_inflation",
 357                 "zfs_arc_max",
 358                 "zfs_arc_min",
 359                 "arc_shrink_shift",
 360                 "zfs_mdcomp_disable",
 361                 "zfs_prefetch_disable",
 362                 "zfetch_max_streams",
 363                 "zfetch_min_sec_reap",
 364                 "zfetch_block_cap",
 365                 "zfetch_array_rd_sz",
 366                 "zfs_default_bs",
 367                 "zfs_default_ibs",
 368                 "metaslab_aliquot",
 369                 "reference_tracking_enable",
 370                 "reference_history",
 371                 "spa_max_replication_override",
 372                 "spa_mode_global",
 373                 "zfs_flags",
 374                 "zfs_txg_timeout",
 375                 "zfs_vdev_cache_max",
 376                 "zfs_vdev_cache_size",
 377                 "zfs_vdev_cache_bshift",
 378                 "vdev_mirror_shift",
 379                 "zfs_scrub_limit",
 380                 "zfs_no_scrub_io",
 381                 "zfs_no_scrub_prefetch",
 382                 "zfs_vdev_aggregation_limit",
 383                 "fzap_default_block_shift",
 384                 "zfs_immediate_write_sz",
 385                 "zfs_read_chunk_size",
 386                 "zfs_nocacheflush",
 387                 "zil_replay_disable",
 388                 "metaslab_gang_bang",
 389                 "metaslab_df_alloc_threshold",
 390                 "metaslab_df_free_pct",
 391                 "zio_injection_enabled",
 392                 "zvol_immediate_write_sz",
 393                 "zio_max_timeout_ms",
 394                 "zio_min_timeout_ms",
 395                 "zio_timeout_shift",
 396         };
 397 
 398         for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
 399                 int sz;
 400                 uint64_t val64;
 401                 uint32_t *val32p = (uint32_t *)&val64;
 402 
 403                 sz = mdb_readvar(&val64, params[i]);
 404                 if (sz == 4) {
 405                         mdb_printf("%s = 0x%x\n", params[i], *val32p);
 406                 } else if (sz == 8) {
 407                         mdb_printf("%s = 0x%llx\n", params[i], val64);
 408                 } else {
 409                         mdb_warn("variable %s not found", params[i]);
 410                 }
 411         }
 412 
 413         return (DCMD_OK);
 414 }
 415 
 416 /* ARGSUSED */
 417 static int
 418 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 419 {
 420         mdb_ctf_id_t type_enum, checksum_enum, compress_enum;
 421         char type[80], checksum[80], compress[80];
 422         blkptr_t blk, *bp = &blk;
 423         char buf[BP_SPRINTF_LEN];
 424 
 425         if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
 426                 mdb_warn("failed to read blkptr_t");
 427                 return (DCMD_ERR);
 428         }
 429 
 430         if (mdb_ctf_lookup_by_name("enum dmu_object_type", &type_enum) == -1 ||
 431             mdb_ctf_lookup_by_name("enum zio_checksum", &checksum_enum) == -1 ||
 432             mdb_ctf_lookup_by_name("enum zio_compress", &compress_enum) == -1) {
 433                 mdb_warn("Could not find blkptr enumerated types");
 434                 return (DCMD_ERR);
 435         }
 436 
 437         enum_lookup(type, sizeof (type), type_enum,
 438             BP_GET_TYPE(bp), "DMU_OT_");
 439         enum_lookup(checksum, sizeof (checksum), checksum_enum,
 440             BP_GET_CHECKSUM(bp), "ZIO_CHECKSUM_");
 441         enum_lookup(compress, sizeof (compress), compress_enum,
 442             BP_GET_COMPRESS(bp), "ZIO_COMPRESS_");
 443 
 444         SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type,
 445             checksum, compress);
 446 
 447         mdb_printf("%s\n", buf);
 448 
 449         return (DCMD_OK);
 450 }
 451 
/*
 * Debugger-side subset of the target's dmu_buf_impl: only the fields the
 * dbuf() dcmd prints.  dbuf() fills it with mdb_ctf_vread(), passing the
 * target type ZFS_STRUCT "dmu_buf_impl" and this type's name.
 * NOTE(review): presumably mdb_ctf_vread() matches members by name and
 * widens them to the sizes declared here -- confirm in mdb_ctf.h.
 */
typedef struct mdb_dmu_buf_impl {
        struct {
                uint64_t db_object;
        } db;
        uintptr_t db_objset;
        uint64_t db_level;
        uint64_t db_blkid;
        struct {
                uint64_t rc_count;
        } db_holds;
} mdb_dmu_buf_impl_t;
 463 
 464 /* ARGSUSED */
 465 static int
 466 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 467 {
 468         mdb_dmu_buf_impl_t db;
 469         char objectname[32];
 470         char blkidname[32];
 471         char path[MAXNAMELEN];
 472 
 473         if (DCMD_HDRSPEC(flags))
 474                 mdb_printf("        addr object lvl blkid holds os\n");
 475 
 476         if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
 477             addr, 0) == -1)
 478                 return (DCMD_ERR);
 479 
 480         if (db.db.db_object == DMU_META_DNODE_OBJECT)
 481                 (void) strcpy(objectname, "mdn");
 482         else
 483                 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
 484                     (u_longlong_t)db.db.db_object);
 485 
 486         if (db.db_blkid == DMU_BONUS_BLKID)
 487                 (void) strcpy(blkidname, "bonus");
 488         else
 489                 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
 490                     (u_longlong_t)db.db_blkid);
 491 
 492         if (objset_name(db.db_objset, path)) {
 493                 return (DCMD_ERR);
 494         }
 495 
 496         mdb_printf("%p %8s %1u %9s %2llu %s\n", addr,
 497             objectname, (int)db.db_level, blkidname,
 498             db.db_holds.rc_count, path);
 499 
 500         return (DCMD_OK);
 501 }
 502 
 503 #define CHAIN_END 0xffff
 504 /*
 505  * ::zap_leaf [-v]
 506  *
 507  * Print a zap_leaf_phys_t, assumed to be 16k
 508  */
 509 /* ARGSUSED */
 510 static int
 511 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 512 {
 513         char buf[16*1024];
 514         int verbose = B_FALSE;
 515         int four = B_FALSE;
 516         zap_leaf_t l;
 517         zap_leaf_phys_t *zlp = (void *)buf;
 518         int i;
 519 
 520         if (mdb_getopts(argc, argv,
 521             'v', MDB_OPT_SETBITS, TRUE, &verbose,
 522             '4', MDB_OPT_SETBITS, TRUE, &four,
 523             NULL) != argc)
 524                 return (DCMD_USAGE);
 525 
 526         l.l_phys = zlp;
 527         l.l_bs = 14; /* assume 16k blocks */
 528         if (four)
 529                 l.l_bs = 12;
 530 
 531         if (!(flags & DCMD_ADDRSPEC)) {
 532                 return (DCMD_USAGE);
 533         }
 534 
 535         if (mdb_vread(buf, sizeof (buf), addr) == -1) {
 536                 mdb_warn("failed to read zap_leaf_phys_t at %p", addr);
 537                 return (DCMD_ERR);
 538         }
 539 
 540         if (zlp->l_hdr.lh_block_type != ZBT_LEAF ||
 541             zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) {
 542                 mdb_warn("This does not appear to be a zap_leaf_phys_t");
 543                 return (DCMD_ERR);
 544         }
 545 
 546         mdb_printf("zap_leaf_phys_t at %p:\n", addr);
 547         mdb_printf("    lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len);
 548         mdb_printf("    lh_prefix = %llx\n", zlp->l_hdr.lh_prefix);
 549         mdb_printf("    lh_nentries = %u\n", zlp->l_hdr.lh_nentries);
 550         mdb_printf("    lh_nfree = %u\n", zlp->l_hdr.lh_nfree,
 551             zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l)));
 552         mdb_printf("    lh_freelist = %u\n", zlp->l_hdr.lh_freelist);
 553         mdb_printf("    lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags,
 554             zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ?
 555             "ENTRIES_CDSORTED" : "");
 556 
 557         if (verbose) {
 558                 mdb_printf(" hash table:\n");
 559                 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) {
 560                         if (zlp->l_hash[i] != CHAIN_END)
 561                                 mdb_printf("    %u: %u\n", i, zlp->l_hash[i]);
 562                 }
 563         }
 564 
 565         mdb_printf(" chunks:\n");
 566         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
 567                 /* LINTED: alignment */
 568                 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i);
 569                 switch (zlc->l_entry.le_type) {
 570                 case ZAP_CHUNK_FREE:
 571                         if (verbose) {
 572                                 mdb_printf("    %u: free; lf_next = %u\n",
 573                                     i, zlc->l_free.lf_next);
 574                         }
 575                         break;
 576                 case ZAP_CHUNK_ENTRY:
 577                         mdb_printf("    %u: entry\n", i);
 578                         if (verbose) {
 579                                 mdb_printf("        le_next = %u\n",
 580                                     zlc->l_entry.le_next);
 581                         }
 582                         mdb_printf("        le_name_chunk = %u\n",
 583                             zlc->l_entry.le_name_chunk);
 584                         mdb_printf("        le_name_numints = %u\n",
 585                             zlc->l_entry.le_name_numints);
 586                         mdb_printf("        le_value_chunk = %u\n",
 587                             zlc->l_entry.le_value_chunk);
 588                         mdb_printf("        le_value_intlen = %u\n",
 589                             zlc->l_entry.le_value_intlen);
 590                         mdb_printf("        le_value_numints = %u\n",
 591                             zlc->l_entry.le_value_numints);
 592                         mdb_printf("        le_cd = %u\n",
 593                             zlc->l_entry.le_cd);
 594                         mdb_printf("        le_hash = %llx\n",
 595                             zlc->l_entry.le_hash);
 596                         break;
 597                 case ZAP_CHUNK_ARRAY:
 598                         mdb_printf("    %u: array", i);
 599                         if (strisprint((char *)zlc->l_array.la_array))
 600                                 mdb_printf(" \"%s\"", zlc->l_array.la_array);
 601                         mdb_printf("\n");
 602                         if (verbose) {
 603                                 int j;
 604                                 mdb_printf("        ");
 605                                 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) {
 606                                         mdb_printf("%02x ",
 607                                             zlc->l_array.la_array[j]);
 608                                 }
 609                                 mdb_printf("\n");
 610                         }
 611                         if (zlc->l_array.la_next != CHAIN_END) {
 612                                 mdb_printf("        lf_next = %u\n",
 613                                     zlc->l_array.la_next);
 614                         }
 615                         break;
 616                 default:
 617                         mdb_printf("    %u: undefined type %u\n",
 618                             zlc->l_entry.le_type);
 619                 }
 620         }
 621 
 622         return (DCMD_OK);
 623 }
 624 
/*
 * Filter criteria passed from the dbufs() dcmd to its walk callback
 * dbufs_cb().  A dbuf is printed only if it matches every criterion that is
 * set.
 */
typedef struct dbufs_data {
        mdb_ctf_id_t id;        /* CTF id looked up for dmu_buf_impl */
        uint64_t objset;        /* db_objset value to match, or DBUFS_UNSET */
        uint64_t object;        /* db_object value to match, or DBUFS_UNSET */
        uint64_t level;         /* db_level value to match, or DBUFS_UNSET */
        uint64_t blkid;         /* db_blkid value to match, or DBUFS_UNSET */
        char *osname;           /* objset name to match, or NULL for any */
} dbufs_data_t;

/* Sentinel stored in the uint64_t filter fields when no value was given. */
#define DBUFS_UNSET     (0xbaddcafedeadbeefULL)
 635 
 636 /* ARGSUSED */
 637 static int
 638 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
 639 {
 640         dbufs_data_t *data = arg;
 641         uintptr_t objset;
 642         dmu_buf_t db;
 643         uint8_t level;
 644         uint64_t blkid;
 645         char osname[MAXNAMELEN];
 646 
 647         if (GETMEMBID(addr, &data->id, db_objset, objset) ||
 648             GETMEMBID(addr, &data->id, db, db) ||
 649             GETMEMBID(addr, &data->id, db_level, level) ||
 650             GETMEMBID(addr, &data->id, db_blkid, blkid)) {
 651                 return (WALK_ERR);
 652         }
 653 
 654         if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
 655             (data->osname == NULL || (objset_name(objset, osname) == 0 &&
 656             strcmp(data->osname, osname) == 0)) &&
 657             (data->object == DBUFS_UNSET || data->object == db.db_object) &&
 658             (data->level == DBUFS_UNSET || data->level == level) &&
 659             (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
 660                 mdb_printf("%#lr\n", addr);
 661         }
 662         return (WALK_NEXT);
 663 }
 664 
 665 /* ARGSUSED */
 666 static int
 667 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 668 {
 669         dbufs_data_t data;
 670         char *object = NULL;
 671         char *blkid = NULL;
 672 
 673         data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
 674         data.osname = NULL;
 675 
 676         if (mdb_getopts(argc, argv,
 677             'O', MDB_OPT_UINT64, &data.objset,
 678             'n', MDB_OPT_STR, &data.osname,
 679             'o', MDB_OPT_STR, &object,
 680             'l', MDB_OPT_UINT64, &data.level,
 681             'b', MDB_OPT_STR, &blkid) != argc) {
 682                 return (DCMD_USAGE);
 683         }
 684 
 685         if (object) {
 686                 if (strcmp(object, "mdn") == 0) {
 687                         data.object = DMU_META_DNODE_OBJECT;
 688                 } else {
 689                         data.object = mdb_strtoull(object);
 690                 }
 691         }
 692 
 693         if (blkid) {
 694                 if (strcmp(blkid, "bonus") == 0) {
 695                         data.blkid = DMU_BONUS_BLKID;
 696                 } else {
 697                         data.blkid = mdb_strtoull(blkid);
 698                 }
 699         }
 700 
 701         if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) {
 702                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 703                 return (DCMD_ERR);
 704         }
 705 
 706         if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
 707                 mdb_warn("can't walk dbufs");
 708                 return (DCMD_ERR);
 709         }
 710 
 711         return (DCMD_OK);
 712 }
 713 
/* Search key passed from the abuf_find() dcmd to abuf_find_cb(). */
typedef struct abuf_find_data {
        dva_t dva;              /* the two dva_word values to look for */
        mdb_ctf_id_t id;        /* CTF id looked up for arc_buf_hdr */
} abuf_find_data_t;
 718 
 719 /* ARGSUSED */
 720 static int
 721 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
 722 {
 723         abuf_find_data_t *data = arg;
 724         dva_t dva;
 725 
 726         if (GETMEMBID(addr, &data->id, b_dva, dva)) {
 727                 return (WALK_ERR);
 728         }
 729 
 730         if (dva.dva_word[0] == data->dva.dva_word[0] &&
 731             dva.dva_word[1] == data->dva.dva_word[1]) {
 732                 mdb_printf("%#lr\n", addr);
 733         }
 734         return (WALK_NEXT);
 735 }
 736 
 737 /* ARGSUSED */
 738 static int
 739 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 740 {
 741         abuf_find_data_t data;
 742         GElf_Sym sym;
 743         int i;
 744         const char *syms[] = {
 745                 "ARC_mru",
 746                 "ARC_mru_ghost",
 747                 "ARC_mfu",
 748                 "ARC_mfu_ghost",
 749         };
 750 
 751         if (argc != 2)
 752                 return (DCMD_USAGE);
 753 
 754         for (i = 0; i < 2; i ++) {
 755                 switch (argv[i].a_type) {
 756                 case MDB_TYPE_STRING:
 757                         data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
 758                         break;
 759                 case MDB_TYPE_IMMEDIATE:
 760                         data.dva.dva_word[i] = argv[i].a_un.a_val;
 761                         break;
 762                 default:
 763                         return (DCMD_USAGE);
 764                 }
 765         }
 766 
 767         if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) {
 768                 mdb_warn("couldn't find struct arc_buf_hdr");
 769                 return (DCMD_ERR);
 770         }
 771 
 772         for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
 773                 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) {
 774                         mdb_warn("can't find symbol %s", syms[i]);
 775                         return (DCMD_ERR);
 776                 }
 777 
 778                 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
 779                         mdb_warn("can't walk %s", syms[i]);
 780                         return (DCMD_ERR);
 781                 }
 782         }
 783 
 784         return (DCMD_OK);
 785 }
 786 
 787 
/* Display options set by the dbgmsg() dcmd and read by dbgmsg_cb(). */
typedef struct dbgmsg_arg {
        boolean_t da_verbose;   /* -v: also print timestamp and ::whatis */
        boolean_t da_address;   /* -a: print each message's address first */
} dbgmsg_arg_t;
 792 
 793 /* ARGSUSED */
 794 static int
 795 dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
 796 {
 797         static mdb_ctf_id_t id;
 798         static boolean_t gotid;
 799         static ulong_t off;
 800 
 801         dbgmsg_arg_t *da = arg;
 802         time_t timestamp;
 803         char buf[1024];
 804 
 805         if (!gotid) {
 806                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) ==
 807                     -1) {
 808                         mdb_warn("couldn't find struct zfs_dbgmsg");
 809                         return (WALK_ERR);
 810                 }
 811                 gotid = TRUE;
 812                 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
 813                         mdb_warn("couldn't find zdm_msg");
 814                         return (WALK_ERR);
 815                 }
 816                 off /= 8;
 817         }
 818 
 819 
 820         if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
 821                 return (WALK_ERR);
 822         }
 823 
 824         if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
 825                 mdb_warn("failed to read zdm_msg at %p\n", addr + off);
 826                 return (DCMD_ERR);
 827         }
 828 
 829         if (da->da_address)
 830                 mdb_printf("%p ", addr);
 831         if (da->da_verbose)
 832                 mdb_printf("%Y ", timestamp);
 833 
 834         mdb_printf("%s\n", buf);
 835 
 836         if (da->da_verbose)
 837                 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
 838 
 839         return (WALK_NEXT);
 840 }
 841 
 842 /* ARGSUSED */
 843 static int
 844 dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 845 {
 846         GElf_Sym sym;
 847         dbgmsg_arg_t da = { 0 };
 848 
 849         if (mdb_getopts(argc, argv,
 850             'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose,
 851             'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address,
 852             NULL) != argc)
 853                 return (DCMD_USAGE);
 854 
 855         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
 856                 mdb_warn("can't find zfs_dbgmsgs");
 857                 return (DCMD_ERR);
 858         }
 859 
 860         if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) {
 861                 mdb_warn("can't walk zfs_dbgmsgs");
 862                 return (DCMD_ERR);
 863         }
 864 
 865         return (DCMD_OK);
 866 }
 867 
 868 /*ARGSUSED*/
 869 static int
 870 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 871 {
 872         kstat_named_t *stats;
 873         GElf_Sym sym;
 874         int nstats, i;
 875         uint_t opt_a = FALSE;
 876         uint_t opt_b = FALSE;
 877         uint_t shift = 0;
 878         const char *suffix;
 879 
 880         static const char *bytestats[] = {
 881                 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
 882                 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
 883                 NULL
 884         };
 885 
 886         static const char *extras[] = {
 887                 "arc_no_grow", "arc_tempreserve",
 888                 NULL
 889         };
 890 
 891         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) {
 892                 mdb_warn("failed to find 'arc_stats'");
 893                 return (DCMD_ERR);
 894         }
 895 
 896         stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
 897 
 898         if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
 899                 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
 900                 return (DCMD_ERR);
 901         }
 902 
 903         nstats = sym.st_size / sizeof (kstat_named_t);
 904 
 905         /* NB: -a / opt_a are ignored for backwards compatability */
 906         if (mdb_getopts(argc, argv,
 907             'a', MDB_OPT_SETBITS, TRUE, &opt_a,
 908             'b', MDB_OPT_SETBITS, TRUE, &opt_b,
 909             'k', MDB_OPT_SETBITS, 10, &shift,
 910             'm', MDB_OPT_SETBITS, 20, &shift,
 911             'g', MDB_OPT_SETBITS, 30, &shift,
 912             NULL) != argc)
 913                 return (DCMD_USAGE);
 914 
 915         if (!opt_b && !shift)
 916                 shift = 20;
 917 
 918         switch (shift) {
 919         case 0:
 920                 suffix = "B";
 921                 break;
 922         case 10:
 923                 suffix = "KB";
 924                 break;
 925         case 20:
 926                 suffix = "MB";
 927                 break;
 928         case 30:
 929                 suffix = "GB";
 930                 break;
 931         default:
 932                 suffix = "XX";
 933         }
 934 
 935         for (i = 0; i < nstats; i++) {
 936                 int j;
 937                 boolean_t bytes = B_FALSE;
 938 
 939                 for (j = 0; bytestats[j]; j++) {
 940                         if (strcmp(stats[i].name, bytestats[j]) == 0) {
 941                                 bytes = B_TRUE;
 942                                 break;
 943                         }
 944                 }
 945 
 946                 if (bytes) {
 947                         mdb_printf("%-25s = %9llu %s\n", stats[i].name,
 948                             stats[i].value.ui64 >> shift, suffix);
 949                 } else {
 950                         mdb_printf("%-25s = %9llu\n", stats[i].name,
 951                             stats[i].value.ui64);
 952                 }
 953         }
 954 
 955         for (i = 0; extras[i]; i++) {
 956                 uint64_t buf;
 957 
 958                 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) {
 959                         mdb_warn("failed to find '%s'", extras[i]);
 960                         return (DCMD_ERR);
 961                 }
 962 
 963                 if (sym.st_size != sizeof (uint64_t) &&
 964                     sym.st_size != sizeof (uint32_t)) {
 965                         mdb_warn("expected scalar for variable '%s'\n",
 966                             extras[i]);
 967                         return (DCMD_ERR);
 968                 }
 969 
 970                 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
 971                         mdb_warn("couldn't read '%s'", extras[i]);
 972                         return (DCMD_ERR);
 973                 }
 974 
 975                 mdb_printf("%-25s = ", extras[i]);
 976 
 977                 /* NB: all the 64-bit extras happen to be byte counts */
 978                 if (sym.st_size == sizeof (uint64_t))
 979                         mdb_printf("%9llu %s\n", buf >> shift, suffix);
 980 
 981                 if (sym.st_size == sizeof (uint32_t))
 982                         mdb_printf("%9d\n", *((uint32_t *)&buf));
 983         }
 984         return (DCMD_OK);
 985 }
 986 
/*
 * The members of spa_t that spa_print() displays.  The structure is filled
 * in by mdb_ctf_vread() from a target "spa_t".
 */
typedef struct mdb_spa_print {
        /* Indexes statetab[] in spa_print(); out-of-range prints "UNKNOWN" */
        pool_state_t spa_state;
        /* Printed in the NAME column */
        char spa_name[MAXNAMELEN];
} mdb_spa_print_t;
 991 
 992 /*
 993  * ::spa
 994  *
 995  *      -c      Print configuration information as well
 996  *      -v      Print vdev state
 997  *      -e      Print vdev error stats
 998  *      -m      Print vdev metaslab info
 999  *      -M      print vdev metaslab group info
1000  *      -h      Print histogram info (must be combined with -m or -M)
1001  *
1002  * Print a summarized spa_t.  When given no arguments, prints out a table of all
1003  * active pools on the system.
1004  */
1005 /* ARGSUSED */
1006 static int
1007 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1008 {
1009         const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
1010                 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
1011         const char *state;
1012         int spa_flags = 0;
1013 
1014         if (mdb_getopts(argc, argv,
1015             'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags,
1016             'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags,
1017             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1018             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1019             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1020             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1021             NULL) != argc)
1022                 return (DCMD_USAGE);
1023 
1024         if (!(flags & DCMD_ADDRSPEC)) {
1025                 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
1026                         mdb_warn("can't walk spa");
1027                         return (DCMD_ERR);
1028                 }
1029 
1030                 return (DCMD_OK);
1031         }
1032 
1033         if (flags & DCMD_PIPE_OUT) {
1034                 mdb_printf("%#lr\n", addr);
1035                 return (DCMD_OK);
1036         }
1037 
1038         if (DCMD_HDRSPEC(flags))
1039                 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
1040                     sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
1041 
1042         mdb_spa_print_t spa;
1043         if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1)
1044                 return (DCMD_ERR);
1045 
1046         if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
1047                 state = "UNKNOWN";
1048         else
1049                 state = statetab[spa.spa_state];
1050 
1051         mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
1052 
1053         if (spa_flags & SPA_FLAG_CONFIG) {
1054                 mdb_printf("\n");
1055                 mdb_inc_indent(4);
1056                 if (mdb_call_dcmd("spa_config", addr, flags, 0,
1057                     NULL) != DCMD_OK)
1058                         return (DCMD_ERR);
1059                 mdb_dec_indent(4);
1060         }
1061 
1062         if (spa_flags & SPA_FLAG_ALL_VDEV) {
1063                 mdb_arg_t v;
1064                 char opts[100] = "-";
1065                 int args =
1066                     (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1;
1067 
1068                 if (spa_flags & SPA_FLAG_ERRORS)
1069                         strcat(opts, "e");
1070                 if (spa_flags & SPA_FLAG_METASLABS)
1071                         strcat(opts, "m");
1072                 if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1073                         strcat(opts, "M");
1074                 if (spa_flags & SPA_FLAG_HISTOGRAMS)
1075                         strcat(opts, "h");
1076 
1077                 v.a_type = MDB_TYPE_STRING;
1078                 v.a_un.a_str = opts;
1079 
1080                 mdb_printf("\n");
1081                 mdb_inc_indent(4);
1082                 if (mdb_call_dcmd("spa_vdevs", addr, flags, args,
1083                     &v) != DCMD_OK)
1084                         return (DCMD_ERR);
1085                 mdb_dec_indent(4);
1086         }
1087 
1088         return (DCMD_OK);
1089 }
1090 
/*
 * The single spa member spa_print_config() needs; filled in by
 * mdb_ctf_vread().
 */
typedef struct mdb_spa_config_spa {
        /* Target address handed to ::nvlist; 0 is reported as "(none)" */
        uintptr_t spa_config;
} mdb_spa_config_spa_t;
1094 
1095 /*
1096  * ::spa_config
1097  *
1098  * Given a spa_t, print the configuration information stored in spa_config.
1099  * Since it's just an nvlist, format it as an indented list of name=value pairs.
1100  * We simply read the value of spa_config and pass off to ::nvlist.
1101  */
1102 /* ARGSUSED */
1103 static int
1104 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1105 {
1106         mdb_spa_config_spa_t spa;
1107 
1108         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1109                 return (DCMD_USAGE);
1110 
1111         if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t",
1112             addr, 0) == -1)
1113                 return (DCMD_ERR);
1114 
1115         if (spa.spa_config == 0) {
1116                 mdb_printf("(none)\n");
1117                 return (DCMD_OK);
1118         }
1119 
1120         return (mdb_call_dcmd("nvlist", spa.spa_config, flags,
1121             0, NULL));
1122 }
1123 
/*
 * Bar template for dump_histogram(): each row prints the tail of this string,
 * so a larger starting offset yields a shorter bar.
 */
const char histo_stars[] = "****************************************";
/* Number of '*' characters in histo_stars (its size minus the NUL). */
const int histo_width = sizeof (histo_stars) - 1;
1126 
1127 static void
1128 dump_histogram(const uint64_t *histo, int size, int offset)
1129 {
1130         int i;
1131         int minidx = size - 1;
1132         int maxidx = 0;
1133         uint64_t max = 0;
1134 
1135         for (i = 0; i < size; i++) {
1136                 if (histo[i] > max)
1137                         max = histo[i];
1138                 if (histo[i] > 0 && i > maxidx)
1139                         maxidx = i;
1140                 if (histo[i] > 0 && i < minidx)
1141                         minidx = i;
1142         }
1143 
1144         if (max < histo_width)
1145                 max = histo_width;
1146 
1147         for (i = minidx; i <= maxidx; i++) {
1148                 mdb_printf("%3u: %6llu %s\n",
1149                     i + offset, (u_longlong_t)histo[i],
1150                     &histo_stars[(max - histo[i]) * histo_width / max]);
1151         }
1152 }
1153 
/*
 * Subset of range_tree_t fetched with mdb_ctf_vread(); space_cb() sums
 * rt_space over the trees of each metaslab.
 */
typedef struct mdb_range_tree {
        uint64_t rt_space;
} mdb_range_tree_t;
1157 
/*
 * Subset of metaslab_group_t fetched with mdb_ctf_vread() by
 * metaslab_group_stats().
 */
typedef struct mdb_metaslab_group {
        /* Printed as a percentage; ZFS_FRAG_INVALID is shown as "-" */
        uint64_t mg_fragmentation;
        /* Passed to dump_histogram() when histograms are requested */
        uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
} mdb_metaslab_group_t;
1162 
/*
 * Subset of metaslab_t fetched with mdb_ctf_vread() by metaslab_stats() and
 * space_cb().  The uintptr_t members hold target addresses, not local
 * pointers.
 */
typedef struct mdb_metaslab {
        uint64_t ms_id;                 /* ID column in metaslab_stats() */
        uint64_t ms_start;              /* OFFSET column (printed in hex) */
        uint64_t ms_size;               /* FREE = ms_size - sm_alloc */
        uint64_t ms_fragmentation;      /* ZFS_FRAG_INVALID shown as "-" */
        uintptr_t ms_alloctree[TXG_SIZE]; /* range_tree_t addresses */
        uintptr_t ms_freetree[TXG_SIZE]; /* range_tree_t addresses */
        uintptr_t ms_tree;              /* range_tree_t address */
        uintptr_t ms_sm;                /* space_map_t address, may be 0 */
} mdb_metaslab_t;
1173 
/*
 * Subset of space_map_phys_t fetched with mdb_ctf_vread().
 */
typedef struct mdb_space_map_phys_t {
        /* Subtracted from sm_size by space_cb() for its "nowavail" total */
        uint64_t smp_alloc;
        /* Dumped by metaslab_stats() when histograms are requested */
        uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
} mdb_space_map_phys_t;
1178 
/*
 * Subset of space_map_t fetched with mdb_ctf_vread() by metaslab_stats() and
 * space_cb().
 */
typedef struct mdb_space_map {
        uint64_t sm_size;       /* space_cb(): avail = sm_size - sm_alloc */
        uint8_t sm_shift;       /* index offset for the histogram rows */
        uint64_t sm_alloc;      /* subtracted from a size to get free space */
        uintptr_t sm_phys;      /* space_map_phys_t address, may be 0 */
} mdb_space_map_t;
1185 
/*
 * Subset of vdev_t fetched with mdb_ctf_vread() by metaslab_stats().
 */
typedef struct mdb_vdev {
        /* Target address of an array of vdev_ms_count metaslab pointers */
        uintptr_t vdev_ms;
        /* Number of entries in the vdev_ms array */
        uint64_t vdev_ms_count;
        /* Copied along with the rest; metaslab_stats() does not read it */
        vdev_stat_t vdev_stat;
} mdb_vdev_t;
1191 
1192 static int
1193 metaslab_stats(uintptr_t addr, int spa_flags)
1194 {
1195         mdb_vdev_t vdev;
1196         uintptr_t *vdev_ms;
1197 
1198         if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
1199             (uintptr_t)addr, 0) == -1) {
1200                 mdb_warn("failed to read vdev at %p\n", addr);
1201                 return (DCMD_ERR);
1202         }
1203 
1204         mdb_inc_indent(4);
1205         mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID",
1206             "OFFSET", "FREE", "FRAGMENTATION");
1207 
1208         vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *),
1209             UM_SLEEP | UM_GC);
1210         if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *),
1211             (uintptr_t)vdev.vdev_ms) == -1) {
1212                 mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms);
1213                 return (DCMD_ERR);
1214         }
1215 
1216         for (int m = 0; m < vdev.vdev_ms_count; m++) {
1217                 mdb_metaslab_t ms;
1218                 mdb_space_map_t sm = { 0 };
1219                 char free[NICENUM_BUFLEN];
1220 
1221                 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1222                     (uintptr_t)vdev_ms[m], 0) == -1)
1223                         return (DCMD_ERR);
1224 
1225                 if (ms.ms_sm != NULL &&
1226                     mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t",
1227                     ms.ms_sm, 0) == -1)
1228                         return (DCMD_ERR);
1229 
1230                 mdb_nicenum(ms.ms_size - sm.sm_alloc, free);
1231 
1232                 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id,
1233                     ms.ms_start, free);
1234                 if (ms.ms_fragmentation == ZFS_FRAG_INVALID)
1235                         mdb_printf("%9s\n", "-");
1236                 else
1237                         mdb_printf("%9llu%%\n", ms.ms_fragmentation);
1238 
1239                 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != NULL) {
1240                         mdb_space_map_phys_t smp;
1241 
1242                         if (sm.sm_phys == NULL)
1243                                 continue;
1244 
1245                         (void) mdb_ctf_vread(&smp, "space_map_phys_t",
1246                             "mdb_space_map_phys_t", sm.sm_phys, 0);
1247 
1248                         dump_histogram(smp.smp_histogram,
1249                             SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift);
1250                 }
1251         }
1252         mdb_dec_indent(4);
1253         return (DCMD_OK);
1254 }
1255 
1256 static int
1257 metaslab_group_stats(uintptr_t addr, int spa_flags)
1258 {
1259         mdb_metaslab_group_t mg;
1260         if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1261             (uintptr_t)addr, 0) == -1) {
1262                 mdb_warn("failed to read vdev_mg at %p\n", addr);
1263                 return (DCMD_ERR);
1264         }
1265 
1266         mdb_inc_indent(4);
1267         mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION");
1268         if (mg.mg_fragmentation == ZFS_FRAG_INVALID)
1269                 mdb_printf("%0?p %15s\n", addr, "-");
1270         else
1271                 mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation);
1272 
1273         if (spa_flags & SPA_FLAG_HISTOGRAMS)
1274                 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1275         mdb_dec_indent(4);
1276         return (DCMD_OK);
1277 }
1278 
1279 /*
1280  * ::vdev
1281  *
1282  * Print out a summarized vdev_t, in the following form:
1283  *
1284  * ADDR             STATE       AUX            DESC
1285  * fffffffbcde23df0 HEALTHY     -              /dev/dsk/c0t0d0
1286  *
1287  * If '-r' is specified, recursively visit all children.
1288  *
1289  * With '-e', the statistics associated with the vdev are printed as well.
1290  */
1291 static int
1292 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive,
1293     int spa_flags)
1294 {
1295         vdev_t vdev;
1296         char desc[MAXNAMELEN];
1297         int c, children;
1298         uintptr_t *child;
1299         const char *state, *aux;
1300 
1301         if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1302                 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1303                 return (DCMD_ERR);
1304         }
1305 
1306         if (flags & DCMD_PIPE_OUT) {
1307                 mdb_printf("%#lr\n", addr);
1308         } else {
1309                 if (vdev.vdev_path != NULL) {
1310                         if (mdb_readstr(desc, sizeof (desc),
1311                             (uintptr_t)vdev.vdev_path) == -1) {
1312                                 mdb_warn("failed to read vdev_path at %p\n",
1313                                     vdev.vdev_path);
1314                                 return (DCMD_ERR);
1315                         }
1316                 } else if (vdev.vdev_ops != NULL) {
1317                         vdev_ops_t ops;
1318                         if (mdb_vread(&ops, sizeof (ops),
1319                             (uintptr_t)vdev.vdev_ops) == -1) {
1320                                 mdb_warn("failed to read vdev_ops at %p\n",
1321                                     vdev.vdev_ops);
1322                                 return (DCMD_ERR);
1323                         }
1324                         (void) strcpy(desc, ops.vdev_op_type);
1325                 } else {
1326                         (void) strcpy(desc, "<unknown>");
1327                 }
1328 
1329                 if (depth == 0 && DCMD_HDRSPEC(flags))
1330                         mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1331                             "ADDR", "STATE", "AUX",
1332                             sizeof (uintptr_t) == 4 ? 43 : 35,
1333                             "DESCRIPTION");
1334 
1335                 mdb_printf("%0?p ", addr);
1336 
1337                 switch (vdev.vdev_state) {
1338                 case VDEV_STATE_CLOSED:
1339                         state = "CLOSED";
1340                         break;
1341                 case VDEV_STATE_OFFLINE:
1342                         state = "OFFLINE";
1343                         break;
1344                 case VDEV_STATE_CANT_OPEN:
1345                         state = "CANT_OPEN";
1346                         break;
1347                 case VDEV_STATE_DEGRADED:
1348                         state = "DEGRADED";
1349                         break;
1350                 case VDEV_STATE_HEALTHY:
1351                         state = "HEALTHY";
1352                         break;
1353                 case VDEV_STATE_REMOVED:
1354                         state = "REMOVED";
1355                         break;
1356                 case VDEV_STATE_FAULTED:
1357                         state = "FAULTED";
1358                         break;
1359                 default:
1360                         state = "UNKNOWN";
1361                         break;
1362                 }
1363 
1364                 switch (vdev.vdev_stat.vs_aux) {
1365                 case VDEV_AUX_NONE:
1366                         aux = "-";
1367                         break;
1368                 case VDEV_AUX_OPEN_FAILED:
1369                         aux = "OPEN_FAILED";
1370                         break;
1371                 case VDEV_AUX_CORRUPT_DATA:
1372                         aux = "CORRUPT_DATA";
1373                         break;
1374                 case VDEV_AUX_NO_REPLICAS:
1375                         aux = "NO_REPLICAS";
1376                         break;
1377                 case VDEV_AUX_BAD_GUID_SUM:
1378                         aux = "BAD_GUID_SUM";
1379                         break;
1380                 case VDEV_AUX_TOO_SMALL:
1381                         aux = "TOO_SMALL";
1382                         break;
1383                 case VDEV_AUX_BAD_LABEL:
1384                         aux = "BAD_LABEL";
1385                         break;
1386                 case VDEV_AUX_VERSION_NEWER:
1387                         aux = "VERS_NEWER";
1388                         break;
1389                 case VDEV_AUX_VERSION_OLDER:
1390                         aux = "VERS_OLDER";
1391                         break;
1392                 case VDEV_AUX_UNSUP_FEAT:
1393                         aux = "UNSUP_FEAT";
1394                         break;
1395                 case VDEV_AUX_SPARED:
1396                         aux = "SPARED";
1397                         break;
1398                 case VDEV_AUX_ERR_EXCEEDED:
1399                         aux = "ERR_EXCEEDED";
1400                         break;
1401                 case VDEV_AUX_IO_FAILURE:
1402                         aux = "IO_FAILURE";
1403                         break;
1404                 case VDEV_AUX_BAD_LOG:
1405                         aux = "BAD_LOG";
1406                         break;
1407                 case VDEV_AUX_EXTERNAL:
1408                         aux = "EXTERNAL";
1409                         break;
1410                 case VDEV_AUX_SPLIT_POOL:
1411                         aux = "SPLIT_POOL";
1412                         break;
1413                 default:
1414                         aux = "UNKNOWN";
1415                         break;
1416                 }
1417 
1418                 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1419 
1420                 if (spa_flags & SPA_FLAG_ERRORS) {
1421                         vdev_stat_t *vs = &vdev.vdev_stat;
1422                         int i;
1423 
1424                         mdb_inc_indent(4);
1425                         mdb_printf("\n");
1426                         mdb_printf("%<u>       %12s %12s %12s %12s "
1427                             "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1428                             "IOCTL");
1429                         mdb_printf("OPS     ");
1430                         for (i = 1; i < ZIO_TYPES; i++)
1431                                 mdb_printf("%11#llx%s", vs->vs_ops[i],
1432                                     i == ZIO_TYPES - 1 ? "" : "  ");
1433                         mdb_printf("\n");
1434                         mdb_printf("BYTES   ");
1435                         for (i = 1; i < ZIO_TYPES; i++)
1436                                 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1437                                     i == ZIO_TYPES - 1 ? "" : "  ");
1438 
1439 
1440                         mdb_printf("\n");
1441                         mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1442                         mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1443                         mdb_printf("ECKSUM   %10#llx\n",
1444                             vs->vs_checksum_errors);
1445                         mdb_dec_indent(4);
1446                         mdb_printf("\n");
1447                 }
1448 
1449                 if (spa_flags & SPA_FLAG_METASLAB_GROUPS &&
1450                     vdev.vdev_mg != NULL) {
1451                         metaslab_group_stats((uintptr_t)vdev.vdev_mg,
1452                             spa_flags);
1453                 }
1454                 if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) {
1455                         metaslab_stats((uintptr_t)addr, spa_flags);
1456                 }
1457         }
1458 
1459         children = vdev.vdev_children;
1460 
1461         if (children == 0 || !recursive)
1462                 return (DCMD_OK);
1463 
1464         child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1465         if (mdb_vread(child, children * sizeof (void *),
1466             (uintptr_t)vdev.vdev_child) == -1) {
1467                 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1468                 return (DCMD_ERR);
1469         }
1470 
1471         for (c = 0; c < children; c++) {
1472                 if (do_print_vdev(child[c], flags, depth + 2, recursive,
1473                     spa_flags)) {
1474                         return (DCMD_ERR);
1475                 }
1476         }
1477 
1478         return (DCMD_OK);
1479 }
1480 
1481 static int
1482 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1483 {
1484         uint64_t depth = 0;
1485         boolean_t recursive = B_FALSE;
1486         int spa_flags = 0;
1487 
1488         if (mdb_getopts(argc, argv,
1489             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1490             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1491             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1492             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1493             'r', MDB_OPT_SETBITS, TRUE, &recursive,
1494             'd', MDB_OPT_UINT64, &depth, NULL) != argc)
1495                 return (DCMD_USAGE);
1496 
1497         if (!(flags & DCMD_ADDRSPEC)) {
1498                 mdb_warn("no vdev_t address given\n");
1499                 return (DCMD_ERR);
1500         }
1501 
1502         return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags));
1503 }
1504 
/*
 * Iteration state shared by metaslab_walk_init() and metaslab_walk_step().
 * Both arrays are local copies of target pointer arrays.
 */
typedef struct metaslab_walk_data {
        uint64_t mw_numvdevs;   /* number of entries in mw_vdevs */
        uintptr_t *mw_vdevs;    /* children of the pool's root vdev */
        int mw_curvdev;         /* index of the vdev being walked */
        uint64_t mw_nummss;     /* number of entries in mw_mss */
        uintptr_t *mw_mss;      /* current vdev's metaslabs; NULL = unloaded */
        int mw_curms;           /* index of the next metaslab to visit */
} metaslab_walk_data_t;
1513 
1514 static int
1515 metaslab_walk_step(mdb_walk_state_t *wsp)
1516 {
1517         metaslab_walk_data_t *mw = wsp->walk_data;
1518         metaslab_t ms;
1519         uintptr_t msp;
1520 
1521         if (mw->mw_curvdev >= mw->mw_numvdevs)
1522                 return (WALK_DONE);
1523 
1524         if (mw->mw_mss == NULL) {
1525                 uintptr_t mssp;
1526                 uintptr_t vdevp;
1527 
1528                 ASSERT(mw->mw_curms == 0);
1529                 ASSERT(mw->mw_nummss == 0);
1530 
1531                 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1532                 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) ||
1533                     GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) {
1534                         return (WALK_ERR);
1535                 }
1536 
1537                 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1538                     UM_SLEEP | UM_GC);
1539                 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1540                     mssp) == -1) {
1541                         mdb_warn("failed to read vdev_ms at %p", mssp);
1542                         return (WALK_ERR);
1543                 }
1544         }
1545 
1546         if (mw->mw_curms >= mw->mw_nummss) {
1547                 mw->mw_mss = NULL;
1548                 mw->mw_curms = 0;
1549                 mw->mw_nummss = 0;
1550                 mw->mw_curvdev++;
1551                 return (WALK_NEXT);
1552         }
1553 
1554         msp = mw->mw_mss[mw->mw_curms];
1555         if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1556                 mdb_warn("failed to read metaslab_t at %p", msp);
1557                 return (WALK_ERR);
1558         }
1559 
1560         mw->mw_curms++;
1561 
1562         return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1563 }
1564 
1565 /* ARGSUSED */
1566 static int
1567 metaslab_walk_init(mdb_walk_state_t *wsp)
1568 {
1569         metaslab_walk_data_t *mw;
1570         uintptr_t root_vdevp;
1571         uintptr_t childp;
1572 
1573         if (wsp->walk_addr == NULL) {
1574                 mdb_warn("must supply address of spa_t\n");
1575                 return (WALK_ERR);
1576         }
1577 
1578         mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1579 
1580         if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) ||
1581             GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) ||
1582             GETMEMB(root_vdevp, "vdev", vdev_child, childp)) {
1583                 return (DCMD_ERR);
1584         }
1585 
1586         mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1587             UM_SLEEP | UM_GC);
1588         if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1589             childp) == -1) {
1590                 mdb_warn("failed to read root vdev children at %p", childp);
1591                 return (DCMD_ERR);
1592         }
1593 
1594         wsp->walk_data = mw;
1595 
1596         return (WALK_NEXT);
1597 }
1598 
/*
 * The spa members spa_space() needs.  Both are target addresses that
 * spa_space() fills in one member at a time through GETMEMB().
 */
typedef struct mdb_spa {
        uintptr_t spa_dsl_pool;         /* used to locate dp_root_dir */
        uintptr_t spa_root_vdev;        /* used to locate the child vdevs */
} mdb_spa_t;
1603 
/*
 * The dsl_dir members spa_space() reads from the pool's root directory.
 */
typedef struct mdb_dsl_dir {
        /* Target address used for the dsl_dir_phys member reads */
        uintptr_t dd_phys;
        /* One value per slot; spa_space() prints them as dd_space_towrite */
        int64_t dd_space_towrite[TXG_SIZE];
} mdb_dsl_dir_t;
1608 
/*
 * The dsl_dir_phys counters that spa_space() prints, each scaled by the
 * unit selected on its command line.
 */
typedef struct mdb_dsl_dir_phys {
        uint64_t dd_used_bytes;
        uint64_t dd_compressed_bytes;
        uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1614 
/*
 * Running totals accumulated by space_cb() over every metaslab it visits.
 * The caller zeroes the structure before starting the walk.
 */
typedef struct space_data {
        uint64_t ms_alloctree[TXG_SIZE]; /* sum of rt_space, per slot */
        uint64_t ms_freetree[TXG_SIZE]; /* sum of rt_space, per slot */
        uint64_t ms_tree;               /* sum of rt_space of ms_tree */
        uint64_t avail;                 /* sum of sm_size - sm_alloc */
        uint64_t nowavail;              /* sum of sm_size - smp_alloc */
} space_data_t;
1622 
1623 /* ARGSUSED */
1624 static int
1625 space_cb(uintptr_t addr, const void *unknown, void *arg)
1626 {
1627         space_data_t *sd = arg;
1628         mdb_metaslab_t ms;
1629         mdb_range_tree_t rt;
1630         mdb_space_map_t sm = { 0 };
1631         mdb_space_map_phys_t smp = { 0 };
1632         int i;
1633 
1634         if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1635             addr, 0) == -1)
1636                 return (WALK_ERR);
1637 
1638         for (i = 0; i < TXG_SIZE; i++) {
1639                 if (mdb_ctf_vread(&rt, "range_tree_t",
1640                     "mdb_range_tree_t", ms.ms_alloctree[i], 0) == -1)
1641                         return (WALK_ERR);
1642 
1643                 sd->ms_alloctree[i] += rt.rt_space;
1644 
1645                 if (mdb_ctf_vread(&rt, "range_tree_t",
1646                     "mdb_range_tree_t", ms.ms_freetree[i], 0) == -1)
1647                         return (WALK_ERR);
1648 
1649                 sd->ms_freetree[i] += rt.rt_space;
1650         }
1651 
1652         if (mdb_ctf_vread(&rt, "range_tree_t",
1653             "mdb_range_tree_t", ms.ms_tree, 0) == -1)
1654                 return (WALK_ERR);
1655 
1656         if (ms.ms_sm != NULL &&
1657             mdb_ctf_vread(&sm, "space_map_t",
1658             "mdb_space_map_t", ms.ms_sm, 0) == -1)
1659                 return (WALK_ERR);
1660 
1661         if (sm.sm_phys != NULL) {
1662                 (void) mdb_ctf_vread(&smp, "space_map_phys_t",
1663                     "mdb_space_map_phys_t", sm.sm_phys, 0);
1664         }
1665 
1666         sd->ms_tree += rt.rt_space;
1667         sd->avail += sm.sm_size - sm.sm_alloc;
1668         sd->nowavail += sm.sm_size - smp.smp_alloc;
1669 
1670         return (WALK_NEXT);
1671 }
1672 
1673 /*
1674  * ::spa_space [-b]
1675  *
 * Given a spa_t, print out its on-disk space usage and in-core
1677  * estimates of future usage.  If -b is given, print space in bytes.
1678  * Otherwise print in megabytes.
1679  */
1680 /* ARGSUSED */
1681 static int
1682 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1683 {
1684         mdb_spa_t spa;
1685         uintptr_t dp_root_dir;
1686         mdb_dsl_dir_t dd;
1687         mdb_dsl_dir_phys_t dsp;
1688         uint64_t children;
1689         uintptr_t childaddr;
1690         space_data_t sd;
1691         int shift = 20;
1692         char *suffix = "M";
1693         int bytes = B_FALSE;
1694 
1695         if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) !=
1696             argc)
1697                 return (DCMD_USAGE);
1698         if (!(flags & DCMD_ADDRSPEC))
1699                 return (DCMD_USAGE);
1700 
1701         if (bytes) {
1702                 shift = 0;
1703                 suffix = "";
1704         }
1705 
1706         if (GETMEMB(addr, "spa", spa_dsl_pool, spa.spa_dsl_pool) ||
1707             GETMEMB(addr, "spa", spa_root_vdev, spa.spa_root_vdev) ||
1708             GETMEMB(spa.spa_root_vdev, "vdev", vdev_children, children) ||
1709             GETMEMB(spa.spa_root_vdev, "vdev", vdev_child, childaddr) ||
1710             GETMEMB(spa.spa_dsl_pool, "dsl_pool",
1711             dp_root_dir, dp_root_dir) ||
1712             GETMEMB(dp_root_dir, "dsl_dir", dd_phys, dd.dd_phys) ||
1713             GETMEMB(dp_root_dir, "dsl_dir",
1714             dd_space_towrite, dd.dd_space_towrite) ||
1715             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1716             dd_used_bytes, dsp.dd_used_bytes) ||
1717             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1718             dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1719             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1720             dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1721                 return (DCMD_ERR);
1722         }
1723 
1724         mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1725             dd.dd_space_towrite[0] >> shift, suffix,
1726             dd.dd_space_towrite[1] >> shift, suffix,
1727             dd.dd_space_towrite[2] >> shift, suffix,
1728             dd.dd_space_towrite[3] >> shift, suffix);
1729 
1730         mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1731             dsp.dd_used_bytes >> shift, suffix);
1732         mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1733             dsp.dd_compressed_bytes >> shift, suffix);
1734         mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1735             dsp.dd_uncompressed_bytes >> shift, suffix);
1736 
1737         bzero(&sd, sizeof (sd));
1738         if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1739                 mdb_warn("can't walk metaslabs");
1740                 return (DCMD_ERR);
1741         }
1742 
1743         mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1744             sd.ms_alloctree[0] >> shift, suffix,
1745             sd.ms_alloctree[1] >> shift, suffix,
1746             sd.ms_alloctree[2] >> shift, suffix,
1747             sd.ms_alloctree[3] >> shift, suffix);
1748         mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1749             sd.ms_freetree[0] >> shift, suffix,
1750             sd.ms_freetree[1] >> shift, suffix,
1751             sd.ms_freetree[2] >> shift, suffix,
1752             sd.ms_freetree[3] >> shift, suffix);
1753         mdb_printf("ms_tree = %llu%s\n", sd.ms_tree >> shift, suffix);
1754         mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1755         mdb_printf("current syncing avail = %llu%s\n",
1756             sd.nowavail >> shift, suffix);
1757 
1758         return (DCMD_OK);
1759 }
1760 
/*
 * Description of one set of auxiliary vdevs, embedded in mdb_spa_vdevs_t and
 * consumed by spa_print_aux().
 */
typedef struct mdb_spa_aux_vdev {
        int sav_count;          /* number of entries in the sav_vdevs array */
        uintptr_t sav_vdevs;    /* target address of an array of vdev pointers */
} mdb_spa_aux_vdev_t;
1765 
/*
 * The parts of a spa_t that ::spa_vdevs needs; filled in by mdb_ctf_vread()
 * from the target's spa_t.
 */
typedef struct mdb_spa_vdevs {
        uintptr_t spa_root_vdev;                /* address of the root vdev */
        mdb_spa_aux_vdev_t spa_l2cache;         /* printed under "cache" */
        mdb_spa_aux_vdev_t spa_spares;          /* printed under "spares" */
} mdb_spa_vdevs_t;
1771 
1772 static int
1773 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1774     const char *name)
1775 {
1776         uintptr_t *aux;
1777         size_t len;
1778         int ret, i;
1779 
1780         /*
1781          * Iterate over aux vdevs and print those out as well.  This is a
1782          * little annoying because we don't have a root vdev to pass to ::vdev.
1783          * Instead, we print a single line and then call it for each child
1784          * vdev.
1785          */
1786         if (sav->sav_count != 0) {
1787                 v[1].a_type = MDB_TYPE_STRING;
1788                 v[1].a_un.a_str = "-d";
1789                 v[2].a_type = MDB_TYPE_IMMEDIATE;
1790                 v[2].a_un.a_val = 2;
1791 
1792                 len = sav->sav_count * sizeof (uintptr_t);
1793                 aux = mdb_alloc(len, UM_SLEEP);
1794                 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) {
1795                         mdb_free(aux, len);
1796                         mdb_warn("failed to read l2cache vdevs at %p",
1797                             sav->sav_vdevs);
1798                         return (DCMD_ERR);
1799                 }
1800 
1801                 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1802 
1803                 for (i = 0; i < sav->sav_count; i++) {
1804                         ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1805                         if (ret != DCMD_OK) {
1806                                 mdb_free(aux, len);
1807                                 return (ret);
1808                         }
1809                 }
1810 
1811                 mdb_free(aux, len);
1812         }
1813 
1814         return (0);
1815 }
1816 
1817 /*
1818  * ::spa_vdevs
1819  *
1820  *      -e      Include error stats
1821  *      -m      Include metaslab information
1822  *      -M      Include metaslab group information
1823  *      -h      Include histogram information (requires -m or -M)
1824  *
1825  * Print out a summarized list of vdevs for the given spa_t.
1826  * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
1827  * iterating over the cache devices.
1828  */
1829 /* ARGSUSED */
1830 static int
1831 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1832 {
1833         mdb_arg_t v[3];
1834         int ret;
1835         char opts[100] = "-r";
1836         int spa_flags = 0;
1837 
1838         if (mdb_getopts(argc, argv,
1839             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1840             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1841             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1842             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1843             NULL) != argc)
1844                 return (DCMD_USAGE);
1845 
1846         if (!(flags & DCMD_ADDRSPEC))
1847                 return (DCMD_USAGE);
1848 
1849         mdb_spa_vdevs_t spa;
1850         if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1)
1851                 return (DCMD_ERR);
1852 
1853         /*
1854          * Unitialized spa_t structures can have a NULL root vdev.
1855          */
1856         if (spa.spa_root_vdev == NULL) {
1857                 mdb_printf("no associated vdevs\n");
1858                 return (DCMD_OK);
1859         }
1860 
1861         if (spa_flags & SPA_FLAG_ERRORS)
1862                 strcat(opts, "e");
1863         if (spa_flags & SPA_FLAG_METASLABS)
1864                 strcat(opts, "m");
1865         if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1866                 strcat(opts, "M");
1867         if (spa_flags & SPA_FLAG_HISTOGRAMS)
1868                 strcat(opts, "h");
1869 
1870         v[0].a_type = MDB_TYPE_STRING;
1871         v[0].a_un.a_str = opts;
1872 
1873         ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1874             flags, 1, v);
1875         if (ret != DCMD_OK)
1876                 return (ret);
1877 
1878         if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1879             spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1880                 return (DCMD_ERR);
1881 
1882         return (DCMD_OK);
1883 }
1884 
1885 /*
1886  * ::zio
1887  *
1888  * Print a summary of zio_t and all its children.  This is intended to display a
1889  * zio tree, and hence we only pick the most important pieces of information for
1890  * the main summary.  More detailed information can always be found by doing a
1891  * '::print zio' on the underlying zio_t.  The columns we display are:
1892  *
1893  *      ADDRESS  TYPE  STAGE  WAITER  TIME_ELAPSED
1894  *
1895  * The 'address' column is indented by one space for each depth level as we
1896  * descend down the tree.
1897  */
1898 
/* Largest indentation zio_print_cb applies to the address column. */
#define ZIO_MAXINDENT   7
/* Address column width: two hex digits per pointer byte plus the indent. */
#define ZIO_MAXWIDTH    (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
/*
 * Values of zpa_type.  PARENT makes zio_print_cb descend io_parent_list;
 * any other value makes it descend io_child_list.
 */
#define ZIO_WALK_SELF   0
#define ZIO_WALK_CHILD  1
#define ZIO_WALK_PARENT 2
1904 
/* State shared between zio_print and its callbacks while printing a tree. */
typedef struct zio_print_args {
        int     zpa_current_depth;      /* depth of the zio being visited */
        int     zpa_min_depth;          /* shallower zios are not printed */
        int     zpa_max_depth;          /* no descent at or below this depth */
        int     zpa_type;               /* one of the ZIO_WALK_* values */
        uint_t  zpa_flags;              /* dcmd flags; DCMD_PIPE_OUT is tested */
} zio_print_args_t;
1912 
/*
 * The parts of a target zio that this module uses; filled in by
 * mdb_ctf_vread() in zio_print_cb and in the zio walkers.
 */
typedef struct mdb_zio {
        enum zio_type io_type;          /* shown in the TYPE column */
        enum zio_stage io_stage;        /* shown in the STAGE column */
        uintptr_t io_waiter;            /* shown in the WAITER column if set */
        uintptr_t io_spa;               /* compared with the walkers' spa filter */
        struct {
                struct {
                        /* equals the list head's own address when empty */
                        uintptr_t list_next;
                } list_head;
        } io_parent_list;
        int io_error;                   /* nonzero: stage is shown as "FAILED" */
} mdb_zio_t;
1925 
/*
 * io_timestamp is read on its own, quietly and with the result ignored (see
 * zio_print_cb), so a failed read simply leaves the zero initializer in place.
 */
typedef struct mdb_zio_timestamp {
        hrtime_t io_timestamp;
} mdb_zio_timestamp_t;
1929 
1930 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
1931 
1932 static int
1933 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
1934 {
1935         mdb_ctf_id_t type_enum, stage_enum;
1936         int indent = zpa->zpa_current_depth;
1937         const char *type, *stage;
1938         uintptr_t laddr;
1939         mdb_zio_t zio;
1940         mdb_zio_timestamp_t zio_timestamp = { 0 };
1941 
1942         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
1943                 return (WALK_ERR);
1944         (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
1945             "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
1946 
1947         if (indent > ZIO_MAXINDENT)
1948                 indent = ZIO_MAXINDENT;
1949 
1950         if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
1951             mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
1952                 mdb_warn("failed to lookup zio enums");
1953                 return (WALK_ERR);
1954         }
1955 
1956         if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
1957                 type += sizeof ("ZIO_TYPE_") - 1;
1958         else
1959                 type = "?";
1960 
1961         if (zio.io_error == 0) {
1962                 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
1963                 if (stage != NULL)
1964                         stage += sizeof ("ZIO_STAGE_") - 1;
1965                 else
1966                         stage = "?";
1967         } else {
1968                 stage = "FAILED";
1969         }
1970 
1971         if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
1972                 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
1973                         mdb_printf("%?p\n", addr);
1974                 } else {
1975                         mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
1976                             ZIO_MAXWIDTH - indent, addr, type, stage);
1977                         if (zio.io_waiter != 0)
1978                                 mdb_printf("%-16lx ", zio.io_waiter);
1979                         else
1980                                 mdb_printf("%-16s ", "-");
1981 #ifdef _KERNEL
1982                         if (zio_timestamp.io_timestamp != 0) {
1983                                 mdb_printf("%llums", (mdb_gethrtime() -
1984                                     zio_timestamp.io_timestamp) /
1985                                     1000000);
1986                         } else {
1987                                 mdb_printf("%-12s ", "-");
1988                         }
1989 #else
1990                         mdb_printf("%-12s ", "-");
1991 #endif
1992                         mdb_printf("\n");
1993                 }
1994         }
1995 
1996         if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
1997                 return (WALK_NEXT);
1998 
1999         if (zpa->zpa_type == ZIO_WALK_PARENT)
2000                 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2001                     "io_parent_list");
2002         else
2003                 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2004                     "io_child_list");
2005 
2006         zpa->zpa_current_depth++;
2007         if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
2008                 mdb_warn("failed to walk zio_t children at %p\n", laddr);
2009                 return (WALK_ERR);
2010         }
2011         zpa->zpa_current_depth--;
2012 
2013         return (WALK_NEXT);
2014 }
2015 
2016 /* ARGSUSED */
2017 static int
2018 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
2019 {
2020         zio_link_t zl;
2021         uintptr_t ziop;
2022         zio_print_args_t *zpa = arg;
2023 
2024         if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
2025                 mdb_warn("failed to read zio_link_t at %p", addr);
2026                 return (WALK_ERR);
2027         }
2028 
2029         if (zpa->zpa_type == ZIO_WALK_PARENT)
2030                 ziop = (uintptr_t)zl.zl_parent;
2031         else
2032                 ziop = (uintptr_t)zl.zl_child;
2033 
2034         return (zio_print_cb(ziop, zpa));
2035 }
2036 
2037 /* ARGSUSED */
2038 static int
2039 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2040 {
2041         zio_print_args_t zpa = { 0 };
2042 
2043         if (!(flags & DCMD_ADDRSPEC))
2044                 return (DCMD_USAGE);
2045 
2046         if (mdb_getopts(argc, argv,
2047             'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
2048             'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
2049             'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
2050             NULL) != argc)
2051                 return (DCMD_USAGE);
2052 
2053         zpa.zpa_flags = flags;
2054         if (zpa.zpa_max_depth != 0) {
2055                 if (zpa.zpa_type == ZIO_WALK_SELF)
2056                         zpa.zpa_type = ZIO_WALK_CHILD;
2057         } else if (zpa.zpa_type != ZIO_WALK_SELF) {
2058                 zpa.zpa_min_depth = 1;
2059                 zpa.zpa_max_depth = 1;
2060         }
2061 
2062         if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
2063                 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
2064                     ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
2065                     "TIME_ELAPSED");
2066         }
2067 
2068         if (zio_print_cb(addr, &zpa) != WALK_NEXT)
2069                 return (DCMD_ERR);
2070 
2071         return (DCMD_OK);
2072 }
2073 
2074 /*
2075  * [addr]::zio_state
2076  *
2077  * Print a summary of all zio_t structures on the system, or for a particular
2078  * pool.  This is equivalent to '::walk zio_root | ::zio'.
2079  */
2080 /*ARGSUSED*/
2081 static int
2082 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2083 {
2084         /*
2085          * MDB will remember the last address of the pipeline, so if we don't
2086          * zero this we'll end up trying to walk zio structures for a
2087          * non-existent spa_t.
2088          */
2089         if (!(flags & DCMD_ADDRSPEC))
2090                 addr = 0;
2091 
2092         return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
2093 }
2094 
/* Per-walk state kept in walk_data by the txg_list walkers. */
typedef struct txg_list_walk_data {
        uintptr_t lw_head[TXG_SIZE];    /* copy of the target list's tl_head[] */
        int     lw_txgoff;              /* slot currently being walked */
        int     lw_maxoff;              /* last slot that will be walked */
        size_t  lw_offset;              /* tl_offset: txg_node_t offset in object */
        void    *lw_obj;                /* buffer for object up to and incl. node */
} txg_list_walk_data_t;
2102 
2103 static int
2104 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
2105 {
2106         txg_list_walk_data_t *lwd;
2107         txg_list_t list;
2108         int i;
2109 
2110         lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
2111         if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
2112                 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
2113                 return (WALK_ERR);
2114         }
2115 
2116         for (i = 0; i < TXG_SIZE; i++)
2117                 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
2118         lwd->lw_offset = list.tl_offset;
2119         lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
2120             UM_SLEEP | UM_GC);
2121         lwd->lw_txgoff = txg;
2122         lwd->lw_maxoff = maxoff;
2123 
2124         wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2125         wsp->walk_data = lwd;
2126 
2127         return (WALK_NEXT);
2128 }
2129 
2130 static int
2131 txg_list_walk_init(mdb_walk_state_t *wsp)
2132 {
2133         return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
2134 }
2135 
2136 static int
2137 txg_list0_walk_init(mdb_walk_state_t *wsp)
2138 {
2139         return (txg_list_walk_init_common(wsp, 0, 0));
2140 }
2141 
2142 static int
2143 txg_list1_walk_init(mdb_walk_state_t *wsp)
2144 {
2145         return (txg_list_walk_init_common(wsp, 1, 1));
2146 }
2147 
2148 static int
2149 txg_list2_walk_init(mdb_walk_state_t *wsp)
2150 {
2151         return (txg_list_walk_init_common(wsp, 2, 2));
2152 }
2153 
2154 static int
2155 txg_list3_walk_init(mdb_walk_state_t *wsp)
2156 {
2157         return (txg_list_walk_init_common(wsp, 3, 3));
2158 }
2159 
2160 static int
2161 txg_list_walk_step(mdb_walk_state_t *wsp)
2162 {
2163         txg_list_walk_data_t *lwd = wsp->walk_data;
2164         uintptr_t addr;
2165         txg_node_t *node;
2166         int status;
2167 
2168         while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
2169                 lwd->lw_txgoff++;
2170                 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2171         }
2172 
2173         if (wsp->walk_addr == NULL)
2174                 return (WALK_DONE);
2175 
2176         addr = wsp->walk_addr - lwd->lw_offset;
2177 
2178         if (mdb_vread(lwd->lw_obj,
2179             lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
2180                 mdb_warn("failed to read list element at %#lx", addr);
2181                 return (WALK_ERR);
2182         }
2183 
2184         status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
2185         node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
2186         wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
2187 
2188         return (status);
2189 }
2190 
2191 /*
2192  * ::walk spa
2193  *
2194  * Walk all named spa_t structures in the namespace.  This is nothing more than
2195  * a layered avl walk.
2196  */
2197 static int
2198 spa_walk_init(mdb_walk_state_t *wsp)
2199 {
2200         GElf_Sym sym;
2201 
2202         if (wsp->walk_addr != NULL) {
2203                 mdb_warn("spa walk only supports global walks\n");
2204                 return (WALK_ERR);
2205         }
2206 
2207         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
2208                 mdb_warn("failed to find symbol 'spa_namespace_avl'");
2209                 return (WALK_ERR);
2210         }
2211 
2212         wsp->walk_addr = (uintptr_t)sym.st_value;
2213 
2214         if (mdb_layered_walk("avl", wsp) == -1) {
2215                 mdb_warn("failed to walk 'avl'\n");
2216                 return (WALK_ERR);
2217         }
2218 
2219         return (WALK_NEXT);
2220 }
2221 
2222 static int
2223 spa_walk_step(mdb_walk_state_t *wsp)
2224 {
2225         return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata));
2226 }
2227 
2228 /*
2229  * [addr]::walk zio
2230  *
2231  * Walk all active zio_t structures on the system.  This is simply a layered
2232  * walk on top of ::walk zio_cache, with the optional ability to limit the
2233  * structures to a particular pool.
2234  */
2235 static int
2236 zio_walk_init(mdb_walk_state_t *wsp)
2237 {
2238         wsp->walk_data = (void *)wsp->walk_addr;
2239 
2240         if (mdb_layered_walk("zio_cache", wsp) == -1) {
2241                 mdb_warn("failed to walk 'zio_cache'\n");
2242                 return (WALK_ERR);
2243         }
2244 
2245         return (WALK_NEXT);
2246 }
2247 
2248 static int
2249 zio_walk_step(mdb_walk_state_t *wsp)
2250 {
2251         mdb_zio_t zio;
2252         uintptr_t spa = (uintptr_t)wsp->walk_data;
2253 
2254         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2255             wsp->walk_addr, 0) == -1)
2256                 return (WALK_ERR);
2257 
2258         if (spa != 0 && spa != zio.io_spa)
2259                 return (WALK_NEXT);
2260 
2261         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2262 }
2263 
2264 /*
2265  * [addr]::walk zio_root
2266  *
2267  * Walk only root zio_t structures, optionally for a particular spa_t.
2268  */
2269 static int
2270 zio_walk_root_step(mdb_walk_state_t *wsp)
2271 {
2272         mdb_zio_t zio;
2273         uintptr_t spa = (uintptr_t)wsp->walk_data;
2274 
2275         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2276             wsp->walk_addr, 0) == -1)
2277                 return (WALK_ERR);
2278 
2279         if (spa != 0 && spa != zio.io_spa)
2280                 return (WALK_NEXT);
2281 
2282         /* If the parent list is not empty, ignore */
2283         if (zio.io_parent_list.list_head.list_next !=
2284             wsp->walk_addr +
2285             mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") +
2286             mdb_ctf_offsetof_by_name("struct list", "list_head"))
2287                 return (WALK_NEXT);
2288 
2289         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2290 }
2291 
2292 /*
2293  * ::zfs_blkstats
2294  *
2295  *      -v      print verbose per-level information
2296  *
2297  */
2298 static int
2299 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2300 {
2301         boolean_t verbose = B_FALSE;
2302         zfs_all_blkstats_t stats;
2303         dmu_object_type_t t;
2304         zfs_blkstat_t *tzb;
2305         uint64_t ditto;
2306         dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2307         /* +10 in case it grew */
2308 
2309         if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2310                 mdb_warn("failed to read 'dmu_ot'");
2311                 return (DCMD_ERR);
2312         }
2313 
2314         if (mdb_getopts(argc, argv,
2315             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2316             NULL) != argc)
2317                 return (DCMD_USAGE);
2318 
2319         if (!(flags & DCMD_ADDRSPEC))
2320                 return (DCMD_USAGE);
2321 
2322         if (GETMEMB(addr, "spa", spa_dsl_pool, addr) ||
2323             GETMEMB(addr, "dsl_pool", dp_blkstats, addr) ||
2324             mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2325                 mdb_warn("failed to read data at %p;", addr);
2326                 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2327                 return (DCMD_ERR);
2328         }
2329 
2330         tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
2331         if (tzb->zb_gangs != 0) {
2332                 mdb_printf("Ganged blocks: %llu\n",
2333                     (longlong_t)tzb->zb_gangs);
2334         }
2335 
2336         ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2337             tzb->zb_ditto_3_of_3_samevdev;
2338         if (ditto != 0) {
2339                 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2340                     (longlong_t)ditto);
2341         }
2342 
2343         mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2344             "\t  avg\t comp\t%%Total\tType\n");
2345 
2346         for (t = 0; t <= DMU_OT_TOTAL; t++) {
2347                 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2348                 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2349                 char avg[NICENUM_BUFLEN];
2350                 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2351                 char typename[64];
2352                 int l;
2353 
2354 
2355                 if (t == DMU_OT_DEFERRED)
2356                         strcpy(typename, "deferred free");
2357                 else if (t == DMU_OT_OTHER)
2358                         strcpy(typename, "other");
2359                 else if (t == DMU_OT_TOTAL)
2360                         strcpy(typename, "Total");
2361                 else if (mdb_readstr(typename, sizeof (typename),
2362                     (uintptr_t)dmu_ot[t].ot_name) == -1) {
2363                         mdb_warn("failed to read type name");
2364                         return (DCMD_ERR);
2365                 }
2366 
2367                 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2368                         continue;
2369 
2370                 for (l = -1; l < DN_MAX_LEVELS; l++) {
2371                         int level = (l == -1 ? DN_MAX_LEVELS : l);
2372                         zfs_blkstat_t *zb = &stats.zab_type[level][t];
2373 
2374                         if (zb->zb_asize == 0)
2375                                 continue;
2376 
2377                         /*
2378                          * Don't print each level unless requested.
2379                          */
2380                         if (!verbose && level != DN_MAX_LEVELS)
2381                                 continue;
2382 
2383                         /*
2384                          * If all the space is level 0, don't print the
2385                          * level 0 separately.
2386                          */
2387                         if (level == 0 && zb->zb_asize ==
2388                             stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2389                                 continue;
2390 
2391                         mdb_nicenum(zb->zb_count, csize);
2392                         mdb_nicenum(zb->zb_lsize, lsize);
2393                         mdb_nicenum(zb->zb_psize, psize);
2394                         mdb_nicenum(zb->zb_asize, asize);
2395                         mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2396                         (void) snprintfrac(comp, NICENUM_BUFLEN,
2397                             zb->zb_lsize, zb->zb_psize, 2);
2398                         (void) snprintfrac(pct, NICENUM_BUFLEN,
2399                             100 * zb->zb_asize, tzb->zb_asize, 2);
2400 
2401                         mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2402                             "\t%5s\t%6s\t",
2403                             csize, lsize, psize, asize, avg, comp, pct);
2404 
2405                         if (level == DN_MAX_LEVELS)
2406                                 mdb_printf("%s\n", typename);
2407                         else
2408                                 mdb_printf("  L%d %s\n",
2409                                     level, typename);
2410                 }
2411         }
2412 
2413         return (DCMD_OK);
2414 }
2415 
/* The parts of a target reference_t that reference_cb prints. */
typedef struct mdb_reference {
        uintptr_t ref_holder;   /* tag; also tried as a printable string */
        uintptr_t ref_removed;  /* passed to ::whatis for removed references */
        uint64_t ref_number;    /* printed as "count=" when it is not 1 */
} mdb_reference_t;
2421 
2422 /* ARGSUSED */
2423 static int
2424 reference_cb(uintptr_t addr, const void *ignored, void *arg)
2425 {
2426         mdb_reference_t ref;
2427         boolean_t holder_is_str = B_FALSE;
2428         char holder_str[128];
2429         boolean_t removed = (boolean_t)arg;
2430 
2431         if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr,
2432             0) == -1)
2433                 return (DCMD_ERR);
2434 
2435         if (mdb_readstr(holder_str, sizeof (holder_str),
2436             ref.ref_holder) != -1)
2437                 holder_is_str = strisprint(holder_str);
2438 
2439         if (removed)
2440                 mdb_printf("removed ");
2441         mdb_printf("reference ");
2442         if (ref.ref_number != 1)
2443                 mdb_printf("with count=%llu ", ref.ref_number);
2444         mdb_printf("with tag %lx", ref.ref_holder);
2445         if (holder_is_str)
2446                 mdb_printf(" \"%s\"", holder_str);
2447         mdb_printf(", held at:\n");
2448 
2449         (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
2450 
2451         if (removed) {
2452                 mdb_printf("removed at:\n");
2453                 (void) mdb_call_dcmd("whatis", ref.ref_removed,
2454                     DCMD_ADDRSPEC, 0, NULL);
2455         }
2456 
2457         mdb_printf("\n");
2458 
2459         return (WALK_NEXT);
2460 }
2461 
/*
 * refcount_t members are read through three separate structures so that the
 * refcount dcmd can react to each read failing independently.
 */

/* Read first; a failure here fails the dcmd. */
typedef struct mdb_refcount {
        uint64_t rc_count;
} mdb_refcount_t;

/* Read quietly; on failure the refcount is reported as "untracked". */
typedef struct mdb_refcount_removed {
        uint64_t rc_removed_count;
} mdb_refcount_removed_t;

/* Read quietly; on failure rc_tracked is assumed to be B_TRUE. */
typedef struct mdb_refcount_tracked {
        boolean_t rc_tracked;
} mdb_refcount_tracked_t;
2473 
2474 /* ARGSUSED */
2475 static int
2476 refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2477 {
2478         mdb_refcount_t rc;
2479         mdb_refcount_removed_t rcr;
2480         mdb_refcount_tracked_t rct;
2481         int off;
2482         boolean_t released = B_FALSE;
2483 
2484         if (!(flags & DCMD_ADDRSPEC))
2485                 return (DCMD_USAGE);
2486 
2487         if (mdb_getopts(argc, argv,
2488             'r', MDB_OPT_SETBITS, B_TRUE, &released,
2489             NULL) != argc)
2490                 return (DCMD_USAGE);
2491 
2492         if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr,
2493             0) == -1)
2494                 return (DCMD_ERR);
2495 
2496         if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr,
2497             MDB_CTF_VREAD_QUIET) == -1) {
2498                 mdb_printf("refcount_t at %p has %llu holds (untracked)\n",
2499                     addr, (longlong_t)rc.rc_count);
2500                 return (DCMD_OK);
2501         }
2502 
2503         if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr,
2504             MDB_CTF_VREAD_QUIET) == -1) {
2505                 /* If this is an old target, it might be tracked. */
2506                 rct.rc_tracked = B_TRUE;
2507         }
2508 
2509         mdb_printf("refcount_t at %p has %llu current holds, "
2510             "%llu recently released holds\n",
2511             addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
2512 
2513         if (rct.rc_tracked && rc.rc_count > 0)
2514                 mdb_printf("current holds:\n");
2515         off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list");
2516         if (off == -1)
2517                 return (DCMD_ERR);
2518         mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
2519 
2520         if (released && rcr.rc_removed_count > 0) {
2521                 mdb_printf("released holds:\n");
2522 
2523                 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed");
2524                 if (off == -1)
2525                         return (DCMD_ERR);
2526                 mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
2527         }
2528 
2529         return (DCMD_OK);
2530 }
2531 
2532 /* ARGSUSED */
2533 static int
2534 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2535 {
2536         sa_attr_table_t *table;
2537         sa_os_t sa_os;
2538         char *name;
2539         int i;
2540 
2541         if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
2542                 mdb_warn("failed to read sa_os at %p", addr);
2543                 return (DCMD_ERR);
2544         }
2545 
2546         table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
2547             UM_SLEEP | UM_GC);
2548         name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
2549 
2550         if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
2551             (uintptr_t)sa_os.sa_attr_table) == -1) {
2552                 mdb_warn("failed to read sa_os at %p", addr);
2553                 return (DCMD_ERR);
2554         }
2555 
2556         mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
2557             "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
2558         for (i = 0; i != sa_os.sa_num_attrs; i++) {
2559                 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
2560                 mdb_printf("%5x   %8x %8x %8x          %-s\n",
2561                     (int)table[i].sa_attr, (int)table[i].sa_registered,
2562                     (int)table[i].sa_length, table[i].sa_byteswap, name);
2563         }
2564 
2565         return (DCMD_OK);
2566 }
2567 
2568 static int
2569 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
2570 {
2571         uintptr_t idx_table;
2572 
2573         if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) {
2574                 mdb_printf("can't find offset table in sa_idx_tab\n");
2575                 return (-1);
2576         }
2577 
2578         *off_tab = mdb_alloc(attr_count * sizeof (uint32_t),
2579             UM_SLEEP | UM_GC);
2580 
2581         if (mdb_vread(*off_tab,
2582             attr_count * sizeof (uint32_t), idx_table) == -1) {
2583                 mdb_warn("failed to attribute offset table %p", idx_table);
2584                 return (-1);
2585         }
2586 
2587         return (DCMD_OK);
2588 }
2589 
2590 /*ARGSUSED*/
2591 static int
2592 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2593 {
2594         uint32_t *offset_tab;
2595         int attr_count;
2596         uint64_t attr_id;
2597         uintptr_t attr_addr;
2598         uintptr_t bonus_tab, spill_tab;
2599         uintptr_t db_bonus, db_spill;
2600         uintptr_t os, os_sa;
2601         uintptr_t db_data;
2602 
2603         if (argc != 1)
2604                 return (DCMD_USAGE);
2605 
2606         if (argv[0].a_type == MDB_TYPE_STRING)
2607                 attr_id = mdb_strtoull(argv[0].a_un.a_str);
2608         else
2609                 return (DCMD_USAGE);
2610 
2611         if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) ||
2612             GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) ||
2613             GETMEMB(addr, "sa_handle", sa_os, os) ||
2614             GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) ||
2615             GETMEMB(addr, "sa_handle", sa_spill, db_spill)) {
2616                 mdb_printf("Can't find necessary information in sa_handle "
2617                     "in sa_handle\n");
2618                 return (DCMD_ERR);
2619         }
2620 
2621         if (GETMEMB(os, "objset", os_sa, os_sa)) {
2622                 mdb_printf("Can't find os_sa in objset\n");
2623                 return (DCMD_ERR);
2624         }
2625 
2626         if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) {
2627                 mdb_printf("Can't find sa_num_attrs\n");
2628                 return (DCMD_ERR);
2629         }
2630 
2631         if (attr_id > attr_count) {
2632                 mdb_printf("attribute id number is out of range\n");
2633                 return (DCMD_ERR);
2634         }
2635 
2636         if (bonus_tab) {
2637                 if (sa_get_off_table(bonus_tab, &offset_tab,
2638                     attr_count) == -1) {
2639                         return (DCMD_ERR);
2640                 }
2641 
2642                 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) {
2643                         mdb_printf("can't find db_data in bonus dbuf\n");
2644                         return (DCMD_ERR);
2645                 }
2646         }
2647 
2648         if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) &&
2649             spill_tab == NULL) {
2650                 mdb_printf("Attribute does not exist\n");
2651                 return (DCMD_ERR);
2652         } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) {
2653                 if (sa_get_off_table(spill_tab, &offset_tab,
2654                     attr_count) == -1) {
2655                         return (DCMD_ERR);
2656                 }
2657                 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) {
2658                         mdb_printf("can't find db_data in spill dbuf\n");
2659                         return (DCMD_ERR);
2660                 }
2661                 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
2662                         mdb_printf("Attribute does not exist\n");
2663                         return (DCMD_ERR);
2664                 }
2665         }
2666         attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
2667         mdb_printf("%p\n", attr_addr);
2668         return (DCMD_OK);
2669 }
2670 
2671 /* ARGSUSED */
2672 static int
2673 zfs_ace_print_common(uintptr_t addr, uint_t flags,
2674     uint64_t id, uint32_t access_mask, uint16_t ace_flags,
2675     uint16_t ace_type, int verbose)
2676 {
2677         if (DCMD_HDRSPEC(flags) && !verbose)
2678                 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
2679                     "ADDR", "FLAGS", "MASK", "TYPE", "ID");
2680 
2681         if (!verbose) {
2682                 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
2683                     ace_flags, access_mask, ace_type, id);
2684                 return (DCMD_OK);
2685         }
2686 
2687         switch (ace_flags & ACE_TYPE_FLAGS) {
2688         case ACE_OWNER:
2689                 mdb_printf("owner@:");
2690                 break;
2691         case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
2692                 mdb_printf("group@:");
2693                 break;
2694         case ACE_EVERYONE:
2695                 mdb_printf("everyone@:");
2696                 break;
2697         case ACE_IDENTIFIER_GROUP:
2698                 mdb_printf("group:%llx:", (u_longlong_t)id);
2699                 break;
2700         case 0: /* User entry */
2701                 mdb_printf("user:%llx:", (u_longlong_t)id);
2702                 break;
2703         }
2704 
2705         /* print out permission mask */
2706         if (access_mask & ACE_READ_DATA)
2707                 mdb_printf("r");
2708         else
2709                 mdb_printf("-");
2710         if (access_mask & ACE_WRITE_DATA)
2711                 mdb_printf("w");
2712         else
2713                 mdb_printf("-");
2714         if (access_mask & ACE_EXECUTE)
2715                 mdb_printf("x");
2716         else
2717                 mdb_printf("-");
2718         if (access_mask & ACE_APPEND_DATA)
2719                 mdb_printf("p");
2720         else
2721                 mdb_printf("-");
2722         if (access_mask & ACE_DELETE)
2723                 mdb_printf("d");
2724         else
2725                 mdb_printf("-");
2726         if (access_mask & ACE_DELETE_CHILD)
2727                 mdb_printf("D");
2728         else
2729                 mdb_printf("-");
2730         if (access_mask & ACE_READ_ATTRIBUTES)
2731                 mdb_printf("a");
2732         else
2733                 mdb_printf("-");
2734         if (access_mask & ACE_WRITE_ATTRIBUTES)
2735                 mdb_printf("A");
2736         else
2737                 mdb_printf("-");
2738         if (access_mask & ACE_READ_NAMED_ATTRS)
2739                 mdb_printf("R");
2740         else
2741                 mdb_printf("-");
2742         if (access_mask & ACE_WRITE_NAMED_ATTRS)
2743                 mdb_printf("W");
2744         else
2745                 mdb_printf("-");
2746         if (access_mask & ACE_READ_ACL)
2747                 mdb_printf("c");
2748         else
2749                 mdb_printf("-");
2750         if (access_mask & ACE_WRITE_ACL)
2751                 mdb_printf("C");
2752         else
2753                 mdb_printf("-");
2754         if (access_mask & ACE_WRITE_OWNER)
2755                 mdb_printf("o");
2756         else
2757                 mdb_printf("-");
2758         if (access_mask & ACE_SYNCHRONIZE)
2759                 mdb_printf("s");
2760         else
2761                 mdb_printf("-");
2762 
2763         mdb_printf(":");
2764 
2765         /* Print out inheritance flags */
2766         if (ace_flags & ACE_FILE_INHERIT_ACE)
2767                 mdb_printf("f");
2768         else
2769                 mdb_printf("-");
2770         if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
2771                 mdb_printf("d");
2772         else
2773                 mdb_printf("-");
2774         if (ace_flags & ACE_INHERIT_ONLY_ACE)
2775                 mdb_printf("i");
2776         else
2777                 mdb_printf("-");
2778         if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
2779                 mdb_printf("n");
2780         else
2781                 mdb_printf("-");
2782         if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
2783                 mdb_printf("S");
2784         else
2785                 mdb_printf("-");
2786         if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
2787                 mdb_printf("F");
2788         else
2789                 mdb_printf("-");
2790         if (ace_flags & ACE_INHERITED_ACE)
2791                 mdb_printf("I");
2792         else
2793                 mdb_printf("-");
2794 
2795         switch (ace_type) {
2796         case ACE_ACCESS_ALLOWED_ACE_TYPE:
2797                 mdb_printf(":allow\n");
2798                 break;
2799         case ACE_ACCESS_DENIED_ACE_TYPE:
2800                 mdb_printf(":deny\n");
2801                 break;
2802         case ACE_SYSTEM_AUDIT_ACE_TYPE:
2803                 mdb_printf(":audit\n");
2804                 break;
2805         case ACE_SYSTEM_ALARM_ACE_TYPE:
2806                 mdb_printf(":alarm\n");
2807                 break;
2808         default:
2809                 mdb_printf(":?\n");
2810         }
2811         return (DCMD_OK);
2812 }
2813 
2814 /* ARGSUSED */
2815 static int
2816 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2817 {
2818         zfs_ace_t zace;
2819         int verbose = FALSE;
2820         uint64_t id;
2821 
2822         if (!(flags & DCMD_ADDRSPEC))
2823                 return (DCMD_USAGE);
2824 
2825         if (mdb_getopts(argc, argv,
2826             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
2827                 return (DCMD_USAGE);
2828 
2829         if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
2830                 mdb_warn("failed to read zfs_ace_t");
2831                 return (DCMD_ERR);
2832         }
2833 
2834         if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
2835             (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
2836                 id = zace.z_fuid;
2837         else
2838                 id = -1;
2839 
2840         return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
2841             zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
2842 }
2843 
2844 /* ARGSUSED */
2845 static int
2846 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2847 {
2848         ace_t ace;
2849         uint64_t id;
2850         int verbose = FALSE;
2851 
2852         if (!(flags & DCMD_ADDRSPEC))
2853                 return (DCMD_USAGE);
2854 
2855         if (mdb_getopts(argc, argv,
2856             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
2857                 return (DCMD_USAGE);
2858 
2859         if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
2860                 mdb_warn("failed to read ace_t");
2861                 return (DCMD_ERR);
2862         }
2863 
2864         if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
2865             (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
2866                 id = ace.a_who;
2867         else
2868                 id = -1;
2869 
2870         return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
2871             ace.a_flags, ace.a_type, verbose));
2872 }
2873 
/*
 * State handed through the ACL walks started by zfs_acl_dump() to the
 * acl_cb() and acl_aces_cb() callbacks.
 */
2874 typedef struct acl_dump_args {
2875         int a_argc;                     /* argc forwarded to the per-ACE dcmd */
2876         const mdb_arg_t *a_argv;        /* argv forwarded to the per-ACE dcmd */
2877         uint16_t a_version;             /* z_version of the ACL; 1 selects */
                                             /* "zfs_ace", otherwise "zfs_ace0" */
2878         int a_flags;                    /* DCMD_LOOPFIRST for the first ACE, */
                                             /* DCMD_LOOP for the ones after it */
2879 } acl_dump_args_t;
2880 
2881 /* ARGSUSED */
2882 static int
2883 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
2884 {
2885         acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
2886 
2887         if (acl_args->a_version == 1) {
2888                 if (mdb_call_dcmd("zfs_ace", addr,
2889                     DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
2890                     acl_args->a_argv) != DCMD_OK) {
2891                         return (WALK_ERR);
2892                 }
2893         } else {
2894                 if (mdb_call_dcmd("zfs_ace0", addr,
2895                     DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
2896                     acl_args->a_argv) != DCMD_OK) {
2897                         return (WALK_ERR);
2898                 }
2899         }
2900         acl_args->a_flags = DCMD_LOOP;
2901         return (WALK_NEXT);
2902 }
2903 
2904 /* ARGSUSED */
2905 static int
2906 acl_cb(uintptr_t addr, const void *unknown, void *arg)
2907 {
2908         acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
2909 
2910         if (acl_args->a_version == 1) {
2911                 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb,
2912                     arg, addr) != 0) {
2913                         mdb_warn("can't walk ACEs");
2914                         return (DCMD_ERR);
2915                 }
2916         } else {
2917                 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb,
2918                     arg, addr) != 0) {
2919                         mdb_warn("can't walk ACEs");
2920                         return (DCMD_ERR);
2921                 }
2922         }
2923         return (WALK_NEXT);
2924 }
2925 
2926 /* ARGSUSED */
2927 static int
2928 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2929 {
2930         zfs_acl_t zacl;
2931         int verbose = FALSE;
2932         acl_dump_args_t acl_args;
2933 
2934         if (!(flags & DCMD_ADDRSPEC))
2935                 return (DCMD_USAGE);
2936 
2937         if (mdb_getopts(argc, argv,
2938             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
2939                 return (DCMD_USAGE);
2940 
2941         if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
2942                 mdb_warn("failed to read zfs_acl_t");
2943                 return (DCMD_ERR);
2944         }
2945 
2946         acl_args.a_argc = argc;
2947         acl_args.a_argv = argv;
2948         acl_args.a_version = zacl.z_version;
2949         acl_args.a_flags = DCMD_LOOPFIRST;
2950 
2951         if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
2952                 mdb_warn("can't walk ACL");
2953                 return (DCMD_ERR);
2954         }
2955 
2956         return (DCMD_OK);
2957 }
2958 
2959 /* ARGSUSED */
2960 static int
2961 zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
2962 {
2963         if (wsp->walk_addr == NULL) {
2964                 mdb_warn("must supply address of zfs_acl_node_t\n");
2965                 return (WALK_ERR);
2966         }
2967 
2968         wsp->walk_addr +=
2969             mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl");
2970 
2971         if (mdb_layered_walk("list", wsp) == -1) {
2972                 mdb_warn("failed to walk 'list'\n");
2973                 return (WALK_ERR);
2974         }
2975 
2976         return (WALK_NEXT);
2977 }
2978 
2979 static int
2980 zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
2981 {
2982         zfs_acl_node_t  aclnode;
2983 
2984         if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
2985             wsp->walk_addr) == -1) {
2986                 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
2987                 return (WALK_ERR);
2988         }
2989 
2990         return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
2991 }
2992 
/*
 * Per-walk state for the ACE walkers; allocated in
 * zfs_aces_walk_init_common() and consumed by zfs_aces_walk_step().
 */
2993 typedef struct ace_walk_data {
2994         int             ace_count;      /* ACEs left; decremented each step, */
                                             /* the walk is done when it is 0 */
2995         int             ace_version;    /* 0: entries read as ace_t, */
                                             /* 1: entries read as zfs_ace_t */
2996 } ace_walk_data_t;
2997 
2998 static int
2999 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
3000     int ace_count, uintptr_t ace_data)
3001 {
3002         ace_walk_data_t *ace_walk_data;
3003 
3004         if (wsp->walk_addr == NULL) {
3005                 mdb_warn("must supply address of zfs_acl_node_t\n");
3006                 return (WALK_ERR);
3007         }
3008 
3009         ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
3010 
3011         ace_walk_data->ace_count = ace_count;
3012         ace_walk_data->ace_version = version;
3013 
3014         wsp->walk_addr = ace_data;
3015         wsp->walk_data = ace_walk_data;
3016 
3017         return (WALK_NEXT);
3018 }
3019 
3020 static int
3021 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
3022 {
3023         static int gotid;
3024         static mdb_ctf_id_t acl_id;
3025         int z_ace_count;
3026         uintptr_t z_acldata;
3027 
3028         if (!gotid) {
3029                 if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
3030                     &acl_id) == -1) {
3031                         mdb_warn("couldn't find struct zfs_acl_node");
3032                         return (DCMD_ERR);
3033                 }
3034                 gotid = TRUE;
3035         }
3036 
3037         if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) {
3038                 return (DCMD_ERR);
3039         }
3040         if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) {
3041                 return (DCMD_ERR);
3042         }
3043 
3044         return (zfs_aces_walk_init_common(wsp, version,
3045             z_ace_count, z_acldata));
3046 }
3047 
3048 /* ARGSUSED */
3049 static int
3050 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
3051 {
3052         return (zfs_acl_node_aces_walk_init_common(wsp, 1));
3053 }
3054 
3055 /* ARGSUSED */
3056 static int
3057 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
3058 {
3059         return (zfs_acl_node_aces_walk_init_common(wsp, 0));
3060 }
3061 
3062 static int
3063 zfs_aces_walk_step(mdb_walk_state_t *wsp)
3064 {
3065         ace_walk_data_t *ace_data = wsp->walk_data;
3066         zfs_ace_t zace;
3067         ace_t *acep;
3068         int status;
3069         int entry_type;
3070         int allow_type;
3071         uintptr_t ptr;
3072 
3073         if (ace_data->ace_count == 0)
3074                 return (WALK_DONE);
3075 
3076         if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
3077                 mdb_warn("failed to read zfs_ace_t at %#lx",
3078                     wsp->walk_addr);
3079                 return (WALK_ERR);
3080         }
3081 
3082         switch (ace_data->ace_version) {
3083         case 0:
3084                 acep = (ace_t *)&zace;
3085                 entry_type = acep->a_flags & ACE_TYPE_FLAGS;
3086                 allow_type = acep->a_type;
3087                 break;
3088         case 1:
3089                 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
3090                 allow_type = zace.z_hdr.z_type;
3091                 break;
3092         default:
3093                 return (WALK_ERR);
3094         }
3095 
3096         ptr = (uintptr_t)wsp->walk_addr;
3097         switch (entry_type) {
3098         case ACE_OWNER:
3099         case ACE_EVERYONE:
3100         case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3101                 ptr += ace_data->ace_version == 0 ?
3102                     sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
3103                 break;
3104         case ACE_IDENTIFIER_GROUP:
3105         default:
3106                 switch (allow_type) {
3107                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
3108                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
3109                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
3110                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
3111                         ptr += ace_data->ace_version == 0 ?
3112                             sizeof (ace_t) : sizeof (zfs_object_ace_t);
3113                         break;
3114                 default:
3115                         ptr += ace_data->ace_version == 0 ?
3116                             sizeof (ace_t) : sizeof (zfs_ace_t);
3117                         break;
3118                 }
3119         }
3120 
3121         ace_data->ace_count--;
3122         status = wsp->walk_callback(wsp->walk_addr,
3123             (void *)(uintptr_t)&zace, wsp->walk_cbdata);
3124 
3125         wsp->walk_addr = ptr;
3126         return (status);
3127 }
3128 
/*
 * Debugger-side view of an rrwlock_t holding only the members that
 * rrwlock() inspects; it is filled in by mdb_ctf_vread() there.
 */
3129 typedef struct mdb_zfs_rrwlock {
3130         uintptr_t       rr_writer;      /* nonzero: reported as the thread */
                                             /* holding the write lock */
3131         boolean_t       rr_writer_wanted; /* nonzero: "writer wanted" */
3132 } mdb_zfs_rrwlock_t;
3133 
3134 static uint_t rrw_key;
3135 
3136 /* ARGSUSED */
3137 static int
3138 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3139 {
3140         mdb_zfs_rrwlock_t rrw;
3141 
3142         if (rrw_key == 0) {
3143                 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1)
3144                         return (DCMD_ERR);
3145         }
3146 
3147         if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr,
3148             0) == -1)
3149                 return (DCMD_ERR);
3150 
3151         if (rrw.rr_writer != 0) {
3152                 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer);
3153                 return (DCMD_OK);
3154         }
3155 
3156         if (rrw.rr_writer_wanted) {
3157                 mdb_printf("writer wanted\n");
3158         }
3159 
3160         mdb_printf("anonymous references:\n");
3161         (void) mdb_call_dcmd("refcount", addr +
3162             mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
3163             DCMD_ADDRSPEC, 0, NULL);
3164 
3165         mdb_printf("linked references:\n");
3166         (void) mdb_call_dcmd("refcount", addr +
3167             mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
3168             DCMD_ADDRSPEC, 0, NULL);
3169 
3170         /*
3171          * XXX This should find references from
3172          * "::walk thread | ::tsd -v <rrw_key>", but there is no support
3173          * for programmatic consumption of dcmds, so this would be
3174          * difficult, potentially requiring reimplementing ::tsd (both
3175          * user and kernel versions) in this MDB module.
3176          */
3177 
3178         return (DCMD_OK);
3179 }
3180 
3181 /*
3182  * MDB module linkage information:
3183  *
3184  * We declare a list of structures describing our dcmds, and a function
3185  * named _mdb_init to return a pointer to our module information.
3186  */
3187 
3188 static const mdb_dcmd_t dcmds[] = {
3189         { "arc", "[-bkmg]", "print ARC variables", arc_print },
3190         { "blkptr", ":", "print blkptr_t", blkptr },
3191         { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
3192         { "dbufs",
3193             "\t[-O objset_t*] [-n objset_name | \"mos\"] "
3194             "[-o object | \"mdn\"] \n"
3195             "\t[-l level] [-b blkid | \"bonus\"]",
3196             "find dmu_buf_impl_t's that match specified criteria", dbufs },
3197         { "abuf_find", "dva_word[0] dva_word[1]",
3198             "find arc_buf_hdr_t of a specified DVA",
3199             abuf_find },
3200         { "spa", "?[-cevmMh]\n"
3201             "\t-c display spa config\n"
3202             "\t-e display vdev statistics\n"
3203             "\t-v display vdev information\n"
3204             "\t-m display metaslab statistics\n"
3205             "\t-M display metaslab group statistics\n"
3206             "\t-h display histogram (requires -m or -M)\n",
3207             "spa_t summary", spa_print },
3208         { "spa_config", ":", "print spa_t configuration", spa_print_config },
3209         { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
3210         { "spa_vdevs", ":[-emMh]\n"
3211             "\t-e display vdev statistics\n"
3212             "\t-m dispaly metaslab statistics\n"
3213             "\t-M display metaslab group statistic\n"
3214             "\t-h display histogram (requires -m or -M)\n",
3215             "given a spa_t, print vdev summary", spa_vdevs },
3216         { "vdev", ":[-re]\n"
3217             "\t-r display recursively\n"
3218             "\t-e display statistics\n"
3219             "\t-m display metaslab statistics\n"
3220             "\t-M display metaslab group statistics\n"
3221             "\t-h display histogram (requires -m or -M)\n",
3222             "vdev_t summary", vdev_print },
3223         { "zio", ":[-cpr]\n"
3224             "\t-c display children\n"
3225             "\t-p display parents\n"
3226             "\t-r display recursively",
3227             "zio_t summary", zio_print },
3228         { "zio_state", "?", "print out all zio_t structures on system or "
3229             "for a particular pool", zio_state },
3230         { "zfs_blkstats", ":[-v]",
3231             "given a spa_t, print block type stats from last scrub",
3232             zfs_blkstats },
3233         { "zfs_params", "", "print zfs tunable parameters", zfs_params },
3234         { "refcount", ":[-r]\n"
3235             "\t-r display recently removed references",
3236             "print refcount_t holders", refcount },
3237         { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
3238         { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
3239             zfs_acl_dump },
3240         { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
3241         { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
3242         { "sa_attr_table", ":", "print SA attribute table from sa_os_t",
3243             sa_attr_table},
3244         { "sa_attr", ": attr_id",
3245             "print SA attribute address when given sa_handle_t", sa_attr_print},
3246         { "zfs_dbgmsg", ":[-va]",
3247             "print zfs debug log", dbgmsg},
3248         { "rrwlock", ":",
3249             "print rrwlock_t, including readers", rrwlock},
3250         { NULL }
3251 };
3252 
/*
 * Walkers exported by this module.  Each entry gives the walker's name,
 * its description, and its init and step functions; the last slot of
 * every entry here is NULL (presumably the fini routine -- confirm
 * against mdb_walker_t).  { NULL } ends the table.
 */
3253 static const mdb_walker_t walkers[] = {
3254         { "zms_freelist", "walk ZFS metaslab freelist",
3255             freelist_walk_init, freelist_walk_step, NULL },
3256         { "txg_list", "given any txg_list_t *, walk all entries in all txgs",
3257             txg_list_walk_init, txg_list_walk_step, NULL },
3258         { "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
3259             txg_list0_walk_init, txg_list_walk_step, NULL },
3260         { "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
3261             txg_list1_walk_init, txg_list_walk_step, NULL },
3262         { "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
3263             txg_list2_walk_init, txg_list_walk_step, NULL },
3264         { "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
3265             txg_list3_walk_init, txg_list_walk_step, NULL },
3266         { "zio", "walk all zio structures, optionally for a particular spa_t",
3267             zio_walk_init, zio_walk_step, NULL },
3268         { "zio_root",
3269             "walk all root zio_t structures, optionally for a particular spa_t",
3270             zio_walk_init, zio_walk_root_step, NULL },
3271         { "spa", "walk all spa_t entries in the namespace",
3272             spa_walk_init, spa_walk_step, NULL },
3273         { "metaslab", "given a spa_t *, walk all metaslab_t structures",
3274             metaslab_walk_init, metaslab_walk_step, NULL },
3275         { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
3276             zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
             /* Both ACE walkers share zfs_aces_walk_step; only init differs. */
3277         { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
3278             zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
3279         { "zfs_acl_node_aces0",
3280             "given a zfs_acl_node_t, walk all ACEs as ace_t",
3281             zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
3282         { NULL }
3283 };
3284 
/*
 * Module information returned by _mdb_init(): the API version this module
 * was built against plus its dcmd and walker tables.
 */
3285 static const mdb_modinfo_t modinfo = {
3286         MDB_API_VERSION, dcmds, walkers
3287 };
3288 
/*
 * Module entry point: returns a pointer to this module's information
 * (the dcmd and walker tables declared above).
 */
3289 const mdb_modinfo_t *
3290 _mdb_init(void)
3291 {
3292         return (&modinfo);
3293 }