patch nuke-the-dbuf-hash

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  25  */
  26 
  27 /* Portions Copyright 2010 Robert Milkowski */
  28 
  29 #include <mdb/mdb_ctf.h>
  30 #include <sys/zfs_context.h>
  31 #include <sys/mdb_modapi.h>
  32 #include <sys/dbuf.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/dsl_pool.h>
  36 #include <sys/metaslab_impl.h>
  37 #include <sys/space_map.h>
  38 #include <sys/list.h>
  39 #include <sys/vdev_impl.h>
  40 #include <sys/zap_leaf.h>
  41 #include <sys/zap_impl.h>
  42 #include <ctype.h>
  43 #include <sys/zfs_acl.h>
  44 #include <sys/sa_impl.h>
  45 
  46 #ifdef _KERNEL
  47 #define ZFS_OBJ_NAME    "zfs"
  48 extern int64_t mdb_gethrtime(void);
  49 #else
  50 #define ZFS_OBJ_NAME    "libzpool.so.1"
  51 #endif
  52 
  53 #define ZFS_STRUCT      "struct " ZFS_OBJ_NAME "`"
  54 
  55 #ifndef _KERNEL
  56 int aok;
  57 #endif
  58 
  59 enum spa_flags {
  60         SPA_FLAG_CONFIG                 = 1 << 0,
  61         SPA_FLAG_VDEVS                  = 1 << 1,
  62         SPA_FLAG_ERRORS                 = 1 << 2,
  63         SPA_FLAG_METASLAB_GROUPS        = 1 << 3,
  64         SPA_FLAG_METASLABS              = 1 << 4,
  65         SPA_FLAG_HISTOGRAMS             = 1 << 5
  66 };
  67 
  68 #define SPA_FLAG_ALL_VDEV       \
  69         (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \
  70         SPA_FLAG_METASLABS | SPA_FLAG_HISTOGRAMS)
  71 
  72 static int
  73 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
  74     const char *member, int len, void *buf)
  75 {
  76         mdb_ctf_id_t id;
  77         ulong_t off;
  78         char name[64];
  79 
  80         if (idp == NULL) {
  81                 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
  82                         mdb_warn("couldn't find type %s", type);
  83                         return (DCMD_ERR);
  84                 }
  85                 idp = &id;
  86         } else {
  87                 type = name;
  88                 mdb_ctf_type_name(*idp, name, sizeof (name));
  89         }
  90 
  91         if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
  92                 mdb_warn("couldn't find member %s of type %s\n", member, type);
  93                 return (DCMD_ERR);
  94         }
  95         if (off % 8 != 0) {
  96                 mdb_warn("member %s of type %s is unsupported bitfield",
  97                     member, type);
  98                 return (DCMD_ERR);
  99         }
 100         off /= 8;
 101 
 102         if (mdb_vread(buf, len, addr + off) == -1) {
 103                 mdb_warn("failed to read %s from %s at %p",
 104                     member, type, addr + off);
 105                 return (DCMD_ERR);
 106         }
 107         /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
 108 
 109         return (0);
 110 }
 111 
      /*
       * Convenience wrappers around getmember().  In both macros `member' is
       * stringified to form the member name, and sizeof (dest) bytes are read
       * into dest.
       *
       * GETMEMB names the structure: `structname' must be a string literal,
       * because it is pasted onto ZFS_STRUCT to build the CTF type name.
       */
  112 #define GETMEMB(addr, structname, member, dest) \
  113         getmember(addr, ZFS_STRUCT structname, NULL, #member, \
  114         sizeof (dest), &(dest))
  115 
      /* GETMEMBID takes a pointer to an already-resolved CTF id instead. */
  116 #define GETMEMBID(addr, ctfid, member, dest) \
  117         getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
 118 
 119 static boolean_t
 120 strisprint(const char *cp)
 121 {
 122         for (; *cp; cp++) {
 123                 if (!isprint(*cp))
 124                         return (B_FALSE);
 125         }
 126         return (B_TRUE);
 127 }
 128 
 129 #define NICENUM_BUFLEN 6
 130 
 131 static int
 132 snprintfrac(char *buf, int len,
 133     uint64_t numerator, uint64_t denom, int frac_digits)
 134 {
 135         int mul = 1;
 136         int whole, frac, i;
 137 
 138         for (i = frac_digits; i; i--)
 139                 mul *= 10;
 140         whole = numerator / denom;
 141         frac = mul * numerator / denom - mul * whole;
 142         return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
 143 }
 144 
 145 static void
 146 mdb_nicenum(uint64_t num, char *buf)
 147 {
 148         uint64_t n = num;
 149         int index = 0;
 150         char *u;
 151 
 152         while (n >= 1024) {
 153                 n = (n + (1024 / 2)) / 1024; /* Round up or down */
 154                 index++;
 155         }
 156 
 157         u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
 158 
 159         if (index == 0) {
 160                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
 161                     (u_longlong_t)n);
 162         } else if (n < 10 && (num & (num - 1)) != 0) {
 163                 (void) snprintfrac(buf, NICENUM_BUFLEN,
 164                     num, 1ULL << 10 * index, 2);
 165                 strcat(buf, u);
 166         } else if (n < 100 && (num & (num - 1)) != 0) {
 167                 (void) snprintfrac(buf, NICENUM_BUFLEN,
 168                     num, 1ULL << 10 * index, 1);
 169                 strcat(buf, u);
 170         } else {
 171                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
 172                     (u_longlong_t)n, u);
 173         }
 174 }
 175 
 176 static int verbose;
 177 
 178 static int
 179 freelist_walk_init(mdb_walk_state_t *wsp)
 180 {
 181         if (wsp->walk_addr == NULL) {
 182                 mdb_warn("must supply starting address\n");
 183                 return (WALK_ERR);
 184         }
 185 
 186         wsp->walk_data = 0;  /* Index into the freelist */
 187         return (WALK_NEXT);
 188 }
 189 
 190 static int
 191 freelist_walk_step(mdb_walk_state_t *wsp)
 192 {
 193         uint64_t entry;
 194         uintptr_t number = (uintptr_t)wsp->walk_data;
 195         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 196                             "INVALID", "INVALID", "INVALID", "INVALID" };
 197         int mapshift = SPA_MINBLOCKSHIFT;
 198 
 199         if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
 200                 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
 201                 return (WALK_DONE);
 202         }
 203         wsp->walk_addr += sizeof (entry);
 204         wsp->walk_data = (void *)(number + 1);
 205 
 206         if (SM_DEBUG_DECODE(entry)) {
 207                 mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
 208                     number,
 209                     ddata[SM_DEBUG_ACTION_DECODE(entry)],
 210                     SM_DEBUG_TXG_DECODE(entry),
 211                     SM_DEBUG_SYNCPASS_DECODE(entry));
 212         } else {
 213                 mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
 214                     "size=%06llx", number,
 215                     SM_OFFSET_DECODE(entry) << mapshift,
 216                     (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
 217                     mapshift,
 218                     SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 219                     SM_RUN_DECODE(entry) << mapshift);
 220                 if (verbose)
 221                         mdb_printf("      (raw=%012llx)\n", entry);
 222                 mdb_printf("\n");
 223         }
 224         return (WALK_NEXT);
 225 }
 226 
 227 static int
 228 mdb_dsl_dir_name(uintptr_t addr, char *buf)
 229 {
 230         static int gotid;
 231         static mdb_ctf_id_t dd_id;
 232         uintptr_t dd_parent;
 233         char dd_myname[MAXNAMELEN];
 234 
 235         if (!gotid) {
 236                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir",
 237                     &dd_id) == -1) {
 238                         mdb_warn("couldn't find struct dsl_dir");
 239                         return (DCMD_ERR);
 240                 }
 241                 gotid = TRUE;
 242         }
 243         if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
 244             GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
 245                 return (DCMD_ERR);
 246         }
 247 
 248         if (dd_parent) {
 249                 if (mdb_dsl_dir_name(dd_parent, buf))
 250                         return (DCMD_ERR);
 251                 strcat(buf, "/");
 252         }
 253 
 254         if (dd_myname[0])
 255                 strcat(buf, dd_myname);
 256         else
 257                 strcat(buf, "???");
 258 
 259         return (0);
 260 }
 261 
 262 static int
 263 objset_name(uintptr_t addr, char *buf)
 264 {
 265         static int gotid;
 266         static mdb_ctf_id_t os_id, ds_id;
 267         uintptr_t os_dsl_dataset;
 268         char ds_snapname[MAXNAMELEN];
 269         uintptr_t ds_dir;
 270 
 271         buf[0] = '\0';
 272 
 273         if (!gotid) {
 274                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset",
 275                     &os_id) == -1) {
 276                         mdb_warn("couldn't find struct objset");
 277                         return (DCMD_ERR);
 278                 }
 279                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset",
 280                     &ds_id) == -1) {
 281                         mdb_warn("couldn't find struct dsl_dataset");
 282                         return (DCMD_ERR);
 283                 }
 284 
 285                 gotid = TRUE;
 286         }
 287 
 288         if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
 289                 return (DCMD_ERR);
 290 
 291         if (os_dsl_dataset == 0) {
 292                 strcat(buf, "mos");
 293                 return (0);
 294         }
 295 
 296         if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
 297             GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
 298                 return (DCMD_ERR);
 299         }
 300 
 301         if (ds_dir && mdb_dsl_dir_name(ds_dir, buf))
 302                 return (DCMD_ERR);
 303 
 304         if (ds_snapname[0]) {
 305                 strcat(buf, "@");
 306                 strcat(buf, ds_snapname);
 307         }
 308         return (0);
 309 }
 310 
 311 static void
 312 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
 313     const char *prefix)
 314 {
 315         const char *cp;
 316         size_t len = strlen(prefix);
 317 
 318         if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
 319                 if (strncmp(cp, prefix, len) == 0)
 320                         cp += len;
 321                 (void) strncpy(out, cp, size);
 322         } else {
 323                 mdb_snprintf(out, size, "? (%d)", val);
 324         }
 325 }
 326 
 327 /* ARGSUSED */
 328 static int
 329 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 330 {
 331         /*
 332          * This table can be approximately generated by running:
 333          * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
 334          */
 335         static const char *params[] = {
 336                 "arc_reduce_dnlc_percent",
 337                 "arc_lotsfree_percent",
 338                 "zfs_dirty_data_max",
 339                 "zfs_dirty_data_sync",
 340                 "zfs_delay_max_ns",
 341                 "zfs_delay_min_dirty_percent",
 342                 "zfs_delay_scale",
 343                 "zfs_vdev_max_active",
 344                 "zfs_vdev_sync_read_min_active",
 345                 "zfs_vdev_sync_read_max_active",
 346                 "zfs_vdev_sync_write_min_active",
 347                 "zfs_vdev_sync_write_max_active",
 348                 "zfs_vdev_async_read_min_active",
 349                 "zfs_vdev_async_read_max_active",
 350                 "zfs_vdev_async_write_min_active",
 351                 "zfs_vdev_async_write_max_active",
 352                 "zfs_vdev_scrub_min_active",
 353                 "zfs_vdev_scrub_max_active",
 354                 "zfs_vdev_async_write_active_min_dirty_percent",
 355                 "zfs_vdev_async_write_active_max_dirty_percent",
 356                 "spa_asize_inflation",
 357                 "zfs_arc_max",
 358                 "zfs_arc_min",
 359                 "arc_shrink_shift",
 360                 "zfs_mdcomp_disable",
 361                 "zfs_prefetch_disable",
 362                 "zfetch_max_streams",
 363                 "zfetch_min_sec_reap",
 364                 "zfetch_block_cap",
 365                 "zfetch_array_rd_sz",
 366                 "zfs_default_bs",
 367                 "zfs_default_ibs",
 368                 "metaslab_aliquot",
 369                 "reference_tracking_enable",
 370                 "reference_history",
 371                 "spa_max_replication_override",
 372                 "spa_mode_global",
 373                 "zfs_flags",
 374                 "zfs_txg_timeout",
 375                 "zfs_vdev_cache_max",
 376                 "zfs_vdev_cache_size",
 377                 "zfs_vdev_cache_bshift",
 378                 "vdev_mirror_shift",
 379                 "zfs_scrub_limit",
 380                 "zfs_no_scrub_io",
 381                 "zfs_no_scrub_prefetch",
 382                 "zfs_vdev_aggregation_limit",
 383                 "fzap_default_block_shift",
 384                 "zfs_immediate_write_sz",
 385                 "zfs_read_chunk_size",
 386                 "zfs_nocacheflush",
 387                 "zil_replay_disable",
 388                 "metaslab_gang_bang",
 389                 "metaslab_df_alloc_threshold",
 390                 "metaslab_df_free_pct",
 391                 "zio_injection_enabled",
 392                 "zvol_immediate_write_sz",
 393                 "zio_max_timeout_ms",
 394                 "zio_min_timeout_ms",
 395                 "zio_timeout_shift",
 396         };
 397 
 398         for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
 399                 int sz;
 400                 uint64_t val64;
 401                 uint32_t *val32p = (uint32_t *)&val64;
 402 
 403                 sz = mdb_readvar(&val64, params[i]);
 404                 if (sz == 4) {
 405                         mdb_printf("%s = 0x%x\n", params[i], *val32p);
 406                 } else if (sz == 8) {
 407                         mdb_printf("%s = 0x%llx\n", params[i], val64);
 408                 } else {
 409                         mdb_warn("variable %s not found", params[i]);
 410                 }
 411         }
 412 
 413         return (DCMD_OK);
 414 }
 415 
 416 /* ARGSUSED */
 417 static int
 418 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 419 {
 420         mdb_ctf_id_t type_enum, checksum_enum, compress_enum;
 421         char type[80], checksum[80], compress[80];
 422         blkptr_t blk, *bp = &blk;
 423         char buf[BP_SPRINTF_LEN];
 424 
 425         if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
 426                 mdb_warn("failed to read blkptr_t");
 427                 return (DCMD_ERR);
 428         }
 429 
 430         if (mdb_ctf_lookup_by_name("enum dmu_object_type", &type_enum) == -1 ||
 431             mdb_ctf_lookup_by_name("enum zio_checksum", &checksum_enum) == -1 ||
 432             mdb_ctf_lookup_by_name("enum zio_compress", &compress_enum) == -1) {
 433                 mdb_warn("Could not find blkptr enumerated types");
 434                 return (DCMD_ERR);
 435         }
 436 
 437         enum_lookup(type, sizeof (type), type_enum,
 438             BP_GET_TYPE(bp), "DMU_OT_");
 439         enum_lookup(checksum, sizeof (checksum), checksum_enum,
 440             BP_GET_CHECKSUM(bp), "ZIO_CHECKSUM_");
 441         enum_lookup(compress, sizeof (compress), compress_enum,
 442             BP_GET_COMPRESS(bp), "ZIO_COMPRESS_");
 443 
 444         SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type,
 445             checksum, compress);
 446 
 447         mdb_printf("%s\n", buf);
 448 
 449         return (DCMD_OK);
 450 }
 451 
 452 typedef struct mdb_dmu_buf_impl {
 453         struct {
 454                 uint64_t db_object;
 455         } db;
 456         uintptr_t db_objset;
 457         uint64_t db_level;
 458         uint64_t db_blkid;
 459         struct {
 460                 uint64_t rc_count;
 461         } db_holds;
 462 } mdb_dmu_buf_impl_t;
 463 
 464 /* ARGSUSED */
 465 static int
 466 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 467 {
 468         mdb_dmu_buf_impl_t db;
 469         char objectname[32];
 470         char blkidname[32];
 471         char path[MAXNAMELEN];
 472 
 473         if (DCMD_HDRSPEC(flags))
 474                 mdb_printf("        addr object lvl blkid holds os\n");
 475 
 476         if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
 477             addr, 0) == -1)
 478                 return (DCMD_ERR);
 479 
 480         if (db.db.db_object == DMU_META_DNODE_OBJECT)
 481                 (void) strcpy(objectname, "mdn");
 482         else
 483                 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
 484                     (u_longlong_t)db.db.db_object);
 485 
 486         if (db.db_blkid == DMU_BONUS_BLKID)
 487                 (void) strcpy(blkidname, "bonus");
 488         else
 489                 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
 490                     (u_longlong_t)db.db_blkid);
 491 
 492         if (objset_name(db.db_objset, path)) {
 493                 return (DCMD_ERR);
 494         }
 495 
 496         mdb_printf("%p %8s %1u %9s %2llu %s\n", addr,
 497             objectname, (int)db.db_level, blkidname,
 498             db.db_holds.rc_count, path);
 499 
 500         return (DCMD_OK);
 501 }
 502 
 503 /* ARGSUSED */
 504 static int
 505 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 506 {
 507 #define HISTOSZ 32
 508         uintptr_t dbp;
 509         dmu_buf_impl_t db;
 510         dbuf_hash_table_t ht;
 511         uint64_t bucket, ndbufs;
 512         uint64_t histo[HISTOSZ];
 513         uint64_t histo2[HISTOSZ];
 514         int i, maxidx;
 515 
 516         if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
 517                 mdb_warn("failed to read 'dbuf_hash_table'");
 518                 return (DCMD_ERR);
 519         }
 520 
 521         for (i = 0; i < HISTOSZ; i++) {
 522                 histo[i] = 0;
 523                 histo2[i] = 0;
 524         }
 525 
 526         ndbufs = 0;
 527         for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
 528                 int len;
 529 
 530                 if (mdb_vread(&dbp, sizeof (void *),
 531                     (uintptr_t)(ht.hash_table+bucket)) == -1) {
 532                         mdb_warn("failed to read hash bucket %u at %p",
 533                             bucket, ht.hash_table+bucket);
 534                         return (DCMD_ERR);
 535                 }
 536 
 537                 len = 0;
 538                 while (dbp != 0) {
 539                         if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
 540                             dbp) == -1) {
 541                                 mdb_warn("failed to read dbuf at %p", dbp);
 542                                 return (DCMD_ERR);
 543                         }
 544                         dbp = (uintptr_t)db.db_hash_next;
 545                         for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
 546                                 histo2[i]++;
 547                         len++;
 548                         ndbufs++;
 549                 }
 550 
 551                 if (len >= HISTOSZ)
 552                         len = HISTOSZ-1;
 553                 histo[len]++;
 554         }
 555 
 556         mdb_printf("hash table has %llu buckets, %llu dbufs "
 557             "(avg %llu buckets/dbuf)\n",
 558             ht.hash_table_mask+1, ndbufs,
 559             (ht.hash_table_mask+1)/ndbufs);
 560 
 561         mdb_printf("\n");
 562         maxidx = 0;
 563         for (i = 0; i < HISTOSZ; i++)
 564                 if (histo[i] > 0)
 565                         maxidx = i;
 566         mdb_printf("hash chain length   number of buckets\n");
 567         for (i = 0; i <= maxidx; i++)
 568                 mdb_printf("%u                  %llu\n", i, histo[i]);
 569 
 570         mdb_printf("\n");
 571         maxidx = 0;
 572         for (i = 0; i < HISTOSZ; i++)
 573                 if (histo2[i] > 0)
 574                         maxidx = i;
 575         mdb_printf("hash chain depth    number of dbufs\n");
 576         for (i = 0; i <= maxidx; i++)
 577                 mdb_printf("%u or more          %llu    %llu%%\n",
 578                     i, histo2[i], histo2[i]*100/ndbufs);
 579 
 580 
 581         return (DCMD_OK);
 582 }
 583 
 584 #define CHAIN_END 0xffff
 585 /*
 586  * ::zap_leaf [-v]
 587  *
 588  * Print a zap_leaf_phys_t, assumed to be 16k
 589  */
 590 /* ARGSUSED */
 591 static int
 592 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 593 {
 594         char buf[16*1024];
 595         int verbose = B_FALSE;
 596         int four = B_FALSE;
 597         zap_leaf_t l;
 598         zap_leaf_phys_t *zlp = (void *)buf;
 599         int i;
 600 
 601         if (mdb_getopts(argc, argv,
 602             'v', MDB_OPT_SETBITS, TRUE, &verbose,
 603             '4', MDB_OPT_SETBITS, TRUE, &four,
 604             NULL) != argc)
 605                 return (DCMD_USAGE);
 606 
 607         l.l_phys = zlp;
 608         l.l_bs = 14; /* assume 16k blocks */
 609         if (four)
 610                 l.l_bs = 12;
 611 
 612         if (!(flags & DCMD_ADDRSPEC)) {
 613                 return (DCMD_USAGE);
 614         }
 615 
 616         if (mdb_vread(buf, sizeof (buf), addr) == -1) {
 617                 mdb_warn("failed to read zap_leaf_phys_t at %p", addr);
 618                 return (DCMD_ERR);
 619         }
 620 
 621         if (zlp->l_hdr.lh_block_type != ZBT_LEAF ||
 622             zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) {
 623                 mdb_warn("This does not appear to be a zap_leaf_phys_t");
 624                 return (DCMD_ERR);
 625         }
 626 
 627         mdb_printf("zap_leaf_phys_t at %p:\n", addr);
 628         mdb_printf("    lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len);
 629         mdb_printf("    lh_prefix = %llx\n", zlp->l_hdr.lh_prefix);
 630         mdb_printf("    lh_nentries = %u\n", zlp->l_hdr.lh_nentries);
 631         mdb_printf("    lh_nfree = %u\n", zlp->l_hdr.lh_nfree,
 632             zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l)));
 633         mdb_printf("    lh_freelist = %u\n", zlp->l_hdr.lh_freelist);
 634         mdb_printf("    lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags,
 635             zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ?
 636             "ENTRIES_CDSORTED" : "");
 637 
 638         if (verbose) {
 639                 mdb_printf(" hash table:\n");
 640                 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) {
 641                         if (zlp->l_hash[i] != CHAIN_END)
 642                                 mdb_printf("    %u: %u\n", i, zlp->l_hash[i]);
 643                 }
 644         }
 645 
 646         mdb_printf(" chunks:\n");
 647         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
 648                 /* LINTED: alignment */
 649                 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i);
 650                 switch (zlc->l_entry.le_type) {
 651                 case ZAP_CHUNK_FREE:
 652                         if (verbose) {
 653                                 mdb_printf("    %u: free; lf_next = %u\n",
 654                                     i, zlc->l_free.lf_next);
 655                         }
 656                         break;
 657                 case ZAP_CHUNK_ENTRY:
 658                         mdb_printf("    %u: entry\n", i);
 659                         if (verbose) {
 660                                 mdb_printf("        le_next = %u\n",
 661                                     zlc->l_entry.le_next);
 662                         }
 663                         mdb_printf("        le_name_chunk = %u\n",
 664                             zlc->l_entry.le_name_chunk);
 665                         mdb_printf("        le_name_numints = %u\n",
 666                             zlc->l_entry.le_name_numints);
 667                         mdb_printf("        le_value_chunk = %u\n",
 668                             zlc->l_entry.le_value_chunk);
 669                         mdb_printf("        le_value_intlen = %u\n",
 670                             zlc->l_entry.le_value_intlen);
 671                         mdb_printf("        le_value_numints = %u\n",
 672                             zlc->l_entry.le_value_numints);
 673                         mdb_printf("        le_cd = %u\n",
 674                             zlc->l_entry.le_cd);
 675                         mdb_printf("        le_hash = %llx\n",
 676                             zlc->l_entry.le_hash);
 677                         break;
 678                 case ZAP_CHUNK_ARRAY:
 679                         mdb_printf("    %u: array", i);
 680                         if (strisprint((char *)zlc->l_array.la_array))
 681                                 mdb_printf(" \"%s\"", zlc->l_array.la_array);
 682                         mdb_printf("\n");
 683                         if (verbose) {
 684                                 int j;
 685                                 mdb_printf("        ");
 686                                 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) {
 687                                         mdb_printf("%02x ",
 688                                             zlc->l_array.la_array[j]);
 689                                 }
 690                                 mdb_printf("\n");
 691                         }
 692                         if (zlc->l_array.la_next != CHAIN_END) {
 693                                 mdb_printf("        lf_next = %u\n",
 694                                     zlc->l_array.la_next);
 695                         }
 696                         break;
 697                 default:
 698                         mdb_printf("    %u: undefined type %u\n",
 699                             zlc->l_entry.le_type);
 700                 }
 701         }
 702 
 703         return (DCMD_OK);
 704 }
 705 
 706 typedef struct dbufs_data {
 707         mdb_ctf_id_t id;
 708         uint64_t objset;
 709         uint64_t object;
 710         uint64_t level;
 711         uint64_t blkid;
 712         char *osname;
 713 } dbufs_data_t;
 714 
 715 #define DBUFS_UNSET     (0xbaddcafedeadbeefULL)
 716 
 717 /* ARGSUSED */
 718 static int
 719 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
 720 {
 721         dbufs_data_t *data = arg;
 722         uintptr_t objset;
 723         dmu_buf_t db;
 724         uint8_t level;
 725         uint64_t blkid;
 726         char osname[MAXNAMELEN];
 727 
 728         if (GETMEMBID(addr, &data->id, db_objset, objset) ||
 729             GETMEMBID(addr, &data->id, db, db) ||
 730             GETMEMBID(addr, &data->id, db_level, level) ||
 731             GETMEMBID(addr, &data->id, db_blkid, blkid)) {
 732                 return (WALK_ERR);
 733         }
 734 
 735         if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
 736             (data->osname == NULL || (objset_name(objset, osname) == 0 &&
 737             strcmp(data->osname, osname) == 0)) &&
 738             (data->object == DBUFS_UNSET || data->object == db.db_object) &&
 739             (data->level == DBUFS_UNSET || data->level == level) &&
 740             (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
 741                 mdb_printf("%#lr\n", addr);
 742         }
 743         return (WALK_NEXT);
 744 }
 745 
 746 /* ARGSUSED */
 747 static int
 748 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 749 {
 750         dbufs_data_t data;
 751         char *object = NULL;
 752         char *blkid = NULL;
 753 
 754         data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
 755         data.osname = NULL;
 756 
 757         if (mdb_getopts(argc, argv,
 758             'O', MDB_OPT_UINT64, &data.objset,
 759             'n', MDB_OPT_STR, &data.osname,
 760             'o', MDB_OPT_STR, &object,
 761             'l', MDB_OPT_UINT64, &data.level,
 762             'b', MDB_OPT_STR, &blkid) != argc) {
 763                 return (DCMD_USAGE);
 764         }
 765 
 766         if (object) {
 767                 if (strcmp(object, "mdn") == 0) {
 768                         data.object = DMU_META_DNODE_OBJECT;
 769                 } else {
 770                         data.object = mdb_strtoull(object);
 771                 }
 772         }
 773 
 774         if (blkid) {
 775                 if (strcmp(blkid, "bonus") == 0) {
 776                         data.blkid = DMU_BONUS_BLKID;
 777                 } else {
 778                         data.blkid = mdb_strtoull(blkid);
 779                 }
 780         }
 781 
 782         if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) {
 783                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 784                 return (DCMD_ERR);
 785         }
 786 
 787         if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
 788                 mdb_warn("can't walk dbufs");
 789                 return (DCMD_ERR);
 790         }
 791 
 792         return (DCMD_OK);
 793 }
 794 
 795 typedef struct abuf_find_data {
 796         dva_t dva;
 797         mdb_ctf_id_t id;
 798 } abuf_find_data_t;
 799 
 800 /* ARGSUSED */
 801 static int
 802 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
 803 {
 804         abuf_find_data_t *data = arg;
 805         dva_t dva;
 806 
 807         if (GETMEMBID(addr, &data->id, b_dva, dva)) {
 808                 return (WALK_ERR);
 809         }
 810 
 811         if (dva.dva_word[0] == data->dva.dva_word[0] &&
 812             dva.dva_word[1] == data->dva.dva_word[1]) {
 813                 mdb_printf("%#lr\n", addr);
 814         }
 815         return (WALK_NEXT);
 816 }
 817 
 818 /* ARGSUSED */
 819 static int
 820 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 821 {
 822         abuf_find_data_t data;
 823         GElf_Sym sym;
 824         int i;
 825         const char *syms[] = {
 826                 "ARC_mru",
 827                 "ARC_mru_ghost",
 828                 "ARC_mfu",
 829                 "ARC_mfu_ghost",
 830         };
 831 
 832         if (argc != 2)
 833                 return (DCMD_USAGE);
 834 
 835         for (i = 0; i < 2; i ++) {
 836                 switch (argv[i].a_type) {
 837                 case MDB_TYPE_STRING:
 838                         data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
 839                         break;
 840                 case MDB_TYPE_IMMEDIATE:
 841                         data.dva.dva_word[i] = argv[i].a_un.a_val;
 842                         break;
 843                 default:
 844                         return (DCMD_USAGE);
 845                 }
 846         }
 847 
 848         if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) {
 849                 mdb_warn("couldn't find struct arc_buf_hdr");
 850                 return (DCMD_ERR);
 851         }
 852 
 853         for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
 854                 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) {
 855                         mdb_warn("can't find symbol %s", syms[i]);
 856                         return (DCMD_ERR);
 857                 }
 858 
 859                 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
 860                         mdb_warn("can't walk %s", syms[i]);
 861                         return (DCMD_ERR);
 862                 }
 863         }
 864 
 865         return (DCMD_OK);
 866 }
 867 
 868 
 869 typedef struct dbgmsg_arg {
             /* Output options passed from dbgmsg() to dbgmsg_cb(). */
 870         boolean_t da_verbose;   /* -v: also print timestamp and ::whatis */
 871         boolean_t da_address;   /* -a: prefix each message with its address */
 872 } dbgmsg_arg_t;
 873 
 874 /* ARGSUSED */
 875 static int
 876 dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
 877 {
 878         static mdb_ctf_id_t id;
 879         static boolean_t gotid;
 880         static ulong_t off;
 881 
 882         dbgmsg_arg_t *da = arg;
 883         time_t timestamp;
 884         char buf[1024];
 885 
 886         if (!gotid) {
 887                 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) ==
 888                     -1) {
 889                         mdb_warn("couldn't find struct zfs_dbgmsg");
 890                         return (WALK_ERR);
 891                 }
 892                 gotid = TRUE;
 893                 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
 894                         mdb_warn("couldn't find zdm_msg");
 895                         return (WALK_ERR);
 896                 }
 897                 off /= 8;
 898         }
 899 
 900 
 901         if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
 902                 return (WALK_ERR);
 903         }
 904 
 905         if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
 906                 mdb_warn("failed to read zdm_msg at %p\n", addr + off);
 907                 return (DCMD_ERR);
 908         }
 909 
 910         if (da->da_address)
 911                 mdb_printf("%p ", addr);
 912         if (da->da_verbose)
 913                 mdb_printf("%Y ", timestamp);
 914 
 915         mdb_printf("%s\n", buf);
 916 
 917         if (da->da_verbose)
 918                 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
 919 
 920         return (WALK_NEXT);
 921 }
 922 
 923 /* ARGSUSED */
 924 static int
 925 dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 926 {
 927         GElf_Sym sym;
 928         dbgmsg_arg_t da = { 0 };
 929 
 930         if (mdb_getopts(argc, argv,
 931             'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose,
 932             'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address,
 933             NULL) != argc)
 934                 return (DCMD_USAGE);
 935 
 936         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
 937                 mdb_warn("can't find zfs_dbgmsgs");
 938                 return (DCMD_ERR);
 939         }
 940 
 941         if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) {
 942                 mdb_warn("can't walk zfs_dbgmsgs");
 943                 return (DCMD_ERR);
 944         }
 945 
 946         return (DCMD_OK);
 947 }
 948 
 949 /*ARGSUSED*/
 950 static int
 951 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 952 {
 953         kstat_named_t *stats;
 954         GElf_Sym sym;
 955         int nstats, i;
 956         uint_t opt_a = FALSE;
 957         uint_t opt_b = FALSE;
 958         uint_t shift = 0;
 959         const char *suffix;
 960 
 961         static const char *bytestats[] = {
 962                 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
 963                 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
 964                 NULL
 965         };
 966 
 967         static const char *extras[] = {
 968                 "arc_no_grow", "arc_tempreserve",
 969                 NULL
 970         };
 971 
 972         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) {
 973                 mdb_warn("failed to find 'arc_stats'");
 974                 return (DCMD_ERR);
 975         }
 976 
 977         stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
 978 
 979         if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
 980                 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
 981                 return (DCMD_ERR);
 982         }
 983 
 984         nstats = sym.st_size / sizeof (kstat_named_t);
 985 
 986         /* NB: -a / opt_a are ignored for backwards compatability */
 987         if (mdb_getopts(argc, argv,
 988             'a', MDB_OPT_SETBITS, TRUE, &opt_a,
 989             'b', MDB_OPT_SETBITS, TRUE, &opt_b,
 990             'k', MDB_OPT_SETBITS, 10, &shift,
 991             'm', MDB_OPT_SETBITS, 20, &shift,
 992             'g', MDB_OPT_SETBITS, 30, &shift,
 993             NULL) != argc)
 994                 return (DCMD_USAGE);
 995 
 996         if (!opt_b && !shift)
 997                 shift = 20;
 998 
 999         switch (shift) {
1000         case 0:
1001                 suffix = "B";
1002                 break;
1003         case 10:
1004                 suffix = "KB";
1005                 break;
1006         case 20:
1007                 suffix = "MB";
1008                 break;
1009         case 30:
1010                 suffix = "GB";
1011                 break;
1012         default:
1013                 suffix = "XX";
1014         }
1015 
1016         for (i = 0; i < nstats; i++) {
1017                 int j;
1018                 boolean_t bytes = B_FALSE;
1019 
1020                 for (j = 0; bytestats[j]; j++) {
1021                         if (strcmp(stats[i].name, bytestats[j]) == 0) {
1022                                 bytes = B_TRUE;
1023                                 break;
1024                         }
1025                 }
1026 
1027                 if (bytes) {
1028                         mdb_printf("%-25s = %9llu %s\n", stats[i].name,
1029                             stats[i].value.ui64 >> shift, suffix);
1030                 } else {
1031                         mdb_printf("%-25s = %9llu\n", stats[i].name,
1032                             stats[i].value.ui64);
1033                 }
1034         }
1035 
1036         for (i = 0; extras[i]; i++) {
1037                 uint64_t buf;
1038 
1039                 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) {
1040                         mdb_warn("failed to find '%s'", extras[i]);
1041                         return (DCMD_ERR);
1042                 }
1043 
1044                 if (sym.st_size != sizeof (uint64_t) &&
1045                     sym.st_size != sizeof (uint32_t)) {
1046                         mdb_warn("expected scalar for variable '%s'\n",
1047                             extras[i]);
1048                         return (DCMD_ERR);
1049                 }
1050 
1051                 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
1052                         mdb_warn("couldn't read '%s'", extras[i]);
1053                         return (DCMD_ERR);
1054                 }
1055 
1056                 mdb_printf("%-25s = ", extras[i]);
1057 
1058                 /* NB: all the 64-bit extras happen to be byte counts */
1059                 if (sym.st_size == sizeof (uint64_t))
1060                         mdb_printf("%9llu %s\n", buf >> shift, suffix);
1061 
1062                 if (sym.st_size == sizeof (uint32_t))
1063                         mdb_printf("%9d\n", *((uint32_t *)&buf));
1064         }
1065         return (DCMD_OK);
1066 }
1067 
1068 typedef struct mdb_spa_print {
             /* Members of spa_t that spa_print() reads via mdb_ctf_vread(). */
1069         pool_state_t spa_state; /* mapped to a name through statetab[] */
1070         char spa_name[MAXNAMELEN];      /* printed in the NAME column */
1071 } mdb_spa_print_t;
1072 
1073 /*
1074  * ::spa
1075  *
1076  *      -c      Print configuration information as well
1077  *      -v      Print vdev state
1078  *      -e      Print vdev error stats
1079  *      -m      Print vdev metaslab info
1080  *      -M      print vdev metaslab group info
1081  *      -h      Print histogram info (must be combined with -m or -M)
1082  *
1083  * Print a summarized spa_t.  When given no arguments, prints out a table of all
1084  * active pools on the system.
1085  */
1086 /* ARGSUSED */
1087 static int
1088 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1089 {
1090         const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
1091                 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
1092         const char *state;
1093         int spa_flags = 0;
1094 
1095         if (mdb_getopts(argc, argv,
1096             'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags,
1097             'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags,
1098             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1099             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1100             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1101             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1102             NULL) != argc)
1103                 return (DCMD_USAGE);
1104 
1105         if (!(flags & DCMD_ADDRSPEC)) {
1106                 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
1107                         mdb_warn("can't walk spa");
1108                         return (DCMD_ERR);
1109                 }
1110 
1111                 return (DCMD_OK);
1112         }
1113 
1114         if (flags & DCMD_PIPE_OUT) {
1115                 mdb_printf("%#lr\n", addr);
1116                 return (DCMD_OK);
1117         }
1118 
1119         if (DCMD_HDRSPEC(flags))
1120                 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
1121                     sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
1122 
1123         mdb_spa_print_t spa;
1124         if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1)
1125                 return (DCMD_ERR);
1126 
1127         if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
1128                 state = "UNKNOWN";
1129         else
1130                 state = statetab[spa.spa_state];
1131 
1132         mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
1133 
1134         if (spa_flags & SPA_FLAG_CONFIG) {
1135                 mdb_printf("\n");
1136                 mdb_inc_indent(4);
1137                 if (mdb_call_dcmd("spa_config", addr, flags, 0,
1138                     NULL) != DCMD_OK)
1139                         return (DCMD_ERR);
1140                 mdb_dec_indent(4);
1141         }
1142 
1143         if (spa_flags & SPA_FLAG_ALL_VDEV) {
1144                 mdb_arg_t v;
1145                 char opts[100] = "-";
1146                 int args =
1147                     (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1;
1148 
1149                 if (spa_flags & SPA_FLAG_ERRORS)
1150                         strcat(opts, "e");
1151                 if (spa_flags & SPA_FLAG_METASLABS)
1152                         strcat(opts, "m");
1153                 if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1154                         strcat(opts, "M");
1155                 if (spa_flags & SPA_FLAG_HISTOGRAMS)
1156                         strcat(opts, "h");
1157 
1158                 v.a_type = MDB_TYPE_STRING;
1159                 v.a_un.a_str = opts;
1160 
1161                 mdb_printf("\n");
1162                 mdb_inc_indent(4);
1163                 if (mdb_call_dcmd("spa_vdevs", addr, flags, args,
1164                     &v) != DCMD_OK)
1165                         return (DCMD_ERR);
1166                 mdb_dec_indent(4);
1167         }
1168 
1169         return (DCMD_OK);
1170 }
1171 
1172 typedef struct mdb_spa_config_spa {
             /* The one spa member spa_print_config() reads. */
1173         uintptr_t spa_config;   /* target address handed to ::nvlist; 0 if none */
1174 } mdb_spa_config_spa_t;
1175 
1176 /*
1177  * ::spa_config
1178  *
1179  * Given a spa_t, print the configuration information stored in spa_config.
1180  * Since it's just an nvlist, format it as an indented list of name=value pairs.
1181  * We simply read the value of spa_config and pass off to ::nvlist.
1182  */
1183 /* ARGSUSED */
1184 static int
1185 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1186 {
1187         mdb_spa_config_spa_t spa;
1188 
1189         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1190                 return (DCMD_USAGE);
1191 
1192         if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t",
1193             addr, 0) == -1)
1194                 return (DCMD_ERR);
1195 
1196         if (spa.spa_config == 0) {
1197                 mdb_printf("(none)\n");
1198                 return (DCMD_OK);
1199         }
1200 
1201         return (mdb_call_dcmd("nvlist", spa.spa_config, flags,
1202             0, NULL));
1203 }
1204 
1205 const char histo_stars[] = "****************************************";
     /* Full-width bar above; dump_histogram() prints a tail of it per bucket. */
1206 const int histo_width = sizeof (histo_stars) - 1;
     /* Number of '*' characters in histo_stars (sizeof minus the NUL). */
1207 
1208 static void
1209 dump_histogram(const uint64_t *histo, int size, int offset)
1210 {
1211         int i;
1212         int minidx = size - 1;
1213         int maxidx = 0;
1214         uint64_t max = 0;
1215 
1216         for (i = 0; i < size; i++) {
1217                 if (histo[i] > max)
1218                         max = histo[i];
1219                 if (histo[i] > 0 && i > maxidx)
1220                         maxidx = i;
1221                 if (histo[i] > 0 && i < minidx)
1222                         minidx = i;
1223         }
1224 
1225         if (max < histo_width)
1226                 max = histo_width;
1227 
1228         for (i = minidx; i <= maxidx; i++) {
1229                 mdb_printf("%3u: %6llu %s\n",
1230                     i + offset, (u_longlong_t)histo[i],
1231                     &histo_stars[(max - histo[i]) * histo_width / max]);
1232         }
1233 }
1234 
1235 typedef struct mdb_range_tree {
             /* Member of range_tree_t read by space_cb() via mdb_ctf_vread(). */
1236         uint64_t rt_space;      /* summed into the space_data_t totals */
1237 } mdb_range_tree_t;
1238 
1239 typedef struct mdb_metaslab_group {
             /* Members of metaslab_group_t used by metaslab_group_stats(). */
             /* Printed as a percentage unless it equals ZFS_FRAG_INVALID. */
1240         uint64_t mg_fragmentation;
             /* Passed to dump_histogram() when SPA_FLAG_HISTOGRAMS is set. */
1241         uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
1242 } mdb_metaslab_group_t;
1243 
1244 typedef struct mdb_metaslab {
             /*
              * Members of metaslab_t read by metaslab_stats() and space_cb().
              * The uintptr_t members hold target addresses, not local pointers.
              */
1245         uint64_t ms_id;                 /* ID column */
1246         uint64_t ms_start;              /* OFFSET column (printed in hex) */
1247         uint64_t ms_size;               /* FREE is ms_size minus sm_alloc */
1248         uint64_t ms_fragmentation;      /* ZFS_FRAG_INVALID prints as "-" */
1249         uintptr_t ms_alloctree[TXG_SIZE];       /* read as range_tree_t */
1250         uintptr_t ms_freetree[TXG_SIZE];        /* read as range_tree_t */
1251         uintptr_t ms_tree;              /* read as range_tree_t */
1252         uintptr_t ms_sm;                /* space_map_t address; may be 0 */
1253 } mdb_metaslab_t;
1254 
1255 typedef struct mdb_space_map_phys_t {
             /* Members of space_map_phys_t read through sm_phys. */
1256         uint64_t smp_alloc;     /* subtracted from sm_size in space_cb() */
             /* Dumped by metaslab_stats() when histograms are requested. */
1257         uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
1258 } mdb_space_map_phys_t;
1259 
1260 typedef struct mdb_space_map {
             /* Members of space_map_t read through a metaslab's ms_sm. */
1261         uint64_t sm_size;       /* avail/nowavail start from this in space_cb() */
1262         uint8_t sm_shift;       /* histogram index offset in metaslab_stats() */
1263         uint64_t sm_alloc;      /* subtracted from ms_size / sm_size */
1264         uintptr_t sm_phys;      /* space_map_phys_t address; may be 0 */
1265 } mdb_space_map_t;
1266 
1267 typedef struct mdb_vdev {
             /* Members of vdev_t read by metaslab_stats(). */
1268         uintptr_t vdev_ms;      /* address of the array of metaslab addresses */
1269         uint64_t vdev_ms_count; /* number of entries in that array */
1270         vdev_stat_t vdev_stat;  /* not referenced by metaslab_stats() */
1271 } mdb_vdev_t;
1272 
1273 static int
1274 metaslab_stats(uintptr_t addr, int spa_flags)
1275 {
1276         mdb_vdev_t vdev;
1277         uintptr_t *vdev_ms;
1278 
1279         if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
1280             (uintptr_t)addr, 0) == -1) {
1281                 mdb_warn("failed to read vdev at %p\n", addr);
1282                 return (DCMD_ERR);
1283         }
1284 
1285         mdb_inc_indent(4);
1286         mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID",
1287             "OFFSET", "FREE", "FRAGMENTATION");
1288 
1289         vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *),
1290             UM_SLEEP | UM_GC);
1291         if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *),
1292             (uintptr_t)vdev.vdev_ms) == -1) {
1293                 mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms);
1294                 return (DCMD_ERR);
1295         }
1296 
1297         for (int m = 0; m < vdev.vdev_ms_count; m++) {
1298                 mdb_metaslab_t ms;
1299                 mdb_space_map_t sm = { 0 };
1300                 char free[NICENUM_BUFLEN];
1301 
1302                 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1303                     (uintptr_t)vdev_ms[m], 0) == -1)
1304                         return (DCMD_ERR);
1305 
1306                 if (ms.ms_sm != NULL &&
1307                     mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t",
1308                     ms.ms_sm, 0) == -1)
1309                         return (DCMD_ERR);
1310 
1311                 mdb_nicenum(ms.ms_size - sm.sm_alloc, free);
1312 
1313                 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id,
1314                     ms.ms_start, free);
1315                 if (ms.ms_fragmentation == ZFS_FRAG_INVALID)
1316                         mdb_printf("%9s\n", "-");
1317                 else
1318                         mdb_printf("%9llu%%\n", ms.ms_fragmentation);
1319 
1320                 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != NULL) {
1321                         mdb_space_map_phys_t smp;
1322 
1323                         if (sm.sm_phys == NULL)
1324                                 continue;
1325 
1326                         (void) mdb_ctf_vread(&smp, "space_map_phys_t",
1327                             "mdb_space_map_phys_t", sm.sm_phys, 0);
1328 
1329                         dump_histogram(smp.smp_histogram,
1330                             SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift);
1331                 }
1332         }
1333         mdb_dec_indent(4);
1334         return (DCMD_OK);
1335 }
1336 
1337 static int
1338 metaslab_group_stats(uintptr_t addr, int spa_flags)
1339 {
1340         mdb_metaslab_group_t mg;
1341         if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1342             (uintptr_t)addr, 0) == -1) {
1343                 mdb_warn("failed to read vdev_mg at %p\n", addr);
1344                 return (DCMD_ERR);
1345         }
1346 
1347         mdb_inc_indent(4);
1348         mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION");
1349         if (mg.mg_fragmentation == ZFS_FRAG_INVALID)
1350                 mdb_printf("%0?p %15s\n", addr, "-");
1351         else
1352                 mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation);
1353 
1354         if (spa_flags & SPA_FLAG_HISTOGRAMS)
1355                 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1356         mdb_dec_indent(4);
1357         return (DCMD_OK);
1358 }
1359 
1360 /*
1361  * ::vdev
1362  *
1363  * Print out a summarized vdev_t, in the following form:
1364  *
1365  * ADDR             STATE       AUX            DESC
1366  * fffffffbcde23df0 HEALTHY     -              /dev/dsk/c0t0d0
1367  *
1368  * If '-r' is specified, recursively visit all children.
1369  *
1370  * With '-e', the statistics associated with the vdev are printed as well.
1371  */
1372 static int
1373 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive,
1374     int spa_flags)
1375 {
1376         vdev_t vdev;
1377         char desc[MAXNAMELEN];
1378         int c, children;
1379         uintptr_t *child;
1380         const char *state, *aux;
1381 
1382         if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1383                 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1384                 return (DCMD_ERR);
1385         }
1386 
1387         if (flags & DCMD_PIPE_OUT) {
1388                 mdb_printf("%#lr\n", addr);
1389         } else {
1390                 if (vdev.vdev_path != NULL) {
1391                         if (mdb_readstr(desc, sizeof (desc),
1392                             (uintptr_t)vdev.vdev_path) == -1) {
1393                                 mdb_warn("failed to read vdev_path at %p\n",
1394                                     vdev.vdev_path);
1395                                 return (DCMD_ERR);
1396                         }
1397                 } else if (vdev.vdev_ops != NULL) {
1398                         vdev_ops_t ops;
1399                         if (mdb_vread(&ops, sizeof (ops),
1400                             (uintptr_t)vdev.vdev_ops) == -1) {
1401                                 mdb_warn("failed to read vdev_ops at %p\n",
1402                                     vdev.vdev_ops);
1403                                 return (DCMD_ERR);
1404                         }
1405                         (void) strcpy(desc, ops.vdev_op_type);
1406                 } else {
1407                         (void) strcpy(desc, "<unknown>");
1408                 }
1409 
1410                 if (depth == 0 && DCMD_HDRSPEC(flags))
1411                         mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1412                             "ADDR", "STATE", "AUX",
1413                             sizeof (uintptr_t) == 4 ? 43 : 35,
1414                             "DESCRIPTION");
1415 
1416                 mdb_printf("%0?p ", addr);
1417 
1418                 switch (vdev.vdev_state) {
1419                 case VDEV_STATE_CLOSED:
1420                         state = "CLOSED";
1421                         break;
1422                 case VDEV_STATE_OFFLINE:
1423                         state = "OFFLINE";
1424                         break;
1425                 case VDEV_STATE_CANT_OPEN:
1426                         state = "CANT_OPEN";
1427                         break;
1428                 case VDEV_STATE_DEGRADED:
1429                         state = "DEGRADED";
1430                         break;
1431                 case VDEV_STATE_HEALTHY:
1432                         state = "HEALTHY";
1433                         break;
1434                 case VDEV_STATE_REMOVED:
1435                         state = "REMOVED";
1436                         break;
1437                 case VDEV_STATE_FAULTED:
1438                         state = "FAULTED";
1439                         break;
1440                 default:
1441                         state = "UNKNOWN";
1442                         break;
1443                 }
1444 
1445                 switch (vdev.vdev_stat.vs_aux) {
1446                 case VDEV_AUX_NONE:
1447                         aux = "-";
1448                         break;
1449                 case VDEV_AUX_OPEN_FAILED:
1450                         aux = "OPEN_FAILED";
1451                         break;
1452                 case VDEV_AUX_CORRUPT_DATA:
1453                         aux = "CORRUPT_DATA";
1454                         break;
1455                 case VDEV_AUX_NO_REPLICAS:
1456                         aux = "NO_REPLICAS";
1457                         break;
1458                 case VDEV_AUX_BAD_GUID_SUM:
1459                         aux = "BAD_GUID_SUM";
1460                         break;
1461                 case VDEV_AUX_TOO_SMALL:
1462                         aux = "TOO_SMALL";
1463                         break;
1464                 case VDEV_AUX_BAD_LABEL:
1465                         aux = "BAD_LABEL";
1466                         break;
1467                 case VDEV_AUX_VERSION_NEWER:
1468                         aux = "VERS_NEWER";
1469                         break;
1470                 case VDEV_AUX_VERSION_OLDER:
1471                         aux = "VERS_OLDER";
1472                         break;
1473                 case VDEV_AUX_UNSUP_FEAT:
1474                         aux = "UNSUP_FEAT";
1475                         break;
1476                 case VDEV_AUX_SPARED:
1477                         aux = "SPARED";
1478                         break;
1479                 case VDEV_AUX_ERR_EXCEEDED:
1480                         aux = "ERR_EXCEEDED";
1481                         break;
1482                 case VDEV_AUX_IO_FAILURE:
1483                         aux = "IO_FAILURE";
1484                         break;
1485                 case VDEV_AUX_BAD_LOG:
1486                         aux = "BAD_LOG";
1487                         break;
1488                 case VDEV_AUX_EXTERNAL:
1489                         aux = "EXTERNAL";
1490                         break;
1491                 case VDEV_AUX_SPLIT_POOL:
1492                         aux = "SPLIT_POOL";
1493                         break;
1494                 default:
1495                         aux = "UNKNOWN";
1496                         break;
1497                 }
1498 
1499                 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1500 
1501                 if (spa_flags & SPA_FLAG_ERRORS) {
1502                         vdev_stat_t *vs = &vdev.vdev_stat;
1503                         int i;
1504 
1505                         mdb_inc_indent(4);
1506                         mdb_printf("\n");
1507                         mdb_printf("%<u>       %12s %12s %12s %12s "
1508                             "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1509                             "IOCTL");
1510                         mdb_printf("OPS     ");
1511                         for (i = 1; i < ZIO_TYPES; i++)
1512                                 mdb_printf("%11#llx%s", vs->vs_ops[i],
1513                                     i == ZIO_TYPES - 1 ? "" : "  ");
1514                         mdb_printf("\n");
1515                         mdb_printf("BYTES   ");
1516                         for (i = 1; i < ZIO_TYPES; i++)
1517                                 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1518                                     i == ZIO_TYPES - 1 ? "" : "  ");
1519 
1520 
1521                         mdb_printf("\n");
1522                         mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1523                         mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1524                         mdb_printf("ECKSUM   %10#llx\n",
1525                             vs->vs_checksum_errors);
1526                         mdb_dec_indent(4);
1527                         mdb_printf("\n");
1528                 }
1529 
1530                 if (spa_flags & SPA_FLAG_METASLAB_GROUPS &&
1531                     vdev.vdev_mg != NULL) {
1532                         metaslab_group_stats((uintptr_t)vdev.vdev_mg,
1533                             spa_flags);
1534                 }
1535                 if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) {
1536                         metaslab_stats((uintptr_t)addr, spa_flags);
1537                 }
1538         }
1539 
1540         children = vdev.vdev_children;
1541 
1542         if (children == 0 || !recursive)
1543                 return (DCMD_OK);
1544 
1545         child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1546         if (mdb_vread(child, children * sizeof (void *),
1547             (uintptr_t)vdev.vdev_child) == -1) {
1548                 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1549                 return (DCMD_ERR);
1550         }
1551 
1552         for (c = 0; c < children; c++) {
1553                 if (do_print_vdev(child[c], flags, depth + 2, recursive,
1554                     spa_flags)) {
1555                         return (DCMD_ERR);
1556                 }
1557         }
1558 
1559         return (DCMD_OK);
1560 }
1561 
1562 static int
1563 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1564 {
1565         uint64_t depth = 0;
1566         boolean_t recursive = B_FALSE;
1567         int spa_flags = 0;
1568 
1569         if (mdb_getopts(argc, argv,
1570             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1571             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1572             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1573             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1574             'r', MDB_OPT_SETBITS, TRUE, &recursive,
1575             'd', MDB_OPT_UINT64, &depth, NULL) != argc)
1576                 return (DCMD_USAGE);
1577 
1578         if (!(flags & DCMD_ADDRSPEC)) {
1579                 mdb_warn("no vdev_t address given\n");
1580                 return (DCMD_ERR);
1581         }
1582 
1583         return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags));
1584 }
1585 
1586 typedef struct metaslab_walk_data {
             /* State kept across steps of the metaslab walker. */
1587         uint64_t mw_numvdevs;   /* vdev_children of the root vdev */
1588         uintptr_t *mw_vdevs;    /* local copy of the root's vdev_child array */
1589         int mw_curvdev;         /* index into mw_vdevs of the vdev being walked */
1590         uint64_t mw_nummss;     /* vdev_ms_count of the current vdev */
1591         uintptr_t *mw_mss;      /* local copy of its vdev_ms array; NULL until loaded */
1592         int mw_curms;           /* index into mw_mss of the next metaslab */
1593 } metaslab_walk_data_t;
1594 
1595 static int
1596 metaslab_walk_step(mdb_walk_state_t *wsp)
1597 {
1598         metaslab_walk_data_t *mw = wsp->walk_data;
1599         metaslab_t ms;
1600         uintptr_t msp;
1601 
1602         if (mw->mw_curvdev >= mw->mw_numvdevs)
1603                 return (WALK_DONE);
1604 
1605         if (mw->mw_mss == NULL) {
1606                 uintptr_t mssp;
1607                 uintptr_t vdevp;
1608 
1609                 ASSERT(mw->mw_curms == 0);
1610                 ASSERT(mw->mw_nummss == 0);
1611 
1612                 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1613                 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) ||
1614                     GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) {
1615                         return (WALK_ERR);
1616                 }
1617 
1618                 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1619                     UM_SLEEP | UM_GC);
1620                 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1621                     mssp) == -1) {
1622                         mdb_warn("failed to read vdev_ms at %p", mssp);
1623                         return (WALK_ERR);
1624                 }
1625         }
1626 
1627         if (mw->mw_curms >= mw->mw_nummss) {
1628                 mw->mw_mss = NULL;
1629                 mw->mw_curms = 0;
1630                 mw->mw_nummss = 0;
1631                 mw->mw_curvdev++;
1632                 return (WALK_NEXT);
1633         }
1634 
1635         msp = mw->mw_mss[mw->mw_curms];
1636         if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1637                 mdb_warn("failed to read metaslab_t at %p", msp);
1638                 return (WALK_ERR);
1639         }
1640 
1641         mw->mw_curms++;
1642 
1643         return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1644 }
1645 
1646 /* ARGSUSED */
1647 static int
1648 metaslab_walk_init(mdb_walk_state_t *wsp)
1649 {
1650         metaslab_walk_data_t *mw;
1651         uintptr_t root_vdevp;
1652         uintptr_t childp;
1653 
1654         if (wsp->walk_addr == NULL) {
1655                 mdb_warn("must supply address of spa_t\n");
1656                 return (WALK_ERR);
1657         }
1658 
1659         mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1660 
1661         if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) ||
1662             GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) ||
1663             GETMEMB(root_vdevp, "vdev", vdev_child, childp)) {
1664                 return (DCMD_ERR);
1665         }
1666 
1667         mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1668             UM_SLEEP | UM_GC);
1669         if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1670             childp) == -1) {
1671                 mdb_warn("failed to read root vdev children at %p", childp);
1672                 return (DCMD_ERR);
1673         }
1674 
1675         wsp->walk_data = mw;
1676 
1677         return (WALK_NEXT);
1678 }
1679 
1680 typedef struct mdb_spa {
             /* NOTE(review): no user of this type is visible in this part of the file. */
1681         uintptr_t spa_dsl_pool; /* presumably the target address of the dsl_pool -- confirm */
1682         uintptr_t spa_root_vdev;        /* presumably the root vdev_t address -- confirm */
1683 } mdb_spa_t;
1684 
1685 typedef struct mdb_dsl_dir {
             /* NOTE(review): presumably an mdb_ctf_vread() view of dsl_dir_t; confirm at the caller. */
1686         uintptr_t dd_phys;
1687         int64_t dd_space_towrite[TXG_SIZE];     /* TXG_SIZE slots */
1688 } mdb_dsl_dir_t;
1689 
1690 typedef struct mdb_dsl_dir_phys {
             /* NOTE(review): presumably read through mdb_dsl_dir_t's dd_phys; confirm at the caller. */
1691         uint64_t dd_used_bytes;
1692         uint64_t dd_compressed_bytes;
1693         uint64_t dd_uncompressed_bytes;
1694 } mdb_dsl_dir_phys_t;
1695 
/*
 * Running totals accumulated by space_cb() over every metaslab visited by
 * the metaslab walk, and printed by spa_space().
 */
typedef struct space_data {
        uint64_t ms_alloctree[TXG_SIZE]; /* sum of rt_space, per txg slot */
        uint64_t ms_freetree[TXG_SIZE]; /* sum of rt_space, per txg slot */
        uint64_t ms_tree;               /* sum of rt_space of each ms_tree */
        uint64_t avail;                 /* sum of sm_size - sm_alloc */
        uint64_t nowavail;              /* sum of sm_size - smp_alloc */
} space_data_t;
1703 
1704 /* ARGSUSED */
1705 static int
1706 space_cb(uintptr_t addr, const void *unknown, void *arg)
1707 {
1708         space_data_t *sd = arg;
1709         mdb_metaslab_t ms;
1710         mdb_range_tree_t rt;
1711         mdb_space_map_t sm = { 0 };
1712         mdb_space_map_phys_t smp = { 0 };
1713         int i;
1714 
1715         if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1716             addr, 0) == -1)
1717                 return (WALK_ERR);
1718 
1719         for (i = 0; i < TXG_SIZE; i++) {
1720                 if (mdb_ctf_vread(&rt, "range_tree_t",
1721                     "mdb_range_tree_t", ms.ms_alloctree[i], 0) == -1)
1722                         return (WALK_ERR);
1723 
1724                 sd->ms_alloctree[i] += rt.rt_space;
1725 
1726                 if (mdb_ctf_vread(&rt, "range_tree_t",
1727                     "mdb_range_tree_t", ms.ms_freetree[i], 0) == -1)
1728                         return (WALK_ERR);
1729 
1730                 sd->ms_freetree[i] += rt.rt_space;
1731         }
1732 
1733         if (mdb_ctf_vread(&rt, "range_tree_t",
1734             "mdb_range_tree_t", ms.ms_tree, 0) == -1)
1735                 return (WALK_ERR);
1736 
1737         if (ms.ms_sm != NULL &&
1738             mdb_ctf_vread(&sm, "space_map_t",
1739             "mdb_space_map_t", ms.ms_sm, 0) == -1)
1740                 return (WALK_ERR);
1741 
1742         if (sm.sm_phys != NULL) {
1743                 (void) mdb_ctf_vread(&smp, "space_map_phys_t",
1744                     "mdb_space_map_phys_t", sm.sm_phys, 0);
1745         }
1746 
1747         sd->ms_tree += rt.rt_space;
1748         sd->avail += sm.sm_size - sm.sm_alloc;
1749         sd->nowavail += sm.sm_size - smp.smp_alloc;
1750 
1751         return (WALK_NEXT);
1752 }
1753 
1754 /*
1755  * ::spa_space [-b]
1756  *
1757  * Given a spa_t, print out it's on-disk space usage and in-core
1758  * estimates of future usage.  If -b is given, print space in bytes.
1759  * Otherwise print in megabytes.
1760  */
1761 /* ARGSUSED */
1762 static int
1763 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1764 {
1765         mdb_spa_t spa;
1766         uintptr_t dp_root_dir;
1767         mdb_dsl_dir_t dd;
1768         mdb_dsl_dir_phys_t dsp;
1769         uint64_t children;
1770         uintptr_t childaddr;
1771         space_data_t sd;
1772         int shift = 20;
1773         char *suffix = "M";
1774         int bytes = B_FALSE;
1775 
1776         if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) !=
1777             argc)
1778                 return (DCMD_USAGE);
1779         if (!(flags & DCMD_ADDRSPEC))
1780                 return (DCMD_USAGE);
1781 
1782         if (bytes) {
1783                 shift = 0;
1784                 suffix = "";
1785         }
1786 
1787         if (GETMEMB(addr, "spa", spa_dsl_pool, spa.spa_dsl_pool) ||
1788             GETMEMB(addr, "spa", spa_root_vdev, spa.spa_root_vdev) ||
1789             GETMEMB(spa.spa_root_vdev, "vdev", vdev_children, children) ||
1790             GETMEMB(spa.spa_root_vdev, "vdev", vdev_child, childaddr) ||
1791             GETMEMB(spa.spa_dsl_pool, "dsl_pool",
1792             dp_root_dir, dp_root_dir) ||
1793             GETMEMB(dp_root_dir, "dsl_dir", dd_phys, dd.dd_phys) ||
1794             GETMEMB(dp_root_dir, "dsl_dir",
1795             dd_space_towrite, dd.dd_space_towrite) ||
1796             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1797             dd_used_bytes, dsp.dd_used_bytes) ||
1798             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1799             dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1800             GETMEMB(dd.dd_phys, "dsl_dir_phys",
1801             dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1802                 return (DCMD_ERR);
1803         }
1804 
1805         mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1806             dd.dd_space_towrite[0] >> shift, suffix,
1807             dd.dd_space_towrite[1] >> shift, suffix,
1808             dd.dd_space_towrite[2] >> shift, suffix,
1809             dd.dd_space_towrite[3] >> shift, suffix);
1810 
1811         mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1812             dsp.dd_used_bytes >> shift, suffix);
1813         mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1814             dsp.dd_compressed_bytes >> shift, suffix);
1815         mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1816             dsp.dd_uncompressed_bytes >> shift, suffix);
1817 
1818         bzero(&sd, sizeof (sd));
1819         if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1820                 mdb_warn("can't walk metaslabs");
1821                 return (DCMD_ERR);
1822         }
1823 
1824         mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1825             sd.ms_alloctree[0] >> shift, suffix,
1826             sd.ms_alloctree[1] >> shift, suffix,
1827             sd.ms_alloctree[2] >> shift, suffix,
1828             sd.ms_alloctree[3] >> shift, suffix);
1829         mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1830             sd.ms_freetree[0] >> shift, suffix,
1831             sd.ms_freetree[1] >> shift, suffix,
1832             sd.ms_freetree[2] >> shift, suffix,
1833             sd.ms_freetree[3] >> shift, suffix);
1834         mdb_printf("ms_tree = %llu%s\n", sd.ms_tree >> shift, suffix);
1835         mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1836         mdb_printf("current syncing avail = %llu%s\n",
1837             sd.nowavail >> shift, suffix);
1838 
1839         return (DCMD_OK);
1840 }
1841 
/*
 * Description of one set of auxiliary vdevs, as consumed by spa_print_aux().
 */
typedef struct mdb_spa_aux_vdev {
        int sav_count;          /* number of entries in the sav_vdevs array */
        uintptr_t sav_vdevs;    /* target address of the vdev pointer array */
} mdb_spa_aux_vdev_t;
1846 
/*
 * The spa_t members that spa_vdevs() reads with mdb_ctf_vread().
 */
typedef struct mdb_spa_vdevs {
        uintptr_t spa_root_vdev;        /* target address of the root vdev */
        mdb_spa_aux_vdev_t spa_l2cache; /* printed under the name "cache" */
        mdb_spa_aux_vdev_t spa_spares;  /* printed under the name "spares" */
} mdb_spa_vdevs_t;
1852 
1853 static int
1854 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1855     const char *name)
1856 {
1857         uintptr_t *aux;
1858         size_t len;
1859         int ret, i;
1860 
1861         /*
1862          * Iterate over aux vdevs and print those out as well.  This is a
1863          * little annoying because we don't have a root vdev to pass to ::vdev.
1864          * Instead, we print a single line and then call it for each child
1865          * vdev.
1866          */
1867         if (sav->sav_count != 0) {
1868                 v[1].a_type = MDB_TYPE_STRING;
1869                 v[1].a_un.a_str = "-d";
1870                 v[2].a_type = MDB_TYPE_IMMEDIATE;
1871                 v[2].a_un.a_val = 2;
1872 
1873                 len = sav->sav_count * sizeof (uintptr_t);
1874                 aux = mdb_alloc(len, UM_SLEEP);
1875                 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) {
1876                         mdb_free(aux, len);
1877                         mdb_warn("failed to read l2cache vdevs at %p",
1878                             sav->sav_vdevs);
1879                         return (DCMD_ERR);
1880                 }
1881 
1882                 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1883 
1884                 for (i = 0; i < sav->sav_count; i++) {
1885                         ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1886                         if (ret != DCMD_OK) {
1887                                 mdb_free(aux, len);
1888                                 return (ret);
1889                         }
1890                 }
1891 
1892                 mdb_free(aux, len);
1893         }
1894 
1895         return (0);
1896 }
1897 
1898 /*
1899  * ::spa_vdevs
1900  *
1901  *      -e      Include error stats
1902  *      -m      Include metaslab information
1903  *      -M      Include metaslab group information
1904  *      -h      Include histogram information (requires -m or -M)
1905  *
1906  * Print out a summarized list of vdevs for the given spa_t.
1907  * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
1908  * iterating over the cache devices.
1909  */
1910 /* ARGSUSED */
1911 static int
1912 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1913 {
1914         mdb_arg_t v[3];
1915         int ret;
1916         char opts[100] = "-r";
1917         int spa_flags = 0;
1918 
1919         if (mdb_getopts(argc, argv,
1920             'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1921             'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1922             'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1923             'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1924             NULL) != argc)
1925                 return (DCMD_USAGE);
1926 
1927         if (!(flags & DCMD_ADDRSPEC))
1928                 return (DCMD_USAGE);
1929 
1930         mdb_spa_vdevs_t spa;
1931         if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1)
1932                 return (DCMD_ERR);
1933 
1934         /*
1935          * Unitialized spa_t structures can have a NULL root vdev.
1936          */
1937         if (spa.spa_root_vdev == NULL) {
1938                 mdb_printf("no associated vdevs\n");
1939                 return (DCMD_OK);
1940         }
1941 
1942         if (spa_flags & SPA_FLAG_ERRORS)
1943                 strcat(opts, "e");
1944         if (spa_flags & SPA_FLAG_METASLABS)
1945                 strcat(opts, "m");
1946         if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1947                 strcat(opts, "M");
1948         if (spa_flags & SPA_FLAG_HISTOGRAMS)
1949                 strcat(opts, "h");
1950 
1951         v[0].a_type = MDB_TYPE_STRING;
1952         v[0].a_un.a_str = opts;
1953 
1954         ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1955             flags, 1, v);
1956         if (ret != DCMD_OK)
1957                 return (ret);
1958 
1959         if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1960             spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1961                 return (DCMD_ERR);
1962 
1963         return (DCMD_OK);
1964 }
1965 
1966 /*
1967  * ::zio
1968  *
1969  * Print a summary of zio_t and all its children.  This is intended to display a
1970  * zio tree, and hence we only pick the most important pieces of information for
1971  * the main summary.  More detailed information can always be found by doing a
1972  * '::print zio' on the underlying zio_t.  The columns we display are:
1973  *
1974  *      ADDRESS  TYPE  STAGE  WAITER  TIME_ELAPSED
1975  *
1976  * The 'address' column is indented by one space for each depth level as we
1977  * descend down the tree.
1978  */
1979 
/* Deepest indentation zio_print_cb() applies to the address column. */
#define ZIO_MAXINDENT   7
/* Address column width: two hex digits per pointer byte plus the indent. */
#define ZIO_MAXWIDTH    (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
/* Values of zpa_type: print only the given zio, or follow a link list. */
#define ZIO_WALK_SELF   0
#define ZIO_WALK_CHILD  1
#define ZIO_WALK_PARENT 2
1985 
/*
 * State shared by zio_print(), zio_print_cb() and zio_child_cb() while a
 * zio tree is printed.
 */
typedef struct zio_print_args {
        int     zpa_current_depth;      /* depth of the zio being visited */
        int     zpa_min_depth;          /* shallower zios are not printed */
        int     zpa_max_depth;          /* no descent at or past this depth */
        int     zpa_type;               /* ZIO_WALK_* */
        uint_t  zpa_flags;              /* dcmd flags (DCMD_PIPE_OUT test) */
} zio_print_args_t;
1993 
/*
 * The zio members read with mdb_ctf_vread() by the ::zio dcmd and by the
 * zio walkers.
 */
typedef struct mdb_zio {
        enum zio_type io_type;
        enum zio_stage io_stage;
        uintptr_t io_waiter;
        uintptr_t io_spa;               /* compared against the walk's spa */
        struct {
                struct {
                        uintptr_t list_next;
                } list_head;
        } io_parent_list;               /* used to detect an empty list */
        int io_error;                   /* nonzero prints stage "FAILED" */
} mdb_zio_t;
2006 
/*
 * io_timestamp is read separately from the rest of the zio, with
 * MDB_CTF_VREAD_QUIET and with the result ignored, so a failure to read it
 * does not stop ::zio (presumably for targets whose zio lacks this member --
 * TODO confirm).
 */
typedef struct mdb_zio_timestamp {
        hrtime_t io_timestamp;
} mdb_zio_timestamp_t;
2010 
2011 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
2012 
2013 static int
2014 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
2015 {
2016         mdb_ctf_id_t type_enum, stage_enum;
2017         int indent = zpa->zpa_current_depth;
2018         const char *type, *stage;
2019         uintptr_t laddr;
2020         mdb_zio_t zio;
2021         mdb_zio_timestamp_t zio_timestamp = { 0 };
2022 
2023         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
2024                 return (WALK_ERR);
2025         (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
2026             "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
2027 
2028         if (indent > ZIO_MAXINDENT)
2029                 indent = ZIO_MAXINDENT;
2030 
2031         if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
2032             mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
2033                 mdb_warn("failed to lookup zio enums");
2034                 return (WALK_ERR);
2035         }
2036 
2037         if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
2038                 type += sizeof ("ZIO_TYPE_") - 1;
2039         else
2040                 type = "?";
2041 
2042         if (zio.io_error == 0) {
2043                 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
2044                 if (stage != NULL)
2045                         stage += sizeof ("ZIO_STAGE_") - 1;
2046                 else
2047                         stage = "?";
2048         } else {
2049                 stage = "FAILED";
2050         }
2051 
2052         if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
2053                 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
2054                         mdb_printf("%?p\n", addr);
2055                 } else {
2056                         mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
2057                             ZIO_MAXWIDTH - indent, addr, type, stage);
2058                         if (zio.io_waiter != 0)
2059                                 mdb_printf("%-16lx ", zio.io_waiter);
2060                         else
2061                                 mdb_printf("%-16s ", "-");
2062 #ifdef _KERNEL
2063                         if (zio_timestamp.io_timestamp != 0) {
2064                                 mdb_printf("%llums", (mdb_gethrtime() -
2065                                     zio_timestamp.io_timestamp) /
2066                                     1000000);
2067                         } else {
2068                                 mdb_printf("%-12s ", "-");
2069                         }
2070 #else
2071                         mdb_printf("%-12s ", "-");
2072 #endif
2073                         mdb_printf("\n");
2074                 }
2075         }
2076 
2077         if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
2078                 return (WALK_NEXT);
2079 
2080         if (zpa->zpa_type == ZIO_WALK_PARENT)
2081                 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2082                     "io_parent_list");
2083         else
2084                 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2085                     "io_child_list");
2086 
2087         zpa->zpa_current_depth++;
2088         if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
2089                 mdb_warn("failed to walk zio_t children at %p\n", laddr);
2090                 return (WALK_ERR);
2091         }
2092         zpa->zpa_current_depth--;
2093 
2094         return (WALK_NEXT);
2095 }
2096 
2097 /* ARGSUSED */
2098 static int
2099 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
2100 {
2101         zio_link_t zl;
2102         uintptr_t ziop;
2103         zio_print_args_t *zpa = arg;
2104 
2105         if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
2106                 mdb_warn("failed to read zio_link_t at %p", addr);
2107                 return (WALK_ERR);
2108         }
2109 
2110         if (zpa->zpa_type == ZIO_WALK_PARENT)
2111                 ziop = (uintptr_t)zl.zl_parent;
2112         else
2113                 ziop = (uintptr_t)zl.zl_child;
2114 
2115         return (zio_print_cb(ziop, zpa));
2116 }
2117 
2118 /* ARGSUSED */
2119 static int
2120 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2121 {
2122         zio_print_args_t zpa = { 0 };
2123 
2124         if (!(flags & DCMD_ADDRSPEC))
2125                 return (DCMD_USAGE);
2126 
2127         if (mdb_getopts(argc, argv,
2128             'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
2129             'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
2130             'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
2131             NULL) != argc)
2132                 return (DCMD_USAGE);
2133 
2134         zpa.zpa_flags = flags;
2135         if (zpa.zpa_max_depth != 0) {
2136                 if (zpa.zpa_type == ZIO_WALK_SELF)
2137                         zpa.zpa_type = ZIO_WALK_CHILD;
2138         } else if (zpa.zpa_type != ZIO_WALK_SELF) {
2139                 zpa.zpa_min_depth = 1;
2140                 zpa.zpa_max_depth = 1;
2141         }
2142 
2143         if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
2144                 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
2145                     ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
2146                     "TIME_ELAPSED");
2147         }
2148 
2149         if (zio_print_cb(addr, &zpa) != WALK_NEXT)
2150                 return (DCMD_ERR);
2151 
2152         return (DCMD_OK);
2153 }
2154 
2155 /*
2156  * [addr]::zio_state
2157  *
2158  * Print a summary of all zio_t structures on the system, or for a particular
2159  * pool.  This is equivalent to '::walk zio_root | ::zio'.
2160  */
2161 /*ARGSUSED*/
2162 static int
2163 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2164 {
2165         /*
2166          * MDB will remember the last address of the pipeline, so if we don't
2167          * zero this we'll end up trying to walk zio structures for a
2168          * non-existent spa_t.
2169          */
2170         if (!(flags & DCMD_ADDRSPEC))
2171                 addr = 0;
2172 
2173         return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
2174 }
2175 
/*
 * Per-walk state for the txg_list walkers.
 */
typedef struct txg_list_walk_data {
        uintptr_t lw_head[TXG_SIZE];    /* copy of the target's tl_head[] */
        int     lw_txgoff;              /* txg slot currently being walked */
        int     lw_maxoff;              /* last txg slot to walk */
        size_t  lw_offset;              /* copy of the target's tl_offset */
        void    *lw_obj;                /* lw_offset + txg_node_t buffer */
} txg_list_walk_data_t;
2183 
2184 static int
2185 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
2186 {
2187         txg_list_walk_data_t *lwd;
2188         txg_list_t list;
2189         int i;
2190 
2191         lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
2192         if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
2193                 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
2194                 return (WALK_ERR);
2195         }
2196 
2197         for (i = 0; i < TXG_SIZE; i++)
2198                 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
2199         lwd->lw_offset = list.tl_offset;
2200         lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
2201             UM_SLEEP | UM_GC);
2202         lwd->lw_txgoff = txg;
2203         lwd->lw_maxoff = maxoff;
2204 
2205         wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2206         wsp->walk_data = lwd;
2207 
2208         return (WALK_NEXT);
2209 }
2210 
/*
 * Walker init that covers every txg slot, 0 through TXG_SIZE-1.
 */
static int
txg_list_walk_init(mdb_walk_state_t *wsp)
{
        return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
}
2216 
/*
 * Walker init restricted to txg slot 0.
 */
static int
txg_list0_walk_init(mdb_walk_state_t *wsp)
{
        return (txg_list_walk_init_common(wsp, 0, 0));
}
2222 
/*
 * Walker init restricted to txg slot 1.
 */
static int
txg_list1_walk_init(mdb_walk_state_t *wsp)
{
        return (txg_list_walk_init_common(wsp, 1, 1));
}
2228 
/*
 * Walker init restricted to txg slot 2.
 */
static int
txg_list2_walk_init(mdb_walk_state_t *wsp)
{
        return (txg_list_walk_init_common(wsp, 2, 2));
}
2234 
/*
 * Walker init restricted to txg slot 3.
 */
static int
txg_list3_walk_init(mdb_walk_state_t *wsp)
{
        return (txg_list_walk_init_common(wsp, 3, 3));
}
2240 
2241 static int
2242 txg_list_walk_step(mdb_walk_state_t *wsp)
2243 {
2244         txg_list_walk_data_t *lwd = wsp->walk_data;
2245         uintptr_t addr;
2246         txg_node_t *node;
2247         int status;
2248 
2249         while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
2250                 lwd->lw_txgoff++;
2251                 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2252         }
2253 
2254         if (wsp->walk_addr == NULL)
2255                 return (WALK_DONE);
2256 
2257         addr = wsp->walk_addr - lwd->lw_offset;
2258 
2259         if (mdb_vread(lwd->lw_obj,
2260             lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
2261                 mdb_warn("failed to read list element at %#lx", addr);
2262                 return (WALK_ERR);
2263         }
2264 
2265         status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
2266         node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
2267         wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
2268 
2269         return (status);
2270 }
2271 
2272 /*
2273  * ::walk spa
2274  *
2275  * Walk all named spa_t structures in the namespace.  This is nothing more than
2276  * a layered avl walk.
2277  */
2278 static int
2279 spa_walk_init(mdb_walk_state_t *wsp)
2280 {
2281         GElf_Sym sym;
2282 
2283         if (wsp->walk_addr != NULL) {
2284                 mdb_warn("spa walk only supports global walks\n");
2285                 return (WALK_ERR);
2286         }
2287 
2288         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
2289                 mdb_warn("failed to find symbol 'spa_namespace_avl'");
2290                 return (WALK_ERR);
2291         }
2292 
2293         wsp->walk_addr = (uintptr_t)sym.st_value;
2294 
2295         if (mdb_layered_walk("avl", wsp) == -1) {
2296                 mdb_warn("failed to walk 'avl'\n");
2297                 return (WALK_ERR);
2298         }
2299 
2300         return (WALK_NEXT);
2301 }
2302 
/*
 * Step routine for the spa walk: hand the current walk address to the
 * callback as is, with no local copy of the structure (NULL).
 */
static int
spa_walk_step(mdb_walk_state_t *wsp)
{
        return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata));
}
2308 
2309 /*
2310  * [addr]::walk zio
2311  *
2312  * Walk all active zio_t structures on the system.  This is simply a layered
2313  * walk on top of ::walk zio_cache, with the optional ability to limit the
2314  * structures to a particular pool.
2315  */
2316 static int
2317 zio_walk_init(mdb_walk_state_t *wsp)
2318 {
2319         wsp->walk_data = (void *)wsp->walk_addr;
2320 
2321         if (mdb_layered_walk("zio_cache", wsp) == -1) {
2322                 mdb_warn("failed to walk 'zio_cache'\n");
2323                 return (WALK_ERR);
2324         }
2325 
2326         return (WALK_NEXT);
2327 }
2328 
2329 static int
2330 zio_walk_step(mdb_walk_state_t *wsp)
2331 {
2332         mdb_zio_t zio;
2333         uintptr_t spa = (uintptr_t)wsp->walk_data;
2334 
2335         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2336             wsp->walk_addr, 0) == -1)
2337                 return (WALK_ERR);
2338 
2339         if (spa != 0 && spa != zio.io_spa)
2340                 return (WALK_NEXT);
2341 
2342         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2343 }
2344 
2345 /*
2346  * [addr]::walk zio_root
2347  *
2348  * Walk only root zio_t structures, optionally for a particular spa_t.
2349  */
2350 static int
2351 zio_walk_root_step(mdb_walk_state_t *wsp)
2352 {
2353         mdb_zio_t zio;
2354         uintptr_t spa = (uintptr_t)wsp->walk_data;
2355 
2356         if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2357             wsp->walk_addr, 0) == -1)
2358                 return (WALK_ERR);
2359 
2360         if (spa != 0 && spa != zio.io_spa)
2361                 return (WALK_NEXT);
2362 
2363         /* If the parent list is not empty, ignore */
2364         if (zio.io_parent_list.list_head.list_next !=
2365             wsp->walk_addr +
2366             mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") +
2367             mdb_ctf_offsetof_by_name("struct list", "list_head"))
2368                 return (WALK_NEXT);
2369 
2370         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2371 }
2372 
2373 /*
2374  * ::zfs_blkstats
2375  *
2376  *      -v      print verbose per-level information
2377  *
2378  */
2379 static int
2380 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2381 {
2382         boolean_t verbose = B_FALSE;
2383         zfs_all_blkstats_t stats;
2384         dmu_object_type_t t;
2385         zfs_blkstat_t *tzb;
2386         uint64_t ditto;
2387         dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2388         /* +10 in case it grew */
2389 
2390         if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2391                 mdb_warn("failed to read 'dmu_ot'");
2392                 return (DCMD_ERR);
2393         }
2394 
2395         if (mdb_getopts(argc, argv,
2396             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2397             NULL) != argc)
2398                 return (DCMD_USAGE);
2399 
2400         if (!(flags & DCMD_ADDRSPEC))
2401                 return (DCMD_USAGE);
2402 
2403         if (GETMEMB(addr, "spa", spa_dsl_pool, addr) ||
2404             GETMEMB(addr, "dsl_pool", dp_blkstats, addr) ||
2405             mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2406                 mdb_warn("failed to read data at %p;", addr);
2407                 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2408                 return (DCMD_ERR);
2409         }
2410 
2411         tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
2412         if (tzb->zb_gangs != 0) {
2413                 mdb_printf("Ganged blocks: %llu\n",
2414                     (longlong_t)tzb->zb_gangs);
2415         }
2416 
2417         ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2418             tzb->zb_ditto_3_of_3_samevdev;
2419         if (ditto != 0) {
2420                 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2421                     (longlong_t)ditto);
2422         }
2423 
2424         mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2425             "\t  avg\t comp\t%%Total\tType\n");
2426 
2427         for (t = 0; t <= DMU_OT_TOTAL; t++) {
2428                 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2429                 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2430                 char avg[NICENUM_BUFLEN];
2431                 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2432                 char typename[64];
2433                 int l;
2434 
2435 
2436                 if (t == DMU_OT_DEFERRED)
2437                         strcpy(typename, "deferred free");
2438                 else if (t == DMU_OT_OTHER)
2439                         strcpy(typename, "other");
2440                 else if (t == DMU_OT_TOTAL)
2441                         strcpy(typename, "Total");
2442                 else if (mdb_readstr(typename, sizeof (typename),
2443                     (uintptr_t)dmu_ot[t].ot_name) == -1) {
2444                         mdb_warn("failed to read type name");
2445                         return (DCMD_ERR);
2446                 }
2447 
2448                 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2449                         continue;
2450 
2451                 for (l = -1; l < DN_MAX_LEVELS; l++) {
2452                         int level = (l == -1 ? DN_MAX_LEVELS : l);
2453                         zfs_blkstat_t *zb = &stats.zab_type[level][t];
2454 
2455                         if (zb->zb_asize == 0)
2456                                 continue;
2457 
2458                         /*
2459                          * Don't print each level unless requested.
2460                          */
2461                         if (!verbose && level != DN_MAX_LEVELS)
2462                                 continue;
2463 
2464                         /*
2465                          * If all the space is level 0, don't print the
2466                          * level 0 separately.
2467                          */
2468                         if (level == 0 && zb->zb_asize ==
2469                             stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2470                                 continue;
2471 
2472                         mdb_nicenum(zb->zb_count, csize);
2473                         mdb_nicenum(zb->zb_lsize, lsize);
2474                         mdb_nicenum(zb->zb_psize, psize);
2475                         mdb_nicenum(zb->zb_asize, asize);
2476                         mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2477                         (void) snprintfrac(comp, NICENUM_BUFLEN,
2478                             zb->zb_lsize, zb->zb_psize, 2);
2479                         (void) snprintfrac(pct, NICENUM_BUFLEN,
2480                             100 * zb->zb_asize, tzb->zb_asize, 2);
2481 
2482                         mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2483                             "\t%5s\t%6s\t",
2484                             csize, lsize, psize, asize, avg, comp, pct);
2485 
2486                         if (level == DN_MAX_LEVELS)
2487                                 mdb_printf("%s\n", typename);
2488                         else
2489                                 mdb_printf("  L%d %s\n",
2490                                     level, typename);
2491                 }
2492         }
2493 
2494         return (DCMD_OK);
2495 }
2496 
/*
 * The reference_t members that reference_cb() reads with mdb_ctf_vread().
 */
typedef struct mdb_reference {
        uintptr_t ref_holder;   /* tag; printed as hex, and as text if valid */
        uintptr_t ref_removed;  /* passed to ::whatis for removed references */
        uint64_t ref_number;    /* printed as "count" when it is not 1 */
} mdb_reference_t;
2502 
2503 /* ARGSUSED */
2504 static int
2505 reference_cb(uintptr_t addr, const void *ignored, void *arg)
2506 {
2507         mdb_reference_t ref;
2508         boolean_t holder_is_str = B_FALSE;
2509         char holder_str[128];
2510         boolean_t removed = (boolean_t)arg;
2511 
2512         if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr,
2513             0) == -1)
2514                 return (DCMD_ERR);
2515 
2516         if (mdb_readstr(holder_str, sizeof (holder_str),
2517             ref.ref_holder) != -1)
2518                 holder_is_str = strisprint(holder_str);
2519 
2520         if (removed)
2521                 mdb_printf("removed ");
2522         mdb_printf("reference ");
2523         if (ref.ref_number != 1)
2524                 mdb_printf("with count=%llu ", ref.ref_number);
2525         mdb_printf("with tag %lx", ref.ref_holder);
2526         if (holder_is_str)
2527                 mdb_printf(" \"%s\"", holder_str);
2528         mdb_printf(", held at:\n");
2529 
2530         (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
2531 
2532         if (removed) {
2533                 mdb_printf("removed at:\n");
2534                 (void) mdb_call_dcmd("whatis", ref.ref_removed,
2535                     DCMD_ADDRSPEC, 0, NULL);
2536         }
2537 
2538         mdb_printf("\n");
2539 
2540         return (WALK_NEXT);
2541 }
2542 
/*
 * The refcount_t member that refcount() always requires.
 */
typedef struct mdb_refcount {
        uint64_t rc_count;      /* number of current holds */
} mdb_refcount_t;
2546 
/*
 * Read separately and quietly by refcount(); when this read fails the
 * refcount_t is reported as untracked.
 */
typedef struct mdb_refcount_removed {
        uint64_t rc_removed_count;      /* number of recently released holds */
} mdb_refcount_removed_t;
2550 
/*
 * Read separately and quietly by refcount(); when this read fails
 * rc_tracked is assumed to be B_TRUE.
 */
typedef struct mdb_refcount_tracked {
        boolean_t rc_tracked;
} mdb_refcount_tracked_t;
2554 
2555 /* ARGSUSED */
2556 static int
2557 refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2558 {
2559         mdb_refcount_t rc;
2560         mdb_refcount_removed_t rcr;
2561         mdb_refcount_tracked_t rct;
2562         int off;
2563         boolean_t released = B_FALSE;
2564 
2565         if (!(flags & DCMD_ADDRSPEC))
2566                 return (DCMD_USAGE);
2567 
2568         if (mdb_getopts(argc, argv,
2569             'r', MDB_OPT_SETBITS, B_TRUE, &released,
2570             NULL) != argc)
2571                 return (DCMD_USAGE);
2572 
2573         if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr,
2574             0) == -1)
2575                 return (DCMD_ERR);
2576 
2577         if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr,
2578             MDB_CTF_VREAD_QUIET) == -1) {
2579                 mdb_printf("refcount_t at %p has %llu holds (untracked)\n",
2580                     addr, (longlong_t)rc.rc_count);
2581                 return (DCMD_OK);
2582         }
2583 
2584         if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr,
2585             MDB_CTF_VREAD_QUIET) == -1) {
2586                 /* If this is an old target, it might be tracked. */
2587                 rct.rc_tracked = B_TRUE;
2588         }
2589 
2590         mdb_printf("refcount_t at %p has %llu current holds, "
2591             "%llu recently released holds\n",
2592             addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
2593 
2594         if (rct.rc_tracked && rc.rc_count > 0)
2595                 mdb_printf("current holds:\n");
2596         off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list");
2597         if (off == -1)
2598                 return (DCMD_ERR);
2599         mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
2600 
2601         if (released && rcr.rc_removed_count > 0) {
2602                 mdb_printf("released holds:\n");
2603 
2604                 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed");
2605                 if (off == -1)
2606                         return (DCMD_ERR);
2607                 mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
2608         }
2609 
2610         return (DCMD_OK);
2611 }
2612 
2613 /* ARGSUSED */
2614 static int
2615 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2616 {
2617         sa_attr_table_t *table;
2618         sa_os_t sa_os;
2619         char *name;
2620         int i;
2621 
2622         if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
2623                 mdb_warn("failed to read sa_os at %p", addr);
2624                 return (DCMD_ERR);
2625         }
2626 
2627         table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
2628             UM_SLEEP | UM_GC);
2629         name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
2630 
2631         if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
2632             (uintptr_t)sa_os.sa_attr_table) == -1) {
2633                 mdb_warn("failed to read sa_os at %p", addr);
2634                 return (DCMD_ERR);
2635         }
2636 
2637         mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
2638             "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
2639         for (i = 0; i != sa_os.sa_num_attrs; i++) {
2640                 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
2641                 mdb_printf("%5x   %8x %8x %8x          %-s\n",
2642                     (int)table[i].sa_attr, (int)table[i].sa_registered,
2643                     (int)table[i].sa_length, table[i].sa_byteswap, name);
2644         }
2645 
2646         return (DCMD_OK);
2647 }
2648 
2649 static int
2650 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
2651 {
2652         uintptr_t idx_table;
2653 
2654         if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) {
2655                 mdb_printf("can't find offset table in sa_idx_tab\n");
2656                 return (-1);
2657         }
2658 
2659         *off_tab = mdb_alloc(attr_count * sizeof (uint32_t),
2660             UM_SLEEP | UM_GC);
2661 
2662         if (mdb_vread(*off_tab,
2663             attr_count * sizeof (uint32_t), idx_table) == -1) {
2664                 mdb_warn("failed to attribute offset table %p", idx_table);
2665                 return (-1);
2666         }
2667 
2668         return (DCMD_OK);
2669 }
2670 
2671 /*ARGSUSED*/
2672 static int
2673 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2674 {
2675         uint32_t *offset_tab;
2676         int attr_count;
2677         uint64_t attr_id;
2678         uintptr_t attr_addr;
2679         uintptr_t bonus_tab, spill_tab;
2680         uintptr_t db_bonus, db_spill;
2681         uintptr_t os, os_sa;
2682         uintptr_t db_data;
2683 
2684         if (argc != 1)
2685                 return (DCMD_USAGE);
2686 
2687         if (argv[0].a_type == MDB_TYPE_STRING)
2688                 attr_id = mdb_strtoull(argv[0].a_un.a_str);
2689         else
2690                 return (DCMD_USAGE);
2691 
2692         if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) ||
2693             GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) ||
2694             GETMEMB(addr, "sa_handle", sa_os, os) ||
2695             GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) ||
2696             GETMEMB(addr, "sa_handle", sa_spill, db_spill)) {
2697                 mdb_printf("Can't find necessary information in sa_handle "
2698                     "in sa_handle\n");
2699                 return (DCMD_ERR);
2700         }
2701 
2702         if (GETMEMB(os, "objset", os_sa, os_sa)) {
2703                 mdb_printf("Can't find os_sa in objset\n");
2704                 return (DCMD_ERR);
2705         }
2706 
2707         if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) {
2708                 mdb_printf("Can't find sa_num_attrs\n");
2709                 return (DCMD_ERR);
2710         }
2711 
2712         if (attr_id > attr_count) {
2713                 mdb_printf("attribute id number is out of range\n");
2714                 return (DCMD_ERR);
2715         }
2716 
2717         if (bonus_tab) {
2718                 if (sa_get_off_table(bonus_tab, &offset_tab,
2719                     attr_count) == -1) {
2720                         return (DCMD_ERR);
2721                 }
2722 
2723                 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) {
2724                         mdb_printf("can't find db_data in bonus dbuf\n");
2725                         return (DCMD_ERR);
2726                 }
2727         }
2728 
2729         if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) &&
2730             spill_tab == NULL) {
2731                 mdb_printf("Attribute does not exist\n");
2732                 return (DCMD_ERR);
2733         } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) {
2734                 if (sa_get_off_table(spill_tab, &offset_tab,
2735                     attr_count) == -1) {
2736                         return (DCMD_ERR);
2737                 }
2738                 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) {
2739                         mdb_printf("can't find db_data in spill dbuf\n");
2740                         return (DCMD_ERR);
2741                 }
2742                 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
2743                         mdb_printf("Attribute does not exist\n");
2744                         return (DCMD_ERR);
2745                 }
2746         }
2747         attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
2748         mdb_printf("%p\n", attr_addr);
2749         return (DCMD_OK);
2750 }
2751 
2752 /* ARGSUSED */
2753 static int
2754 zfs_ace_print_common(uintptr_t addr, uint_t flags,
2755     uint64_t id, uint32_t access_mask, uint16_t ace_flags,
2756     uint16_t ace_type, int verbose)
2757 {
2758         if (DCMD_HDRSPEC(flags) && !verbose)
2759                 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
2760                     "ADDR", "FLAGS", "MASK", "TYPE", "ID");
2761 
2762         if (!verbose) {
2763                 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
2764                     ace_flags, access_mask, ace_type, id);
2765                 return (DCMD_OK);
2766         }
2767 
2768         switch (ace_flags & ACE_TYPE_FLAGS) {
2769         case ACE_OWNER:
2770                 mdb_printf("owner@:");
2771                 break;
2772         case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
2773                 mdb_printf("group@:");
2774                 break;
2775         case ACE_EVERYONE:
2776                 mdb_printf("everyone@:");
2777                 break;
2778         case ACE_IDENTIFIER_GROUP:
2779                 mdb_printf("group:%llx:", (u_longlong_t)id);
2780                 break;
2781         case 0: /* User entry */
2782                 mdb_printf("user:%llx:", (u_longlong_t)id);
2783                 break;
2784         }
2785 
2786         /* print out permission mask */
2787         if (access_mask & ACE_READ_DATA)
2788                 mdb_printf("r");
2789         else
2790                 mdb_printf("-");
2791         if (access_mask & ACE_WRITE_DATA)
2792                 mdb_printf("w");
2793         else
2794                 mdb_printf("-");
2795         if (access_mask & ACE_EXECUTE)
2796                 mdb_printf("x");
2797         else
2798                 mdb_printf("-");
2799         if (access_mask & ACE_APPEND_DATA)
2800                 mdb_printf("p");
2801         else
2802                 mdb_printf("-");
2803         if (access_mask & ACE_DELETE)
2804                 mdb_printf("d");
2805         else
2806                 mdb_printf("-");
2807         if (access_mask & ACE_DELETE_CHILD)
2808                 mdb_printf("D");
2809         else
2810                 mdb_printf("-");
2811         if (access_mask & ACE_READ_ATTRIBUTES)
2812                 mdb_printf("a");
2813         else
2814                 mdb_printf("-");
2815         if (access_mask & ACE_WRITE_ATTRIBUTES)
2816                 mdb_printf("A");
2817         else
2818                 mdb_printf("-");
2819         if (access_mask & ACE_READ_NAMED_ATTRS)
2820                 mdb_printf("R");
2821         else
2822                 mdb_printf("-");
2823         if (access_mask & ACE_WRITE_NAMED_ATTRS)
2824                 mdb_printf("W");
2825         else
2826                 mdb_printf("-");
2827         if (access_mask & ACE_READ_ACL)
2828                 mdb_printf("c");
2829         else
2830                 mdb_printf("-");
2831         if (access_mask & ACE_WRITE_ACL)
2832                 mdb_printf("C");
2833         else
2834                 mdb_printf("-");
2835         if (access_mask & ACE_WRITE_OWNER)
2836                 mdb_printf("o");
2837         else
2838                 mdb_printf("-");
2839         if (access_mask & ACE_SYNCHRONIZE)
2840                 mdb_printf("s");
2841         else
2842                 mdb_printf("-");
2843 
2844         mdb_printf(":");
2845 
2846         /* Print out inheritance flags */
2847         if (ace_flags & ACE_FILE_INHERIT_ACE)
2848                 mdb_printf("f");
2849         else
2850                 mdb_printf("-");
2851         if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
2852                 mdb_printf("d");
2853         else
2854                 mdb_printf("-");
2855         if (ace_flags & ACE_INHERIT_ONLY_ACE)
2856                 mdb_printf("i");
2857         else
2858                 mdb_printf("-");
2859         if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
2860                 mdb_printf("n");
2861         else
2862                 mdb_printf("-");
2863         if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
2864                 mdb_printf("S");
2865         else
2866                 mdb_printf("-");
2867         if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
2868                 mdb_printf("F");
2869         else
2870                 mdb_printf("-");
2871         if (ace_flags & ACE_INHERITED_ACE)
2872                 mdb_printf("I");
2873         else
2874                 mdb_printf("-");
2875 
2876         switch (ace_type) {
2877         case ACE_ACCESS_ALLOWED_ACE_TYPE:
2878                 mdb_printf(":allow\n");
2879                 break;
2880         case ACE_ACCESS_DENIED_ACE_TYPE:
2881                 mdb_printf(":deny\n");
2882                 break;
2883         case ACE_SYSTEM_AUDIT_ACE_TYPE:
2884                 mdb_printf(":audit\n");
2885                 break;
2886         case ACE_SYSTEM_ALARM_ACE_TYPE:
2887                 mdb_printf(":alarm\n");
2888                 break;
2889         default:
2890                 mdb_printf(":?\n");
2891         }
2892         return (DCMD_OK);
2893 }
2894 
2895 /* ARGSUSED */
2896 static int
2897 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2898 {
2899         zfs_ace_t zace;
2900         int verbose = FALSE;
2901         uint64_t id;
2902 
2903         if (!(flags & DCMD_ADDRSPEC))
2904                 return (DCMD_USAGE);
2905 
2906         if (mdb_getopts(argc, argv,
2907             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
2908                 return (DCMD_USAGE);
2909 
2910         if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
2911                 mdb_warn("failed to read zfs_ace_t");
2912                 return (DCMD_ERR);
2913         }
2914 
2915         if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
2916             (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
2917                 id = zace.z_fuid;
2918         else
2919                 id = -1;
2920 
2921         return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
2922             zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
2923 }
2924 
2925 /* ARGSUSED */
2926 static int
2927 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2928 {
2929         ace_t ace;
2930         uint64_t id;
2931         int verbose = FALSE;
2932 
2933         if (!(flags & DCMD_ADDRSPEC))
2934                 return (DCMD_USAGE);
2935 
2936         if (mdb_getopts(argc, argv,
2937             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
2938                 return (DCMD_USAGE);
2939 
2940         if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
2941                 mdb_warn("failed to read ace_t");
2942                 return (DCMD_ERR);
2943         }
2944 
2945         if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
2946             (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
2947                 id = ace.a_who;
2948         else
2949                 id = -1;
2950 
2951         return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
2952             ace.a_flags, ace.a_type, verbose));
2953 }
2954 
/*
 * State handed from zfs_acl_dump() through acl_cb() to acl_aces_cb().
 */
typedef struct acl_dump_args {
        int a_argc;             /* argument count forwarded to the ACE dcmd */
        const mdb_arg_t *a_argv; /* arguments forwarded to the ACE dcmd */
        uint16_t a_version;     /* z_version of the ACL; 1 selects zfs_ace */
        int a_flags;            /* DCMD_LOOPFIRST, then DCMD_LOOP */
} acl_dump_args_t;
2961 
2962 /* ARGSUSED */
2963 static int
2964 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
2965 {
2966         acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
2967 
2968         if (acl_args->a_version == 1) {
2969                 if (mdb_call_dcmd("zfs_ace", addr,
2970                     DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
2971                     acl_args->a_argv) != DCMD_OK) {
2972                         return (WALK_ERR);
2973                 }
2974         } else {
2975                 if (mdb_call_dcmd("zfs_ace0", addr,
2976                     DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
2977                     acl_args->a_argv) != DCMD_OK) {
2978                         return (WALK_ERR);
2979                 }
2980         }
2981         acl_args->a_flags = DCMD_LOOP;
2982         return (WALK_NEXT);
2983 }
2984 
2985 /* ARGSUSED */
2986 static int
2987 acl_cb(uintptr_t addr, const void *unknown, void *arg)
2988 {
2989         acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
2990 
2991         if (acl_args->a_version == 1) {
2992                 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb,
2993                     arg, addr) != 0) {
2994                         mdb_warn("can't walk ACEs");
2995                         return (DCMD_ERR);
2996                 }
2997         } else {
2998                 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb,
2999                     arg, addr) != 0) {
3000                         mdb_warn("can't walk ACEs");
3001                         return (DCMD_ERR);
3002                 }
3003         }
3004         return (WALK_NEXT);
3005 }
3006 
3007 /* ARGSUSED */
3008 static int
3009 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3010 {
3011         zfs_acl_t zacl;
3012         int verbose = FALSE;
3013         acl_dump_args_t acl_args;
3014 
3015         if (!(flags & DCMD_ADDRSPEC))
3016                 return (DCMD_USAGE);
3017 
3018         if (mdb_getopts(argc, argv,
3019             'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3020                 return (DCMD_USAGE);
3021 
3022         if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
3023                 mdb_warn("failed to read zfs_acl_t");
3024                 return (DCMD_ERR);
3025         }
3026 
3027         acl_args.a_argc = argc;
3028         acl_args.a_argv = argv;
3029         acl_args.a_version = zacl.z_version;
3030         acl_args.a_flags = DCMD_LOOPFIRST;
3031 
3032         if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
3033                 mdb_warn("can't walk ACL");
3034                 return (DCMD_ERR);
3035         }
3036 
3037         return (DCMD_OK);
3038 }
3039 
3040 /* ARGSUSED */
3041 static int
3042 zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
3043 {
3044         if (wsp->walk_addr == NULL) {
3045                 mdb_warn("must supply address of zfs_acl_node_t\n");
3046                 return (WALK_ERR);
3047         }
3048 
3049         wsp->walk_addr +=
3050             mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl");
3051 
3052         if (mdb_layered_walk("list", wsp) == -1) {
3053                 mdb_warn("failed to walk 'list'\n");
3054                 return (WALK_ERR);
3055         }
3056 
3057         return (WALK_NEXT);
3058 }
3059 
3060 static int
3061 zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
3062 {
3063         zfs_acl_node_t  aclnode;
3064 
3065         if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
3066             wsp->walk_addr) == -1) {
3067                 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
3068                 return (WALK_ERR);
3069         }
3070 
3071         return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
3072 }
3073 
/*
 * Private state of the ACE walkers, set up by zfs_aces_walk_init_common()
 * and consumed by zfs_aces_walk_step().
 */
typedef struct ace_walk_data {
        int             ace_count;      /* entries left; walk ends at 0 */
        int             ace_version;    /* 0: ace_t, 1: zfs_ace layouts */
} ace_walk_data_t;
3078 
3079 static int
3080 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
3081     int ace_count, uintptr_t ace_data)
3082 {
3083         ace_walk_data_t *ace_walk_data;
3084 
3085         if (wsp->walk_addr == NULL) {
3086                 mdb_warn("must supply address of zfs_acl_node_t\n");
3087                 return (WALK_ERR);
3088         }
3089 
3090         ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
3091 
3092         ace_walk_data->ace_count = ace_count;
3093         ace_walk_data->ace_version = version;
3094 
3095         wsp->walk_addr = ace_data;
3096         wsp->walk_data = ace_walk_data;
3097 
3098         return (WALK_NEXT);
3099 }
3100 
3101 static int
3102 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
3103 {
3104         static int gotid;
3105         static mdb_ctf_id_t acl_id;
3106         int z_ace_count;
3107         uintptr_t z_acldata;
3108 
3109         if (!gotid) {
3110                 if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
3111                     &acl_id) == -1) {
3112                         mdb_warn("couldn't find struct zfs_acl_node");
3113                         return (DCMD_ERR);
3114                 }
3115                 gotid = TRUE;
3116         }
3117 
3118         if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) {
3119                 return (DCMD_ERR);
3120         }
3121         if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) {
3122                 return (DCMD_ERR);
3123         }
3124 
3125         return (zfs_aces_walk_init_common(wsp, version,
3126             z_ace_count, z_acldata));
3127 }
3128 
/*
 * Init routine for the "zfs_acl_node_aces" walker: the common init with
 * ACE layout version 1.
 */
/* ARGSUSED */
static int
zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
{
        return (zfs_acl_node_aces_walk_init_common(wsp, 1));
}
3135 
/*
 * Init routine for the "zfs_acl_node_aces0" walker: the common init with
 * ACE layout version 0 (entries walked as ace_t).
 */
/* ARGSUSED */
static int
zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
{
        return (zfs_acl_node_aces_walk_init_common(wsp, 0));
}
3142 
3143 static int
3144 zfs_aces_walk_step(mdb_walk_state_t *wsp)
3145 {
3146         ace_walk_data_t *ace_data = wsp->walk_data;
3147         zfs_ace_t zace;
3148         ace_t *acep;
3149         int status;
3150         int entry_type;
3151         int allow_type;
3152         uintptr_t ptr;
3153 
3154         if (ace_data->ace_count == 0)
3155                 return (WALK_DONE);
3156 
3157         if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
3158                 mdb_warn("failed to read zfs_ace_t at %#lx",
3159                     wsp->walk_addr);
3160                 return (WALK_ERR);
3161         }
3162 
3163         switch (ace_data->ace_version) {
3164         case 0:
3165                 acep = (ace_t *)&zace;
3166                 entry_type = acep->a_flags & ACE_TYPE_FLAGS;
3167                 allow_type = acep->a_type;
3168                 break;
3169         case 1:
3170                 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
3171                 allow_type = zace.z_hdr.z_type;
3172                 break;
3173         default:
3174                 return (WALK_ERR);
3175         }
3176 
3177         ptr = (uintptr_t)wsp->walk_addr;
3178         switch (entry_type) {
3179         case ACE_OWNER:
3180         case ACE_EVERYONE:
3181         case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3182                 ptr += ace_data->ace_version == 0 ?
3183                     sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
3184                 break;
3185         case ACE_IDENTIFIER_GROUP:
3186         default:
3187                 switch (allow_type) {
3188                 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
3189                 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
3190                 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
3191                 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
3192                         ptr += ace_data->ace_version == 0 ?
3193                             sizeof (ace_t) : sizeof (zfs_object_ace_t);
3194                         break;
3195                 default:
3196                         ptr += ace_data->ace_version == 0 ?
3197                             sizeof (ace_t) : sizeof (zfs_ace_t);
3198                         break;
3199                 }
3200         }
3201 
3202         ace_data->ace_count--;
3203         status = wsp->walk_callback(wsp->walk_addr,
3204             (void *)(uintptr_t)&zace, wsp->walk_cbdata);
3205 
3206         wsp->walk_addr = ptr;
3207         return (status);
3208 }
3209 
/*
 * Local view of the target's rrwlock_t, filled in by rrwlock() with
 * mdb_ctf_vread().
 */
typedef struct mdb_zfs_rrwlock {
        uintptr_t       rr_writer;
        boolean_t       rr_writer_wanted;
} mdb_zfs_rrwlock_t;

/*
 * Cached copy of the target's rrw_tsd_key symbol; rrwlock() reads it while
 * this is still 0.
 */
static uint_t rrw_key;
3216 
3217 /* ARGSUSED */
3218 static int
3219 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3220 {
3221         mdb_zfs_rrwlock_t rrw;
3222 
3223         if (rrw_key == 0) {
3224                 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1)
3225                         return (DCMD_ERR);
3226         }
3227 
3228         if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr,
3229             0) == -1)
3230                 return (DCMD_ERR);
3231 
3232         if (rrw.rr_writer != 0) {
3233                 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer);
3234                 return (DCMD_OK);
3235         }
3236 
3237         if (rrw.rr_writer_wanted) {
3238                 mdb_printf("writer wanted\n");
3239         }
3240 
3241         mdb_printf("anonymous references:\n");
3242         (void) mdb_call_dcmd("refcount", addr +
3243             mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
3244             DCMD_ADDRSPEC, 0, NULL);
3245 
3246         mdb_printf("linked references:\n");
3247         (void) mdb_call_dcmd("refcount", addr +
3248             mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
3249             DCMD_ADDRSPEC, 0, NULL);
3250 
3251         /*
3252          * XXX This should find references from
3253          * "::walk thread | ::tsd -v <rrw_key>", but there is no support
3254          * for programmatic consumption of dcmds, so this would be
3255          * difficult, potentially requiring reimplementing ::tsd (both
3256          * user and kernel versions) in this MDB module.
3257          */
3258 
3259         return (DCMD_OK);
3260 }
3261 
3262 /*
3263  * MDB module linkage information:
3264  *
3265  * We declare a list of structures describing our dcmds, and a function
3266  * named _mdb_init to return a pointer to our module information.
3267  */
3268 
3269 static const mdb_dcmd_t dcmds[] = {
3270         { "arc", "[-bkmg]", "print ARC variables", arc_print },
3271         { "blkptr", ":", "print blkptr_t", blkptr },
3272         { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
3273         { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
3274         { "dbufs",
3275             "\t[-O objset_t*] [-n objset_name | \"mos\"] "
3276             "[-o object | \"mdn\"] \n"
3277             "\t[-l level] [-b blkid | \"bonus\"]",
3278             "find dmu_buf_impl_t's that match specified criteria", dbufs },
3279         { "abuf_find", "dva_word[0] dva_word[1]",
3280             "find arc_buf_hdr_t of a specified DVA",
3281             abuf_find },
3282         { "spa", "?[-cevmMh]\n"
3283             "\t-c display spa config\n"
3284             "\t-e display vdev statistics\n"
3285             "\t-v display vdev information\n"
3286             "\t-m display metaslab statistics\n"
3287             "\t-M display metaslab group statistics\n"
3288             "\t-h display histogram (requires -m or -M)\n",
3289             "spa_t summary", spa_print },
3290         { "spa_config", ":", "print spa_t configuration", spa_print_config },
3291         { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
3292         { "spa_vdevs", ":[-emMh]\n"
3293             "\t-e display vdev statistics\n"
3294             "\t-m dispaly metaslab statistics\n"
3295             "\t-M display metaslab group statistic\n"
3296             "\t-h display histogram (requires -m or -M)\n",
3297             "given a spa_t, print vdev summary", spa_vdevs },
3298         { "vdev", ":[-re]\n"
3299             "\t-r display recursively\n"
3300             "\t-e display statistics\n"
3301             "\t-m display metaslab statistics\n"
3302             "\t-M display metaslab group statistics\n"
3303             "\t-h display histogram (requires -m or -M)\n",
3304             "vdev_t summary", vdev_print },
3305         { "zio", ":[-cpr]\n"
3306             "\t-c display children\n"
3307             "\t-p display parents\n"
3308             "\t-r display recursively",
3309             "zio_t summary", zio_print },
3310         { "zio_state", "?", "print out all zio_t structures on system or "
3311             "for a particular pool", zio_state },
3312         { "zfs_blkstats", ":[-v]",
3313             "given a spa_t, print block type stats from last scrub",
3314             zfs_blkstats },
3315         { "zfs_params", "", "print zfs tunable parameters", zfs_params },
3316         { "refcount", ":[-r]\n"
3317             "\t-r display recently removed references",
3318             "print refcount_t holders", refcount },
3319         { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
3320         { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
3321             zfs_acl_dump },
3322         { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
3323         { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
3324         { "sa_attr_table", ":", "print SA attribute table from sa_os_t",
3325             sa_attr_table},
3326         { "sa_attr", ": attr_id",
3327             "print SA attribute address when given sa_handle_t", sa_attr_print},
3328         { "zfs_dbgmsg", ":[-va]",
3329             "print zfs debug log", dbgmsg},
3330         { "rrwlock", ":",
3331             "print rrwlock_t, including readers", rrwlock},
3332         { NULL }
3333 };
3334 
/*
 * Table of walkers exported by this module.  Each entry is
 * { name, description, init routine, step routine, fini routine }; none of
 * these walkers supplies a fini routine.  The table ends with a NULL entry.
 */
static const mdb_walker_t walkers[] = {
        { "zms_freelist", "walk ZFS metaslab freelist",
            freelist_walk_init, freelist_walk_step, NULL },
        { "txg_list", "given any txg_list_t *, walk all entries in all txgs",
            txg_list_walk_init, txg_list_walk_step, NULL },
        { "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
            txg_list0_walk_init, txg_list_walk_step, NULL },
        { "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
            txg_list1_walk_init, txg_list_walk_step, NULL },
        { "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
            txg_list2_walk_init, txg_list_walk_step, NULL },
        { "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
            txg_list3_walk_init, txg_list_walk_step, NULL },
        { "zio", "walk all zio structures, optionally for a particular spa_t",
            zio_walk_init, zio_walk_step, NULL },
        { "zio_root",
            "walk all root zio_t structures, optionally for a particular spa_t",
            zio_walk_init, zio_walk_root_step, NULL },
        { "spa", "walk all spa_t entries in the namespace",
            spa_walk_init, spa_walk_step, NULL },
        { "metaslab", "given a spa_t *, walk all metaslab_t structures",
            metaslab_walk_init, metaslab_walk_step, NULL },
        { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
            zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
        { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
            zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
        { "zfs_acl_node_aces0",
            "given a zfs_acl_node_t, walk all ACEs as ace_t",
            zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
        { NULL }
};
3366 
/*
 * Module description returned by _mdb_init(): the MDB API version this
 * module is built against plus its dcmd and walker tables.
 */
static const mdb_modinfo_t modinfo = {
        MDB_API_VERSION, dcmds, walkers
};
3370 
/*
 * Module entry point: return a pointer to this module's linkage
 * information (modinfo).
 */
const mdb_modinfo_t *
_mdb_init(void)
{
        return (&modinfo);
}
--- EOF ---