1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
  26  */
  27 
  28 /*
  29  * Functions to convert between a list of vdevs and an nvlist representing the
  30  * configuration.  Each entry in the list can be one of:
  31  *
  32  *      Device vdevs
  33  *              disk=(path=..., devid=...)
  34  *              file=(path=...)
  35  *
  36  *      Group vdevs
  37  *              raidz[1|2]=(...)
  38  *              mirror=(...)
  39  *
  40  *      Hot spares
  41  *
  42  * While the underlying implementation supports it, group vdevs cannot contain
  43  * other group vdevs.  All userland verification of devices is contained within
  44  * this file.  If successful, the nvlist returned can be passed directly to the
  45  * kernel; we've done as much verification as possible in userland.
  46  *
  47  * Hot spares are a special case, and passed down as an array of disk vdevs, at
  48  * the same level as the root of the vdev tree.
  49  *
  50  * The only function exported by this file is 'make_root_vdev'.  The
  51  * function performs several passes:
  52  *
  53  *      1. Construct the vdev specification.  Performs syntax validation and
  54  *         makes sure each device is valid.
  55  *      2. Check for devices in use.  Using libdiskmgt, makes sure that no
  56  *         devices are also in use.  Some can be overridden using the 'force'
  57  *         flag, others cannot.
 *      3. Check for replication errors if the 'force' flag is not specified.
 *         Validates that the replication level is consistent across the
 *         entire pool.
  61  *      4. Call libzfs to label any whole disks with an EFI label.
  62  */
  63 
  64 #include <assert.h>
  65 #include <devid.h>
  66 #include <errno.h>
  67 #include <fcntl.h>
  68 #include <libdiskmgt.h>
  69 #include <libintl.h>
  70 #include <libnvpair.h>
  71 #include <limits.h>
  72 #include <stdio.h>
  73 #include <string.h>
  74 #include <unistd.h>
  75 #include <sys/efi_partition.h>
  76 #include <sys/stat.h>
  77 #include <sys/vtoc.h>
  78 #include <sys/mntent.h>
  79 
  80 #include "zpool_util.h"
  81 
/* Device directory prepended to shorthand device names in make_leaf_vdev(). */
#define DISK_ROOT       "/dev/dsk"
/* Directory under which is_whole_disk() opens the device it probes. */
#define RDISK_ROOT      "/dev/rdsk"
/* Slice suffix appended to the disk name when is_whole_disk() probes it. */
#define BACKUP_SLICE    "s2"
  85 
/*
 * For any given vdev specification, we can have multiple errors.  The
 * vdev_error() function keeps track of whether we have seen an error yet, and
 * prints out a header if it's the first error we've seen.
 */
/* Set to B_TRUE by vdev_error() once the error header has been printed. */
boolean_t error_seen;
/* Read by vdev_error() to choose between the "use '-f'" and "manual" hints. */
boolean_t is_force;
  93 
  94 /*PRINTFLIKE1*/
  95 static void
  96 vdev_error(const char *fmt, ...)
  97 {
  98         va_list ap;
  99 
 100         if (!error_seen) {
 101                 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
 102                 if (!is_force)
 103                         (void) fprintf(stderr, gettext("use '-f' to override "
 104                             "the following errors:\n"));
 105                 else
 106                         (void) fprintf(stderr, gettext("the following errors "
 107                             "must be manually repaired:\n"));
 108                 error_seen = B_TRUE;
 109         }
 110 
 111         va_start(ap, fmt);
 112         (void) vfprintf(stderr, fmt, ap);
 113         va_end(ap);
 114 }
 115 
 116 static void
 117 libdiskmgt_error(int error)
 118 {
 119         /*
 120          * ENXIO/ENODEV is a valid error message if the device doesn't live in
 121          * /dev/dsk.  Don't bother printing an error message in this case.
 122          */
 123         if (error == ENXIO || error == ENODEV)
 124                 return;
 125 
 126         (void) fprintf(stderr, gettext("warning: device in use checking "
 127             "failed: %s\n"), strerror(error));
 128 }
 129 
 130 /*
 131  * Validate a device, passing the bulk of the work off to libdiskmgt.
 132  */
 133 static int
 134 check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
 135 {
 136         char *msg;
 137         int error = 0;
 138         dm_who_type_t who;
 139 
 140         if (force)
 141                 who = DM_WHO_ZPOOL_FORCE;
 142         else if (isspare)
 143                 who = DM_WHO_ZPOOL_SPARE;
 144         else
 145                 who = DM_WHO_ZPOOL;
 146 
 147         if (dm_inuse((char *)path, &msg, who, &error) || error) {
 148                 if (error != 0) {
 149                         libdiskmgt_error(error);
 150                         return (0);
 151                 } else {
 152                         vdev_error("%s", msg);
 153                         free(msg);
 154                         return (-1);
 155                 }
 156         }
 157 
 158         /*
 159          * If we're given a whole disk, ignore overlapping slices since we're
 160          * about to label it anyway.
 161          */
 162         error = 0;
 163         if (!wholedisk && !force &&
 164             (dm_isoverlapping((char *)path, &msg, &error) || error)) {
 165                 if (error == 0) {
 166                         /* dm_isoverlapping returned -1 */
 167                         vdev_error(gettext("%s overlaps with %s\n"), path, msg);
 168                         free(msg);
 169                         return (-1);
 170                 } else if (error != ENODEV) {
 171                         /* libdiskmgt's devcache only handles physical drives */
 172                         libdiskmgt_error(error);
 173                         return (0);
 174                 }
 175         }
 176 
 177         return (0);
 178 }
 179 
 180 
 181 /*
 182  * Validate a whole disk.  Iterate over all slices on the disk and make sure
 183  * that none is in use by calling check_slice().
 184  */
 185 static int
 186 check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
 187 {
 188         dm_descriptor_t *drive, *media, *slice;
 189         int err = 0;
 190         int i;
 191         int ret;
 192 
 193         /*
 194          * Get the drive associated with this disk.  This should never fail,
 195          * because we already have an alias handle open for the device.
 196          */
 197         if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
 198             &err)) == NULL || *drive == NULL) {
 199                 if (err)
 200                         libdiskmgt_error(err);
 201                 return (0);
 202         }
 203 
 204         if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
 205             &err)) == NULL) {
 206                 dm_free_descriptors(drive);
 207                 if (err)
 208                         libdiskmgt_error(err);
 209                 return (0);
 210         }
 211 
 212         dm_free_descriptors(drive);
 213 
 214         /*
 215          * It is possible that the user has specified a removable media drive,
 216          * and the media is not present.
 217          */
 218         if (*media == NULL) {
 219                 dm_free_descriptors(media);
 220                 vdev_error(gettext("'%s' has no media in drive\n"), name);
 221                 return (-1);
 222         }
 223 
 224         if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
 225             &err)) == NULL) {
 226                 dm_free_descriptors(media);
 227                 if (err)
 228                         libdiskmgt_error(err);
 229                 return (0);
 230         }
 231 
 232         dm_free_descriptors(media);
 233 
 234         ret = 0;
 235 
 236         /*
 237          * Iterate over all slices and report any errors.  We don't care about
 238          * overlapping slices because we are using the whole disk.
 239          */
 240         for (i = 0; slice[i] != NULL; i++) {
 241                 char *name = dm_get_name(slice[i], &err);
 242 
 243                 if (check_slice(name, force, B_TRUE, isspare) != 0)
 244                         ret = -1;
 245 
 246                 dm_free_name(name);
 247         }
 248 
 249         dm_free_descriptors(slice);
 250         return (ret);
 251 }
 252 
 253 /*
 254  * Validate a device.
 255  */
 256 static int
 257 check_device(const char *path, boolean_t force, boolean_t isspare)
 258 {
 259         dm_descriptor_t desc;
 260         int err;
 261         char *dev;
 262 
 263         /*
 264          * For whole disks, libdiskmgt does not include the leading dev path.
 265          */
 266         dev = strrchr(path, '/');
 267         assert(dev != NULL);
 268         dev++;
 269         if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
 270                 err = check_disk(path, desc, force, isspare);
 271                 dm_free_descriptor(desc);
 272                 return (err);
 273         }
 274 
 275         return (check_slice(path, force, B_FALSE, isspare));
 276 }
 277 
 278 /*
 279  * Check that a file is valid.  All we can do in this case is check that it's
 280  * not in use by another pool, and not in use by swap.
 281  */
 282 static int
 283 check_file(const char *file, boolean_t force, boolean_t isspare)
 284 {
 285         char  *name;
 286         int fd;
 287         int ret = 0;
 288         int err;
 289         pool_state_t state;
 290         boolean_t inuse;
 291 
 292         if (dm_inuse_swap(file, &err)) {
 293                 if (err)
 294                         libdiskmgt_error(err);
 295                 else
 296                         vdev_error(gettext("%s is currently used by swap. "
 297                             "Please see swap(1M).\n"), file);
 298                 return (-1);
 299         }
 300 
 301         if ((fd = open(file, O_RDONLY)) < 0)
 302                 return (0);
 303 
 304         if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
 305                 const char *desc;
 306 
 307                 switch (state) {
 308                 case POOL_STATE_ACTIVE:
 309                         desc = gettext("active");
 310                         break;
 311 
 312                 case POOL_STATE_EXPORTED:
 313                         desc = gettext("exported");
 314                         break;
 315 
 316                 case POOL_STATE_POTENTIALLY_ACTIVE:
 317                         desc = gettext("potentially active");
 318                         break;
 319 
 320                 default:
 321                         desc = gettext("unknown");
 322                         break;
 323                 }
 324 
 325                 /*
 326                  * Allow hot spares to be shared between pools.
 327                  */
 328                 if (state == POOL_STATE_SPARE && isspare)
 329                         return (0);
 330 
 331                 if (state == POOL_STATE_ACTIVE ||
 332                     state == POOL_STATE_SPARE || !force) {
 333                         switch (state) {
 334                         case POOL_STATE_SPARE:
 335                                 vdev_error(gettext("%s is reserved as a hot "
 336                                     "spare for pool %s\n"), file, name);
 337                                 break;
 338                         default:
 339                                 vdev_error(gettext("%s is part of %s pool "
 340                                     "'%s'\n"), file, desc, name);
 341                                 break;
 342                         }
 343                         ret = -1;
 344                 }
 345 
 346                 free(name);
 347         }
 348 
 349         (void) close(fd);
 350         return (ret);
 351 }
 352 
 353 
 354 /*
 355  * By "whole disk" we mean an entire physical disk (something we can
 356  * label, toggle the write cache on, etc.) as opposed to the full
 357  * capacity of a pseudo-device such as lofi or did.  We act as if we
 358  * are labeling the disk, which should be a pretty good test of whether
 359  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
 360  * it isn't.
 361  */
 362 static boolean_t
 363 is_whole_disk(const char *arg)
 364 {
 365         struct dk_gpt *label;
 366         int     fd;
 367         char    path[MAXPATHLEN];
 368 
 369         (void) snprintf(path, sizeof (path), "%s%s%s",
 370             RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
 371         if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
 372                 return (B_FALSE);
 373         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
 374                 (void) close(fd);
 375                 return (B_FALSE);
 376         }
 377         efi_free(label);
 378         (void) close(fd);
 379         return (B_TRUE);
 380 }
 381 
 382 /*
 383  * Create a leaf vdev.  Determine if this is a file or a device.  If it's a
 384  * device, fill in the device id to make a complete nvlist.  Valid forms for a
 385  * leaf vdev are:
 386  *
 387  *      /dev/dsk/xxx    Complete disk path
 388  *      /xxx            Full path to file
 389  *      xxx             Shorthand for /dev/dsk/xxx
 390  */
 391 static nvlist_t *
 392 make_leaf_vdev(const char *arg, uint64_t is_log)
 393 {
 394         char path[MAXPATHLEN];
 395         struct stat64 statbuf;
 396         nvlist_t *vdev = NULL;
 397         char *type = NULL;
 398         boolean_t wholedisk = B_FALSE;
 399 
 400         /*
 401          * Determine what type of vdev this is, and put the full path into
 402          * 'path'.  We detect whether this is a device of file afterwards by
 403          * checking the st_mode of the file.
 404          */
 405         if (arg[0] == '/') {
 406                 /*
 407                  * Complete device or file path.  Exact type is determined by
 408                  * examining the file descriptor afterwards.
 409                  */
 410                 wholedisk = is_whole_disk(arg);
 411                 if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
 412                         (void) fprintf(stderr,
 413                             gettext("cannot open '%s': %s\n"),
 414                             arg, strerror(errno));
 415                         return (NULL);
 416                 }
 417 
 418                 (void) strlcpy(path, arg, sizeof (path));
 419         } else {
 420                 /*
 421                  * This may be a short path for a device, or it could be total
 422                  * gibberish.  Check to see if it's a known device in
 423                  * /dev/dsk/.  As part of this check, see if we've been given a
 424                  * an entire disk (minus the slice number).
 425                  */
 426                 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
 427                     arg);
 428                 wholedisk = is_whole_disk(path);
 429                 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
 430                         /*
 431                          * If we got ENOENT, then the user gave us
 432                          * gibberish, so try to direct them with a
 433                          * reasonable error message.  Otherwise,
 434                          * regurgitate strerror() since it's the best we
 435                          * can do.
 436                          */
 437                         if (errno == ENOENT) {
 438                                 (void) fprintf(stderr,
 439                                     gettext("cannot open '%s': no such "
 440                                     "device in %s\n"), arg, DISK_ROOT);
 441                                 (void) fprintf(stderr,
 442                                     gettext("must be a full path or "
 443                                     "shorthand device name\n"));
 444                                 return (NULL);
 445                         } else {
 446                                 (void) fprintf(stderr,
 447                                     gettext("cannot open '%s': %s\n"),
 448                                     path, strerror(errno));
 449                                 return (NULL);
 450                         }
 451                 }
 452         }
 453 
 454         /*
 455          * Determine whether this is a device or a file.
 456          */
 457         if (wholedisk || S_ISBLK(statbuf.st_mode)) {
 458                 type = VDEV_TYPE_DISK;
 459         } else if (S_ISREG(statbuf.st_mode)) {
 460                 type = VDEV_TYPE_FILE;
 461         } else {
 462                 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
 463                     "block device or regular file\n"), path);
 464                 return (NULL);
 465         }
 466 
 467         /*
 468          * Finally, we have the complete device or file, and we know that it is
 469          * acceptable to use.  Construct the nvlist to describe this vdev.  All
 470          * vdevs have a 'path' element, and devices also have a 'devid' element.
 471          */
 472         verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
 473         verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
 474         verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
 475         verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
 476         if (strcmp(type, VDEV_TYPE_DISK) == 0)
 477                 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 478                     (uint64_t)wholedisk) == 0);
 479 
 480         /*
 481          * For a whole disk, defer getting its devid until after labeling it.
 482          */
 483         if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
 484                 /*
 485                  * Get the devid for the device.
 486                  */
 487                 int fd;
 488                 ddi_devid_t devid;
 489                 char *minor = NULL, *devid_str = NULL;
 490 
 491                 if ((fd = open(path, O_RDONLY)) < 0) {
 492                         (void) fprintf(stderr, gettext("cannot open '%s': "
 493                             "%s\n"), path, strerror(errno));
 494                         nvlist_free(vdev);
 495                         return (NULL);
 496                 }
 497 
 498                 if (devid_get(fd, &devid) == 0) {
 499                         if (devid_get_minor_name(fd, &minor) == 0 &&
 500                             (devid_str = devid_str_encode(devid, minor)) !=
 501                             NULL) {
 502                                 verify(nvlist_add_string(vdev,
 503                                     ZPOOL_CONFIG_DEVID, devid_str) == 0);
 504                         }
 505                         if (devid_str != NULL)
 506                                 devid_str_free(devid_str);
 507                         if (minor != NULL)
 508                                 devid_str_free(minor);
 509                         devid_free(devid);
 510                 }
 511 
 512                 (void) close(fd);
 513         }
 514 
 515         return (vdev);
 516 }
 517 
/*
 * Go through and verify the replication level of the pool is consistent.
 * Performs the following checks:
 *
 *      For the new spec, verifies that devices in mirrors and raidz are the
 *      same size.
 *
 *      If the current configuration already has inconsistent replication
 *      levels, ignore any other potential problems in the new spec.
 *
 *      Otherwise, make sure that the current spec (if there is one) and the new
 *      spec have consistent replication levels.
 */
typedef struct replication_level {
        /* vdev type string (ZPOOL_CONFIG_TYPE) of the top-level vdev */
        char *zprl_type;
        /* number of children; 1 when the top-level vdev is a leaf */
        uint64_t zprl_children;
        /* ZPOOL_CONFIG_NPARITY for RAID-Z vdevs, 0 for everything else */
        uint64_t zprl_parity;
} replication_level_t;

/*
 * Largest size difference (16MB) tolerated between the children of a single
 * mirror or RAID-Z vdev before get_replication() reports a mismatch.
 */
#define ZPOOL_FUZZ      (16 * 1024 * 1024)
 538 
 539 /*
 540  * Given a list of toplevel vdevs, return the current replication level.  If
 541  * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
 542  * an error message will be displayed for each self-inconsistent vdev.
 543  */
 544 static replication_level_t *
 545 get_replication(nvlist_t *nvroot, boolean_t fatal)
 546 {
 547         nvlist_t **top;
 548         uint_t t, toplevels;
 549         nvlist_t **child;
 550         uint_t c, children;
 551         nvlist_t *nv;
 552         char *type;
 553         replication_level_t lastrep = {0};
 554         replication_level_t rep;
 555         replication_level_t *ret;
 556         boolean_t dontreport;
 557 
 558         ret = safe_malloc(sizeof (replication_level_t));
 559 
 560         verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 561             &top, &toplevels) == 0);
 562 
 563         lastrep.zprl_type = NULL;
 564         for (t = 0; t < toplevels; t++) {
 565                 uint64_t is_log = B_FALSE;
 566 
 567                 nv = top[t];
 568 
 569                 /*
 570                  * For separate logs we ignore the top level vdev replication
 571                  * constraints.
 572                  */
 573                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
 574                 if (is_log)
 575                         continue;
 576 
 577                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
 578                     &type) == 0);
 579                 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 580                     &child, &children) != 0) {
 581                         /*
 582                          * This is a 'file' or 'disk' vdev.
 583                          */
 584                         rep.zprl_type = type;
 585                         rep.zprl_children = 1;
 586                         rep.zprl_parity = 0;
 587                 } else {
 588                         uint64_t vdev_size;
 589 
 590                         /*
 591                          * This is a mirror or RAID-Z vdev.  Go through and make
 592                          * sure the contents are all the same (files vs. disks),
 593                          * keeping track of the number of elements in the
 594                          * process.
 595                          *
 596                          * We also check that the size of each vdev (if it can
 597                          * be determined) is the same.
 598                          */
 599                         rep.zprl_type = type;
 600                         rep.zprl_children = 0;
 601 
 602                         if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
 603                                 verify(nvlist_lookup_uint64(nv,
 604                                     ZPOOL_CONFIG_NPARITY,
 605                                     &rep.zprl_parity) == 0);
 606                                 assert(rep.zprl_parity != 0);
 607                         } else {
 608                                 rep.zprl_parity = 0;
 609                         }
 610 
 611                         /*
 612                          * The 'dontreport' variable indicates that we've
 613                          * already reported an error for this spec, so don't
 614                          * bother doing it again.
 615                          */
 616                         type = NULL;
 617                         dontreport = 0;
 618                         vdev_size = -1ULL;
 619                         for (c = 0; c < children; c++) {
 620                                 nvlist_t *cnv = child[c];
 621                                 char *path;
 622                                 struct stat64 statbuf;
 623                                 uint64_t size = -1ULL;
 624                                 char *childtype;
 625                                 int fd, err;
 626 
 627                                 rep.zprl_children++;
 628 
 629                                 verify(nvlist_lookup_string(cnv,
 630                                     ZPOOL_CONFIG_TYPE, &childtype) == 0);
 631 
 632                                 /*
 633                                  * If this is a replacing or spare vdev, then
 634                                  * get the real first child of the vdev.
 635                                  */
 636                                 if (strcmp(childtype,
 637                                     VDEV_TYPE_REPLACING) == 0 ||
 638                                     strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
 639                                         nvlist_t **rchild;
 640                                         uint_t rchildren;
 641 
 642                                         verify(nvlist_lookup_nvlist_array(cnv,
 643                                             ZPOOL_CONFIG_CHILDREN, &rchild,
 644                                             &rchildren) == 0);
 645                                         assert(rchildren == 2);
 646                                         cnv = rchild[0];
 647 
 648                                         verify(nvlist_lookup_string(cnv,
 649                                             ZPOOL_CONFIG_TYPE,
 650                                             &childtype) == 0);
 651                                 }
 652 
 653                                 verify(nvlist_lookup_string(cnv,
 654                                     ZPOOL_CONFIG_PATH, &path) == 0);
 655 
 656                                 /*
 657                                  * If we have a raidz/mirror that combines disks
 658                                  * with files, report it as an error.
 659                                  */
 660                                 if (!dontreport && type != NULL &&
 661                                     strcmp(type, childtype) != 0) {
 662                                         if (ret != NULL)
 663                                                 free(ret);
 664                                         ret = NULL;
 665                                         if (fatal)
 666                                                 vdev_error(gettext(
 667                                                     "mismatched replication "
 668                                                     "level: %s contains both "
 669                                                     "files and devices\n"),
 670                                                     rep.zprl_type);
 671                                         else
 672                                                 return (NULL);
 673                                         dontreport = B_TRUE;
 674                                 }
 675 
 676                                 /*
 677                                  * According to stat(2), the value of 'st_size'
 678                                  * is undefined for block devices and character
 679                                  * devices.  But there is no effective way to
 680                                  * determine the real size in userland.
 681                                  *
 682                                  * Instead, we'll take advantage of an
 683                                  * implementation detail of spec_size().  If the
 684                                  * device is currently open, then we (should)
 685                                  * return a valid size.
 686                                  *
 687                                  * If we still don't get a valid size (indicated
 688                                  * by a size of 0 or MAXOFFSET_T), then ignore
 689                                  * this device altogether.
 690                                  */
 691                                 if ((fd = open(path, O_RDONLY)) >= 0) {
 692                                         err = fstat64(fd, &statbuf);
 693                                         (void) close(fd);
 694                                 } else {
 695                                         err = stat64(path, &statbuf);
 696                                 }
 697 
 698                                 if (err != 0 ||
 699                                     statbuf.st_size == 0 ||
 700                                     statbuf.st_size == MAXOFFSET_T)
 701                                         continue;
 702 
 703                                 size = statbuf.st_size;
 704 
 705                                 /*
 706                                  * Also make sure that devices and
 707                                  * slices have a consistent size.  If
 708                                  * they differ by a significant amount
 709                                  * (~16MB) then report an error.
 710                                  */
 711                                 if (!dontreport &&
 712                                     (vdev_size != -1ULL &&
 713                                     (labs(size - vdev_size) >
 714                                     ZPOOL_FUZZ))) {
 715                                         if (ret != NULL)
 716                                                 free(ret);
 717                                         ret = NULL;
 718                                         if (fatal)
 719                                                 vdev_error(gettext(
 720                                                     "%s contains devices of "
 721                                                     "different sizes\n"),
 722                                                     rep.zprl_type);
 723                                         else
 724                                                 return (NULL);
 725                                         dontreport = B_TRUE;
 726                                 }
 727 
 728                                 type = childtype;
 729                                 vdev_size = size;
 730                         }
 731                 }
 732 
 733                 /*
 734                  * At this point, we have the replication of the last toplevel
 735                  * vdev in 'rep'.  Compare it to 'lastrep' to see if its
 736                  * different.
 737                  */
 738                 if (lastrep.zprl_type != NULL) {
 739                         if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
 740                                 if (ret != NULL)
 741                                         free(ret);
 742                                 ret = NULL;
 743                                 if (fatal)
 744                                         vdev_error(gettext(
 745                                             "mismatched replication level: "
 746                                             "both %s and %s vdevs are "
 747                                             "present\n"),
 748                                             lastrep.zprl_type, rep.zprl_type);
 749                                 else
 750                                         return (NULL);
 751                         } else if (lastrep.zprl_parity != rep.zprl_parity) {
 752                                 if (ret)
 753                                         free(ret);
 754                                 ret = NULL;
 755                                 if (fatal)
 756                                         vdev_error(gettext(
 757                                             "mismatched replication level: "
 758                                             "both %llu and %llu device parity "
 759                                             "%s vdevs are present\n"),
 760                                             lastrep.zprl_parity,
 761                                             rep.zprl_parity,
 762                                             rep.zprl_type);
 763                                 else
 764                                         return (NULL);
 765                         } else if (lastrep.zprl_children != rep.zprl_children) {
 766                                 if (ret)
 767                                         free(ret);
 768                                 ret = NULL;
 769                                 if (fatal)
 770                                         vdev_error(gettext(
 771                                             "mismatched replication level: "
 772                                             "both %llu-way and %llu-way %s "
 773                                             "vdevs are present\n"),
 774                                             lastrep.zprl_children,
 775                                             rep.zprl_children,
 776                                             rep.zprl_type);
 777                                 else
 778                                         return (NULL);
 779                         }
 780                 }
 781                 lastrep = rep;
 782         }
 783 
 784         if (ret != NULL)
 785                 *ret = rep;
 786 
 787         return (ret);
 788 }
 789 
 790 /*
 791  * Check the replication level of the vdev spec against the current pool.  Calls
 792  * get_replication() to make sure the new spec is self-consistent.  If the pool
 793  * has a consistent replication level, then we ignore any errors.  Otherwise,
 794  * report any difference between the two.
 795  */
 796 static int
 797 check_replication(nvlist_t *config, nvlist_t *newroot)
 798 {
 799         nvlist_t **child;
 800         uint_t  children;
 801         replication_level_t *current = NULL, *new;
 802         int ret;
 803 
 804         /*
 805          * If we have a current pool configuration, check to see if it's
 806          * self-consistent.  If not, simply return success.
 807          */
 808         if (config != NULL) {
 809                 nvlist_t *nvroot;
 810 
 811                 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 812                     &nvroot) == 0);
 813                 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
 814                         return (0);
 815         }
 816         /*
 817          * for spares there may be no children, and therefore no
 818          * replication level to check
 819          */
 820         if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
 821             &child, &children) != 0) || (children == 0)) {
 822                 free(current);
 823                 return (0);
 824         }
 825 
 826         /*
 827          * If all we have is logs then there's no replication level to check.
 828          */
 829         if (num_logs(newroot) == children) {
 830                 free(current);
 831                 return (0);
 832         }
 833 
 834         /*
 835          * Get the replication level of the new vdev spec, reporting any
 836          * inconsistencies found.
 837          */
 838         if ((new = get_replication(newroot, B_TRUE)) == NULL) {
 839                 free(current);
 840                 return (-1);
 841         }
 842 
 843         /*
 844          * Check to see if the new vdev spec matches the replication level of
 845          * the current pool.
 846          */
 847         ret = 0;
 848         if (current != NULL) {
 849                 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
 850                         vdev_error(gettext(
 851                             "mismatched replication level: pool uses %s "
 852                             "and new vdev is %s\n"),
 853                             current->zprl_type, new->zprl_type);
 854                         ret = -1;
 855                 } else if (current->zprl_parity != new->zprl_parity) {
 856                         vdev_error(gettext(
 857                             "mismatched replication level: pool uses %llu "
 858                             "device parity and new vdev uses %llu\n"),
 859                             current->zprl_parity, new->zprl_parity);
 860                         ret = -1;
 861                 } else if (current->zprl_children != new->zprl_children) {
 862                         vdev_error(gettext(
 863                             "mismatched replication level: pool uses %llu-way "
 864                             "%s and new vdev uses %llu-way %s\n"),
 865                             current->zprl_children, current->zprl_type,
 866                             new->zprl_children, new->zprl_type);
 867                         ret = -1;
 868                 }
 869         }
 870 
 871         free(new);
 872         if (current != NULL)
 873                 free(current);
 874 
 875         return (ret);
 876 }
 877 
 878 /*
 879  * Go through and find any whole disks in the vdev specification, labelling them
 880  * as appropriate.  When constructing the vdev spec, we were unable to open this
 881  * device in order to provide a devid.  Now that we have labelled the disk and
 882  * know that slice 0 is valid, we can construct the devid now.
 883  *
 884  * If the disk was already labeled with an EFI label, we will have gotten the
 885  * devid already (because we were able to open the whole disk).  Otherwise, we
 886  * need to get the devid after we label the disk.
 887  */
 888 static int
 889 make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 890 {
 891         nvlist_t **child;
 892         uint_t c, children;
 893         char *type, *path, *diskname;
 894         char buf[MAXPATHLEN];
 895         uint64_t wholedisk;
 896         int fd;
 897         int ret;
 898         ddi_devid_t devid;
 899         char *minor = NULL, *devid_str = NULL;
 900 
 901         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 902 
 903         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 904             &child, &children) != 0) {
 905 
 906                 if (strcmp(type, VDEV_TYPE_DISK) != 0)
 907                         return (0);
 908 
 909                 /*
 910                  * We have a disk device.  Get the path to the device
 911                  * and see if it's a whole disk by appending the backup
 912                  * slice and stat()ing the device.
 913                  */
 914                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
 915                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 916                     &wholedisk) != 0 || !wholedisk)
 917                         return (0);
 918 
 919                 diskname = strrchr(path, '/');
 920                 assert(diskname != NULL);
 921                 diskname++;
 922                 if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
 923                         return (-1);
 924 
 925                 /*
 926                  * Fill in the devid, now that we've labeled the disk.
 927                  */
 928                 (void) snprintf(buf, sizeof (buf), "%ss0", path);
 929                 if ((fd = open(buf, O_RDONLY)) < 0) {
 930                         (void) fprintf(stderr,
 931                             gettext("cannot open '%s': %s\n"),
 932                             buf, strerror(errno));
 933                         return (-1);
 934                 }
 935 
 936                 if (devid_get(fd, &devid) == 0) {
 937                         if (devid_get_minor_name(fd, &minor) == 0 &&
 938                             (devid_str = devid_str_encode(devid, minor)) !=
 939                             NULL) {
 940                                 verify(nvlist_add_string(nv,
 941                                     ZPOOL_CONFIG_DEVID, devid_str) == 0);
 942                         }
 943                         if (devid_str != NULL)
 944                                 devid_str_free(devid_str);
 945                         if (minor != NULL)
 946                                 devid_str_free(minor);
 947                         devid_free(devid);
 948                 }
 949 
 950                 /*
 951                  * Update the path to refer to the 's0' slice.  The presence of
 952                  * the 'whole_disk' field indicates to the CLI that we should
 953                  * chop off the slice number when displaying the device in
 954                  * future output.
 955                  */
 956                 verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
 957 
 958                 (void) close(fd);
 959 
 960                 return (0);
 961         }
 962 
 963         for (c = 0; c < children; c++)
 964                 if ((ret = make_disks(zhp, child[c])) != 0)
 965                         return (ret);
 966 
 967         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 968             &child, &children) == 0)
 969                 for (c = 0; c < children; c++)
 970                         if ((ret = make_disks(zhp, child[c])) != 0)
 971                                 return (ret);
 972 
 973         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 974             &child, &children) == 0)
 975                 for (c = 0; c < children; c++)
 976                         if ((ret = make_disks(zhp, child[c])) != 0)
 977                                 return (ret);
 978 
 979         return (0);
 980 }
 981 
 982 /*
 983  * Determine if the given path is a hot spare within the given configuration.
 984  */
 985 static boolean_t
 986 is_spare(nvlist_t *config, const char *path)
 987 {
 988         int fd;
 989         pool_state_t state;
 990         char *name = NULL;
 991         nvlist_t *label;
 992         uint64_t guid, spareguid;
 993         nvlist_t *nvroot;
 994         nvlist_t **spares;
 995         uint_t i, nspares;
 996         boolean_t inuse;
 997 
 998         if ((fd = open(path, O_RDONLY)) < 0)
 999                 return (B_FALSE);
1000 
1001         if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
1002             !inuse ||
1003             state != POOL_STATE_SPARE ||
1004             zpool_read_label(fd, &label) != 0) {
1005                 free(name);
1006                 (void) close(fd);
1007                 return (B_FALSE);
1008         }
1009         free(name);
1010         (void) close(fd);
1011 
1012         verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
1013         nvlist_free(label);
1014 
1015         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1016             &nvroot) == 0);
1017         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1018             &spares, &nspares) == 0) {
1019                 for (i = 0; i < nspares; i++) {
1020                         verify(nvlist_lookup_uint64(spares[i],
1021                             ZPOOL_CONFIG_GUID, &spareguid) == 0);
1022                         if (spareguid == guid)
1023                                 return (B_TRUE);
1024                 }
1025         }
1026 
1027         return (B_FALSE);
1028 }
1029 
1030 /*
1031  * Go through and find any devices that are in use.  We rely on libdiskmgt for
1032  * the majority of this task.
1033  */
1034 static boolean_t
1035 is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1036     boolean_t replacing, boolean_t isspare)
1037 {
1038         nvlist_t **child;
1039         uint_t c, children;
1040         char *type, *path;
1041         int ret = 0;
1042         char buf[MAXPATHLEN];
1043         uint64_t wholedisk;
1044         boolean_t anyinuse = B_FALSE;
1045 
1046         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1047 
1048         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1049             &child, &children) != 0) {
1050 
1051                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1052 
1053                 /*
1054                  * As a generic check, we look to see if this is a replace of a
1055                  * hot spare within the same pool.  If so, we allow it
1056                  * regardless of what libdiskmgt or zpool_in_use() says.
1057                  */
1058                 if (replacing) {
1059                         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1060                             &wholedisk) == 0 && wholedisk)
1061                                 (void) snprintf(buf, sizeof (buf), "%ss0",
1062                                     path);
1063                         else
1064                                 (void) strlcpy(buf, path, sizeof (buf));
1065 
1066                         if (is_spare(config, buf))
1067                                 return (B_FALSE);
1068                 }
1069 
1070                 if (strcmp(type, VDEV_TYPE_DISK) == 0)
1071                         ret = check_device(path, force, isspare);
1072                 else if (strcmp(type, VDEV_TYPE_FILE) == 0)
1073                         ret = check_file(path, force, isspare);
1074 
1075                 return (ret != 0);
1076         }
1077 
1078         for (c = 0; c < children; c++)
1079                 if (is_device_in_use(config, child[c], force, replacing,
1080                     B_FALSE))
1081                         anyinuse = B_TRUE;
1082 
1083         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1084             &child, &children) == 0)
1085                 for (c = 0; c < children; c++)
1086                         if (is_device_in_use(config, child[c], force, replacing,
1087                             B_TRUE))
1088                                 anyinuse = B_TRUE;
1089 
1090         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1091             &child, &children) == 0)
1092                 for (c = 0; c < children; c++)
1093                         if (is_device_in_use(config, child[c], force, replacing,
1094                             B_FALSE))
1095                                 anyinuse = B_TRUE;
1096 
1097         return (anyinuse);
1098 }
1099 
1100 static const char *
1101 is_grouping(const char *type, int *mindev, int *maxdev)
1102 {
1103         if (strncmp(type, "raidz", 5) == 0) {
1104                 const char *p = type + 5;
1105                 char *end;
1106                 long nparity;
1107 
1108                 if (*p == '\0') {
1109                         nparity = 1;
1110                 } else if (*p == '0') {
1111                         return (NULL); /* no zero prefixes allowed */
1112                 } else {
1113                         errno = 0;
1114                         nparity = strtol(p, &end, 10);
1115                         if (errno != 0 || nparity < 1 || nparity >= 255 ||
1116                             *end != '\0')
1117                                 return (NULL);
1118                 }
1119 
1120                 if (mindev != NULL)
1121                         *mindev = nparity + 1;
1122                 if (maxdev != NULL)
1123                         *maxdev = 255;
1124                 return (VDEV_TYPE_RAIDZ);
1125         }
1126 
1127         if (maxdev != NULL)
1128                 *maxdev = INT_MAX;
1129 
1130         if (strcmp(type, "mirror") == 0) {
1131                 if (mindev != NULL)
1132                         *mindev = 2;
1133                 return (VDEV_TYPE_MIRROR);
1134         }
1135 
1136         if (strcmp(type, "spare") == 0) {
1137                 if (mindev != NULL)
1138                         *mindev = 1;
1139                 return (VDEV_TYPE_SPARE);
1140         }
1141 
1142         if (strcmp(type, "log") == 0) {
1143                 if (mindev != NULL)
1144                         *mindev = 1;
1145                 return (VDEV_TYPE_LOG);
1146         }
1147 
1148         if (strcmp(type, "cache") == 0) {
1149                 if (mindev != NULL)
1150                         *mindev = 1;
1151                 return (VDEV_TYPE_L2CACHE);
1152         }
1153 
1154         return (NULL);
1155 }
1156 
1157 /*
1158  * Construct a syntactically valid vdev specification,
1159  * and ensure that all devices and files exist and can be opened.
1160  * Note: we don't bother freeing anything in the error paths
1161  * because the program is just going to exit anyway.
1162  */
1163 nvlist_t *
1164 construct_spec(int argc, char **argv)
1165 {
1166         nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
1167         int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
1168         const char *type;
1169         uint64_t is_log;
1170         boolean_t seen_logs;
1171 
1172         top = NULL;
1173         toplevels = 0;
1174         spares = NULL;
1175         l2cache = NULL;
1176         nspares = 0;
1177         nlogs = 0;
1178         nl2cache = 0;
1179         is_log = B_FALSE;
1180         seen_logs = B_FALSE;
1181 
1182         while (argc > 0) {
1183                 nv = NULL;
1184 
1185                 /*
1186                  * If it's a mirror or raidz, the subsequent arguments are
1187                  * its leaves -- until we encounter the next mirror or raidz.
1188                  */
1189                 if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
1190                         nvlist_t **child = NULL;
1191                         int c, children = 0;
1192 
1193                         if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1194                                 if (spares != NULL) {
1195                                         (void) fprintf(stderr,
1196                                             gettext("invalid vdev "
1197                                             "specification: 'spare' can be "
1198                                             "specified only once\n"));
1199                                         return (NULL);
1200                                 }
1201                                 is_log = B_FALSE;
1202                         }
1203 
1204                         if (strcmp(type, VDEV_TYPE_LOG) == 0) {
1205                                 if (seen_logs) {
1206                                         (void) fprintf(stderr,
1207                                             gettext("invalid vdev "
1208                                             "specification: 'log' can be "
1209                                             "specified only once\n"));
1210                                         return (NULL);
1211                                 }
1212                                 seen_logs = B_TRUE;
1213                                 is_log = B_TRUE;
1214                                 argc--;
1215                                 argv++;
1216                                 /*
1217                                  * A log is not a real grouping device.
1218                                  * We just set is_log and continue.
1219                                  */
1220                                 continue;
1221                         }
1222 
1223                         if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1224                                 if (l2cache != NULL) {
1225                                         (void) fprintf(stderr,
1226                                             gettext("invalid vdev "
1227                                             "specification: 'cache' can be "
1228                                             "specified only once\n"));
1229                                         return (NULL);
1230                                 }
1231                                 is_log = B_FALSE;
1232                         }
1233 
1234                         if (is_log) {
1235                                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1236                                         (void) fprintf(stderr,
1237                                             gettext("invalid vdev "
1238                                             "specification: unsupported 'log' "
1239                                             "device: %s\n"), type);
1240                                         return (NULL);
1241                                 }
1242                                 nlogs++;
1243                         }
1244 
1245                         for (c = 1; c < argc; c++) {
1246                                 if (is_grouping(argv[c], NULL, NULL) != NULL)
1247                                         break;
1248                                 children++;
1249                                 child = realloc(child,
1250                                     children * sizeof (nvlist_t *));
1251                                 if (child == NULL)
1252                                         zpool_no_memory();
1253                                 if ((nv = make_leaf_vdev(argv[c], B_FALSE))
1254                                     == NULL)
1255                                         return (NULL);
1256                                 child[children - 1] = nv;
1257                         }
1258 
1259                         if (children < mindev) {
1260                                 (void) fprintf(stderr, gettext("invalid vdev "
1261                                     "specification: %s requires at least %d "
1262                                     "devices\n"), argv[0], mindev);
1263                                 return (NULL);
1264                         }
1265 
1266                         if (children > maxdev) {
1267                                 (void) fprintf(stderr, gettext("invalid vdev "
1268                                     "specification: %s supports no more than "
1269                                     "%d devices\n"), argv[0], maxdev);
1270                                 return (NULL);
1271                         }
1272 
1273                         argc -= c;
1274                         argv += c;
1275 
1276                         if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1277                                 spares = child;
1278                                 nspares = children;
1279                                 continue;
1280                         } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1281                                 l2cache = child;
1282                                 nl2cache = children;
1283                                 continue;
1284                         } else {
1285                                 verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
1286                                     0) == 0);
1287                                 verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
1288                                     type) == 0);
1289                                 verify(nvlist_add_uint64(nv,
1290                                     ZPOOL_CONFIG_IS_LOG, is_log) == 0);
1291                                 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
1292                                         verify(nvlist_add_uint64(nv,
1293                                             ZPOOL_CONFIG_NPARITY,
1294                                             mindev - 1) == 0);
1295                                 }
1296                                 verify(nvlist_add_nvlist_array(nv,
1297                                     ZPOOL_CONFIG_CHILDREN, child,
1298                                     children) == 0);
1299 
1300                                 for (c = 0; c < children; c++)
1301                                         nvlist_free(child[c]);
1302                                 free(child);
1303                         }
1304                 } else {
1305                         /*
1306                          * We have a device.  Pass off to make_leaf_vdev() to
1307                          * construct the appropriate nvlist describing the vdev.
1308                          */
1309                         if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
1310                                 return (NULL);
1311                         if (is_log)
1312                                 nlogs++;
1313                         argc--;
1314                         argv++;
1315                 }
1316 
1317                 toplevels++;
1318                 top = realloc(top, toplevels * sizeof (nvlist_t *));
1319                 if (top == NULL)
1320                         zpool_no_memory();
1321                 top[toplevels - 1] = nv;
1322         }
1323 
1324         if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
1325                 (void) fprintf(stderr, gettext("invalid vdev "
1326                     "specification: at least one toplevel vdev must be "
1327                     "specified\n"));
1328                 return (NULL);
1329         }
1330 
1331         if (seen_logs && nlogs == 0) {
1332                 (void) fprintf(stderr, gettext("invalid vdev specification: "
1333                     "log requires at least 1 device\n"));
1334                 return (NULL);
1335         }
1336 
1337         /*
1338          * Finally, create nvroot and add all top-level vdevs to it.
1339          */
1340         verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1341         verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1342             VDEV_TYPE_ROOT) == 0);
1343         verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1344             top, toplevels) == 0);
1345         if (nspares != 0)
1346                 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1347                     spares, nspares) == 0);
1348         if (nl2cache != 0)
1349                 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1350                     l2cache, nl2cache) == 0);
1351 
1352         for (t = 0; t < toplevels; t++)
1353                 nvlist_free(top[t]);
1354         for (t = 0; t < nspares; t++)
1355                 nvlist_free(spares[t]);
1356         for (t = 0; t < nl2cache; t++)
1357                 nvlist_free(l2cache[t]);
1358         if (spares)
1359                 free(spares);
1360         if (l2cache)
1361                 free(l2cache);
1362         free(top);
1363 
1364         return (nvroot);
1365 }
1366 
1367 nvlist_t *
1368 split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
1369     splitflags_t flags, int argc, char **argv)
1370 {
1371         nvlist_t *newroot = NULL, **child;
1372         uint_t c, children;
1373 
1374         if (argc > 0) {
1375                 if ((newroot = construct_spec(argc, argv)) == NULL) {
1376                         (void) fprintf(stderr, gettext("Unable to build a "
1377                             "pool from the specified devices\n"));
1378                         return (NULL);
1379                 }
1380 
1381                 if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
1382                         nvlist_free(newroot);
1383                         return (NULL);
1384                 }
1385 
1386                 /* avoid any tricks in the spec */
1387                 verify(nvlist_lookup_nvlist_array(newroot,
1388                     ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
1389                 for (c = 0; c < children; c++) {
1390                         char *path;
1391                         const char *type;
1392                         int min, max;
1393 
1394                         verify(nvlist_lookup_string(child[c],
1395                             ZPOOL_CONFIG_PATH, &path) == 0);
1396                         if ((type = is_grouping(path, &min, &max)) != NULL) {
1397                                 (void) fprintf(stderr, gettext("Cannot use "
1398                                     "'%s' as a device for splitting\n"), type);
1399                                 nvlist_free(newroot);
1400                                 return (NULL);
1401                         }
1402                 }
1403         }
1404 
1405         if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
1406                 nvlist_free(newroot);
1407                 return (NULL);
1408         }
1409 
1410         return (newroot);
1411 }
1412 
1413 /*
1414  * Get and validate the contents of the given vdev specification.  This ensures
1415  * that the nvlist returned is well-formed, that all the devices exist, and that
1416  * they are not currently in use by any other known consumer.  The 'poolconfig'
1417  * parameter is the current configuration of the pool when adding devices
1418  * existing pool, and is used to perform additional checks, such as changing the
1419  * replication level of the pool.  It can be 'NULL' to indicate that this is a
1420  * new pool.  The 'force' flag controls whether devices should be forcefully
1421  * added, even if they appear in use.
1422  */
1423 nvlist_t *
1424 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1425     boolean_t replacing, boolean_t dryrun, int argc, char **argv)
1426 {
1427         nvlist_t *newroot;
1428         nvlist_t *poolconfig = NULL;
1429         is_force = force;
1430 
1431         /*
1432          * Construct the vdev specification.  If this is successful, we know
1433          * that we have a valid specification, and that all devices can be
1434          * opened.
1435          */
1436         if ((newroot = construct_spec(argc, argv)) == NULL)
1437                 return (NULL);
1438 
1439         if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
1440                 return (NULL);
1441 
1442         /*
1443          * Validate each device to make sure that its not shared with another
1444          * subsystem.  We do this even if 'force' is set, because there are some
1445          * uses (such as a dedicated dump device) that even '-f' cannot
1446          * override.
1447          */
1448         if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
1449                 nvlist_free(newroot);
1450                 return (NULL);
1451         }
1452 
1453         /*
1454          * Check the replication level of the given vdevs and report any errors
1455          * found.  We include the existing pool spec, if any, as we need to
1456          * catch changes against the existing replication level.
1457          */
1458         if (check_rep && check_replication(poolconfig, newroot) != 0) {
1459                 nvlist_free(newroot);
1460                 return (NULL);
1461         }
1462 
1463         /*
1464          * Run through the vdev specification and label any whole disks found.
1465          */
1466         if (!dryrun && make_disks(zhp, newroot) != 0) {
1467                 nvlist_free(newroot);
1468                 return (NULL);
1469         }
1470 
1471         return (newroot);
1472 }