Print this page
patch v2
6120 libzfs leaks a config nvlist for spares and l2arc
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libzfs/common/libzfs_import.c
+++ new/usr/src/lib/libzfs/common/libzfs_import.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 26 * Copyright 2015 RackTop Systems.
27 27 */
28 28
29 29 /*
30 30 * Pool import support functions.
31 31 *
32 32 * To import a pool, we rely on reading the configuration information from the
33 33 * ZFS label of each device. If we successfully read the label, then we
34 34 * organize the configuration information in the following hierarchy:
35 35 *
36 36 * pool guid -> toplevel vdev guid -> label txg
37 37 *
38 38 * Duplicate entries matching this same tuple will be discarded. Once we have
39 39 * examined every device, we pick the best label txg config for each toplevel
40 40 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
41 41 * update any paths that have changed. Finally, we attempt to import the pool
42 42 * using our derived config, and record the results.
43 43 */
44 44
45 45 #include <ctype.h>
46 46 #include <devid.h>
47 47 #include <dirent.h>
48 48 #include <errno.h>
49 49 #include <libintl.h>
50 50 #include <stddef.h>
51 51 #include <stdlib.h>
52 52 #include <string.h>
53 53 #include <sys/stat.h>
54 54 #include <unistd.h>
55 55 #include <fcntl.h>
56 56 #include <sys/vtoc.h>
57 57 #include <sys/dktp/fdisk.h>
58 58 #include <sys/efi_partition.h>
59 59 #include <thread_pool.h>
60 60
61 61 #include <sys/vdev_impl.h>
62 62
63 63 #include "libzfs.h"
64 64 #include "libzfs_impl.h"
65 65
66 66 /*
67 67 * Intermediate structures used to gather configuration information.
68 68 */
69 69 typedef struct config_entry {
70 70 uint64_t ce_txg;
71 71 nvlist_t *ce_config;
72 72 struct config_entry *ce_next;
73 73 } config_entry_t;
74 74
75 75 typedef struct vdev_entry {
76 76 uint64_t ve_guid;
77 77 config_entry_t *ve_configs;
78 78 struct vdev_entry *ve_next;
79 79 } vdev_entry_t;
80 80
81 81 typedef struct pool_entry {
82 82 uint64_t pe_guid;
83 83 vdev_entry_t *pe_vdevs;
84 84 struct pool_entry *pe_next;
85 85 } pool_entry_t;
86 86
87 87 typedef struct name_entry {
88 88 char *ne_name;
89 89 uint64_t ne_guid;
90 90 struct name_entry *ne_next;
91 91 } name_entry_t;
92 92
93 93 typedef struct pool_list {
94 94 pool_entry_t *pools;
95 95 name_entry_t *names;
96 96 } pool_list_t;
97 97
98 98 static char *
99 99 get_devid(const char *path)
100 100 {
101 101 int fd;
102 102 ddi_devid_t devid;
103 103 char *minor, *ret;
104 104
105 105 if ((fd = open(path, O_RDONLY)) < 0)
106 106 return (NULL);
107 107
108 108 minor = NULL;
109 109 ret = NULL;
110 110 if (devid_get(fd, &devid) == 0) {
111 111 if (devid_get_minor_name(fd, &minor) == 0)
112 112 ret = devid_str_encode(devid, minor);
113 113 if (minor != NULL)
114 114 devid_str_free(minor);
115 115 devid_free(devid);
116 116 }
117 117 (void) close(fd);
118 118
119 119 return (ret);
120 120 }
121 121
122 122
123 123 /*
124 124 * Go through and fix up any path and/or devid information for the given vdev
125 125 * configuration.
126 126 */
127 127 static int
128 128 fix_paths(nvlist_t *nv, name_entry_t *names)
129 129 {
130 130 nvlist_t **child;
131 131 uint_t c, children;
132 132 uint64_t guid;
133 133 name_entry_t *ne, *best;
134 134 char *path, *devid;
135 135 int matched;
136 136
137 137 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
138 138 &child, &children) == 0) {
139 139 for (c = 0; c < children; c++)
140 140 if (fix_paths(child[c], names) != 0)
141 141 return (-1);
142 142 return (0);
143 143 }
144 144
145 145 /*
146 146 * This is a leaf (file or disk) vdev. In either case, go through
147 147 * the name list and see if we find a matching guid. If so, replace
148 148 * the path and see if we can calculate a new devid.
149 149 *
150 150 * There may be multiple names associated with a particular guid, in
151 151 * which case we have overlapping slices or multiple paths to the same
152 152 * disk. If this is the case, then we want to pick the path that is
153 153 * the most similar to the original, where "most similar" is the number
154 154 * of matching characters starting from the end of the path. This will
155 155 * preserve slice numbers even if the disks have been reorganized, and
156 156 * will also catch preferred disk names if multiple paths exist.
157 157 */
158 158 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
159 159 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
160 160 path = NULL;
161 161
162 162 matched = 0;
163 163 best = NULL;
164 164 for (ne = names; ne != NULL; ne = ne->ne_next) {
165 165 if (ne->ne_guid == guid) {
166 166 const char *src, *dst;
167 167 int count;
168 168
169 169 if (path == NULL) {
170 170 best = ne;
171 171 break;
172 172 }
173 173
174 174 src = ne->ne_name + strlen(ne->ne_name) - 1;
175 175 dst = path + strlen(path) - 1;
176 176 for (count = 0; src >= ne->ne_name && dst >= path;
177 177 src--, dst--, count++)
178 178 if (*src != *dst)
179 179 break;
180 180
181 181 /*
182 182 * At this point, 'count' is the number of characters
183 183 * matched from the end.
184 184 */
185 185 if (count > matched || best == NULL) {
186 186 best = ne;
187 187 matched = count;
188 188 }
189 189 }
190 190 }
191 191
192 192 if (best == NULL)
193 193 return (0);
194 194
195 195 if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
196 196 return (-1);
197 197
198 198 if ((devid = get_devid(best->ne_name)) == NULL) {
199 199 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
200 200 } else {
201 201 if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
202 202 devid_str_free(devid);
203 203 return (-1);
204 204 }
205 205 devid_str_free(devid);
206 206 }
207 207
208 208 return (0);
209 209 }
210 210
211 211 /*
212 212 * Add the given configuration to the list of known devices.
213 213 */
214 214 static int
215 215 add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
216 216 nvlist_t *config)
217 217 {
218 218 uint64_t pool_guid, vdev_guid, top_guid, txg, state;
219 219 pool_entry_t *pe;
220 220 vdev_entry_t *ve;
221 221 config_entry_t *ce;
222 222 name_entry_t *ne;
223 223
224 224 /*
225 225 * If this is a hot spare not currently in use or level 2 cache
226 226 * device, add it to the list of names to translate, but don't do
227 227 * anything else.
228 228 */
229 229 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
230 230 &state) == 0 &&
231 231 (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
232 232 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
↓ open down ↓ |
232 lines elided |
↑ open up ↑ |
233 233 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
234 234 return (-1);
235 235
236 236 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
237 237 free(ne);
238 238 return (-1);
239 239 }
240 240 ne->ne_guid = vdev_guid;
241 241 ne->ne_next = pl->names;
242 242 pl->names = ne;
243 + nvlist_free(config);
243 244 return (0);
244 245 }
245 246
246 247 /*
247 248 * If we have a valid config but cannot read any of these fields, then
248 249 * it means we have a half-initialized label. In vdev_label_init()
249 250 * we write a label with txg == 0 so that we can identify the device
250 251 * in case the user refers to the same disk later on. If we fail to
251 252 * create the pool, we'll be left with a label in this state
252 253 * which should not be considered part of a valid pool.
253 254 */
254 255 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
255 256 &pool_guid) != 0 ||
256 257 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
257 258 &vdev_guid) != 0 ||
258 259 nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
259 260 &top_guid) != 0 ||
260 261 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
261 262 &txg) != 0 || txg == 0) {
262 263 nvlist_free(config);
263 264 return (0);
264 265 }
265 266
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
266 267 /*
267 268 * First, see if we know about this pool. If not, then add it to the
268 269 * list of known pools.
269 270 */
270 271 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
271 272 if (pe->pe_guid == pool_guid)
272 273 break;
273 274 }
274 275
275 276 if (pe == NULL) {
276 - if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
277 - nvlist_free(config);
277 + if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL)
278 278 return (-1);
279 - }
280 279 pe->pe_guid = pool_guid;
281 280 pe->pe_next = pl->pools;
282 281 pl->pools = pe;
283 282 }
284 283
285 284 /*
286 285 	 * Second, see if we know about this toplevel vdev. Add it if it's
287 286 * missing.
288 287 */
289 288 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
290 289 if (ve->ve_guid == top_guid)
291 290 break;
292 291 }
293 292
294 293 if (ve == NULL) {
295 - if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
296 - nvlist_free(config);
294 + if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL)
297 295 return (-1);
298 - }
299 296 ve->ve_guid = top_guid;
300 297 ve->ve_next = pe->pe_vdevs;
301 298 pe->pe_vdevs = ve;
302 299 }
303 300
304 301 /*
305 - * Third, see if we have a config with a matching transaction group. If
306 - * so, then we do nothing. Otherwise, add it to the list of known
307 - * configs.
302 + * Third, add the vdev guid -> path mappings so that we can fix up
303 + * the configuration as necessary before doing the import.
304 + */
305 + if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
306 + return (-1);
307 +
308 + if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
309 + free(ne);
310 + return (-1);
311 + }
312 +
313 + ne->ne_guid = vdev_guid;
314 + ne->ne_next = pl->names;
315 + pl->names = ne;
316 +
317 + /*
318 + * Finally, see if we have a config with a matching transaction
319 + * group. If so, then we do nothing. Otherwise, add it to the list
320 + * of known configs.
308 321 */
309 322 for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
310 323 if (ce->ce_txg == txg)
311 324 break;
312 325 }
313 326
314 327 if (ce == NULL) {
315 - if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
316 - nvlist_free(config);
328 + if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL)
317 329 return (-1);
318 - }
319 330 ce->ce_txg = txg;
320 331 ce->ce_config = config;
321 332 ce->ce_next = ve->ve_configs;
322 333 ve->ve_configs = ce;
323 334 } else {
324 335 nvlist_free(config);
325 336 }
326 337
327 - /*
328 - * At this point we've successfully added our config to the list of
329 - * known configs. The last thing to do is add the vdev guid -> path
330 - * mappings so that we can fix up the configuration as necessary before
331 - * doing the import.
332 - */
333 - if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
334 - return (-1);
335 -
336 - if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
337 - free(ne);
338 - return (-1);
339 - }
340 -
341 - ne->ne_guid = vdev_guid;
342 - ne->ne_next = pl->names;
343 - pl->names = ne;
344 -
345 338 return (0);
346 339 }
347 340
348 341 /*
349 342 	 * Determine if the named pool is active with the given GUID; sets
349 342 	 * *isactive accordingly and returns 0, or -1 on error.
350 343 */
351 344 static int
352 345 pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
353 346 boolean_t *isactive)
354 347 {
355 348 zpool_handle_t *zhp;
356 349 uint64_t theguid;
357 350
358 351 if (zpool_open_silent(hdl, name, &zhp) != 0)
359 352 return (-1);
360 353
361 354 if (zhp == NULL) {
362 355 *isactive = B_FALSE;
363 356 return (0);
364 357 }
365 358
366 359 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
367 360 &theguid) == 0);
368 361
369 362 zpool_close(zhp);
370 363
371 364 *isactive = (theguid == guid);
372 365 return (0);
373 366 }
374 367
375 368 static nvlist_t *
376 369 refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
377 370 {
378 371 nvlist_t *nvl;
379 372 zfs_cmd_t zc = { 0 };
380 373 int err;
381 374
382 375 if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
383 376 return (NULL);
384 377
385 378 if (zcmd_alloc_dst_nvlist(hdl, &zc,
386 379 zc.zc_nvlist_conf_size * 2) != 0) {
387 380 zcmd_free_nvlists(&zc);
388 381 return (NULL);
389 382 }
390 383
391 384 while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
392 385 &zc)) != 0 && errno == ENOMEM) {
393 386 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
394 387 zcmd_free_nvlists(&zc);
395 388 return (NULL);
396 389 }
397 390 }
398 391
399 392 if (err) {
400 393 zcmd_free_nvlists(&zc);
401 394 return (NULL);
402 395 }
403 396
404 397 if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
405 398 zcmd_free_nvlists(&zc);
406 399 return (NULL);
407 400 }
408 401
409 402 zcmd_free_nvlists(&zc);
410 403 return (nvl);
411 404 }
412 405
413 406 /*
414 407 * Determine if the vdev id is a hole in the namespace.
415 408 */
416 409 boolean_t
417 410 vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
418 411 {
419 412 for (int c = 0; c < holes; c++) {
420 413
421 414 /* Top-level is a hole */
422 415 if (hole_array[c] == id)
423 416 return (B_TRUE);
424 417 }
425 418 return (B_FALSE);
426 419 }
427 420
428 421 /*
429 422 * Convert our list of pools into the definitive set of configurations. We
430 423 * start by picking the best config for each toplevel vdev. Once that's done,
431 424 * we assemble the toplevel vdevs into a full config for the pool. We make a
432 425 * pass to fix up any incorrect paths, and then add it to the main list to
433 426 * return to the user.
434 427 */
435 428 static nvlist_t *
436 429 get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
437 430 {
438 431 pool_entry_t *pe;
439 432 vdev_entry_t *ve;
440 433 config_entry_t *ce;
441 434 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
442 435 nvlist_t **spares, **l2cache;
443 436 uint_t i, nspares, nl2cache;
444 437 boolean_t config_seen;
445 438 uint64_t best_txg;
446 439 char *name, *hostname;
447 440 uint64_t guid;
448 441 uint_t children = 0;
449 442 nvlist_t **child = NULL;
450 443 uint_t holes;
451 444 uint64_t *hole_array, max_id;
452 445 uint_t c;
453 446 boolean_t isactive;
454 447 uint64_t hostid;
455 448 nvlist_t *nvl;
456 449 boolean_t found_one = B_FALSE;
457 450 boolean_t valid_top_config = B_FALSE;
458 451
459 452 if (nvlist_alloc(&ret, 0, 0) != 0)
460 453 goto nomem;
461 454
462 455 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
463 456 uint64_t id, max_txg = 0;
464 457
465 458 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
466 459 goto nomem;
467 460 config_seen = B_FALSE;
468 461
469 462 /*
470 463 * Iterate over all toplevel vdevs. Grab the pool configuration
471 464 * from the first one we find, and then go through the rest and
472 465 * add them as necessary to the 'vdevs' member of the config.
473 466 */
474 467 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
475 468
476 469 /*
477 470 * Determine the best configuration for this vdev by
478 471 * selecting the config with the latest transaction
479 472 * group.
480 473 */
481 474 best_txg = 0;
482 475 for (ce = ve->ve_configs; ce != NULL;
483 476 ce = ce->ce_next) {
484 477
485 478 if (ce->ce_txg > best_txg) {
486 479 tmp = ce->ce_config;
487 480 best_txg = ce->ce_txg;
488 481 }
489 482 }
490 483
491 484 /*
492 485 * We rely on the fact that the max txg for the
493 486 * pool will contain the most up-to-date information
494 487 * about the valid top-levels in the vdev namespace.
495 488 */
496 489 if (best_txg > max_txg) {
497 490 (void) nvlist_remove(config,
498 491 ZPOOL_CONFIG_VDEV_CHILDREN,
499 492 DATA_TYPE_UINT64);
500 493 (void) nvlist_remove(config,
501 494 ZPOOL_CONFIG_HOLE_ARRAY,
502 495 DATA_TYPE_UINT64_ARRAY);
503 496
504 497 max_txg = best_txg;
505 498 hole_array = NULL;
506 499 holes = 0;
507 500 max_id = 0;
508 501 valid_top_config = B_FALSE;
509 502
510 503 if (nvlist_lookup_uint64(tmp,
511 504 ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
512 505 verify(nvlist_add_uint64(config,
513 506 ZPOOL_CONFIG_VDEV_CHILDREN,
514 507 max_id) == 0);
515 508 valid_top_config = B_TRUE;
516 509 }
517 510
518 511 if (nvlist_lookup_uint64_array(tmp,
519 512 ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
520 513 &holes) == 0) {
521 514 verify(nvlist_add_uint64_array(config,
522 515 ZPOOL_CONFIG_HOLE_ARRAY,
523 516 hole_array, holes) == 0);
524 517 }
525 518 }
526 519
527 520 if (!config_seen) {
528 521 /*
529 522 * Copy the relevant pieces of data to the pool
530 523 * configuration:
531 524 *
532 525 * version
533 526 * pool guid
534 527 * name
535 528 * comment (if available)
536 529 * pool state
537 530 * hostid (if available)
538 531 * hostname (if available)
539 532 */
540 533 uint64_t state, version;
541 534 char *comment = NULL;
542 535
543 536 version = fnvlist_lookup_uint64(tmp,
544 537 ZPOOL_CONFIG_VERSION);
545 538 fnvlist_add_uint64(config,
546 539 ZPOOL_CONFIG_VERSION, version);
547 540 guid = fnvlist_lookup_uint64(tmp,
548 541 ZPOOL_CONFIG_POOL_GUID);
549 542 fnvlist_add_uint64(config,
550 543 ZPOOL_CONFIG_POOL_GUID, guid);
551 544 name = fnvlist_lookup_string(tmp,
552 545 ZPOOL_CONFIG_POOL_NAME);
553 546 fnvlist_add_string(config,
554 547 ZPOOL_CONFIG_POOL_NAME, name);
555 548
556 549 if (nvlist_lookup_string(tmp,
557 550 ZPOOL_CONFIG_COMMENT, &comment) == 0)
558 551 fnvlist_add_string(config,
559 552 ZPOOL_CONFIG_COMMENT, comment);
560 553
561 554 state = fnvlist_lookup_uint64(tmp,
562 555 ZPOOL_CONFIG_POOL_STATE);
563 556 fnvlist_add_uint64(config,
564 557 ZPOOL_CONFIG_POOL_STATE, state);
565 558
566 559 hostid = 0;
567 560 if (nvlist_lookup_uint64(tmp,
568 561 ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
569 562 fnvlist_add_uint64(config,
570 563 ZPOOL_CONFIG_HOSTID, hostid);
571 564 hostname = fnvlist_lookup_string(tmp,
572 565 ZPOOL_CONFIG_HOSTNAME);
573 566 fnvlist_add_string(config,
574 567 ZPOOL_CONFIG_HOSTNAME, hostname);
575 568 }
576 569
577 570 config_seen = B_TRUE;
578 571 }
579 572
580 573 /*
581 574 * Add this top-level vdev to the child array.
582 575 */
583 576 verify(nvlist_lookup_nvlist(tmp,
584 577 ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
585 578 verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
586 579 &id) == 0);
587 580
588 581 if (id >= children) {
589 582 nvlist_t **newchild;
590 583
591 584 newchild = zfs_alloc(hdl, (id + 1) *
592 585 sizeof (nvlist_t *));
593 586 if (newchild == NULL)
594 587 goto nomem;
595 588
596 589 for (c = 0; c < children; c++)
597 590 newchild[c] = child[c];
598 591
599 592 free(child);
600 593 child = newchild;
601 594 children = id + 1;
602 595 }
603 596 if (nvlist_dup(nvtop, &child[id], 0) != 0)
604 597 goto nomem;
605 598
606 599 }
607 600
608 601 /*
609 602 * If we have information about all the top-levels then
610 603 * clean up the nvlist which we've constructed. This
611 604 * means removing any extraneous devices that are
612 605 * beyond the valid range or adding devices to the end
613 606 * of our array which appear to be missing.
614 607 */
615 608 if (valid_top_config) {
616 609 if (max_id < children) {
617 610 for (c = max_id; c < children; c++)
618 611 nvlist_free(child[c]);
619 612 children = max_id;
620 613 } else if (max_id > children) {
621 614 nvlist_t **newchild;
622 615
623 616 newchild = zfs_alloc(hdl, (max_id) *
624 617 sizeof (nvlist_t *));
625 618 if (newchild == NULL)
626 619 goto nomem;
627 620
628 621 for (c = 0; c < children; c++)
629 622 newchild[c] = child[c];
630 623
631 624 free(child);
632 625 child = newchild;
633 626 children = max_id;
634 627 }
635 628 }
636 629
637 630 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
638 631 &guid) == 0);
639 632
640 633 /*
641 634 * The vdev namespace may contain holes as a result of
642 635 * device removal. We must add them back into the vdev
643 636 * tree before we process any missing devices.
644 637 */
645 638 if (holes > 0) {
646 639 ASSERT(valid_top_config);
647 640
648 641 for (c = 0; c < children; c++) {
649 642 nvlist_t *holey;
650 643
651 644 if (child[c] != NULL ||
652 645 !vdev_is_hole(hole_array, holes, c))
653 646 continue;
654 647
655 648 if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
656 649 0) != 0)
657 650 goto nomem;
658 651
659 652 /*
660 653 * Holes in the namespace are treated as
661 654 * "hole" top-level vdevs and have a
662 655 * special flag set on them.
663 656 */
664 657 if (nvlist_add_string(holey,
665 658 ZPOOL_CONFIG_TYPE,
666 659 VDEV_TYPE_HOLE) != 0 ||
667 660 nvlist_add_uint64(holey,
668 661 ZPOOL_CONFIG_ID, c) != 0 ||
669 662 nvlist_add_uint64(holey,
670 663 ZPOOL_CONFIG_GUID, 0ULL) != 0) {
671 664 nvlist_free(holey);
672 665 goto nomem;
673 666 }
674 667 child[c] = holey;
675 668 }
676 669 }
677 670
678 671 /*
679 672 		 * Look for any missing top-level vdevs. If any are missing,
680 673 		 * create a faked-up 'missing' vdev as a placeholder. We cannot
681 674 * simply compress the child array, because the kernel performs
682 675 * certain checks to make sure the vdev IDs match their location
683 676 * in the configuration.
684 677 */
685 678 for (c = 0; c < children; c++) {
686 679 if (child[c] == NULL) {
687 680 nvlist_t *missing;
688 681 if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
689 682 0) != 0)
690 683 goto nomem;
691 684 if (nvlist_add_string(missing,
692 685 ZPOOL_CONFIG_TYPE,
693 686 VDEV_TYPE_MISSING) != 0 ||
694 687 nvlist_add_uint64(missing,
695 688 ZPOOL_CONFIG_ID, c) != 0 ||
696 689 nvlist_add_uint64(missing,
697 690 ZPOOL_CONFIG_GUID, 0ULL) != 0) {
698 691 nvlist_free(missing);
699 692 goto nomem;
700 693 }
701 694 child[c] = missing;
702 695 }
703 696 }
704 697
705 698 /*
706 699 * Put all of this pool's top-level vdevs into a root vdev.
707 700 */
708 701 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
709 702 goto nomem;
710 703 if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
711 704 VDEV_TYPE_ROOT) != 0 ||
712 705 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
713 706 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
714 707 nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
715 708 child, children) != 0) {
716 709 nvlist_free(nvroot);
717 710 goto nomem;
718 711 }
719 712
720 713 for (c = 0; c < children; c++)
721 714 nvlist_free(child[c]);
722 715 free(child);
723 716 children = 0;
724 717 child = NULL;
725 718
726 719 /*
727 720 * Go through and fix up any paths and/or devids based on our
728 721 * known list of vdev GUID -> path mappings.
729 722 */
730 723 if (fix_paths(nvroot, pl->names) != 0) {
731 724 nvlist_free(nvroot);
732 725 goto nomem;
733 726 }
734 727
735 728 /*
736 729 * Add the root vdev to this pool's configuration.
737 730 */
738 731 if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
739 732 nvroot) != 0) {
740 733 nvlist_free(nvroot);
741 734 goto nomem;
742 735 }
743 736 nvlist_free(nvroot);
744 737
745 738 /*
746 739 * zdb uses this path to report on active pools that were
747 740 * imported or created using -R.
748 741 */
749 742 if (active_ok)
750 743 goto add_pool;
751 744
752 745 /*
753 746 * Determine if this pool is currently active, in which case we
754 747 * can't actually import it.
755 748 */
756 749 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
757 750 &name) == 0);
758 751 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
759 752 &guid) == 0);
760 753
761 754 if (pool_active(hdl, name, guid, &isactive) != 0)
762 755 goto error;
763 756
764 757 if (isactive) {
765 758 nvlist_free(config);
766 759 config = NULL;
767 760 continue;
768 761 }
769 762
770 763 if ((nvl = refresh_config(hdl, config)) == NULL) {
771 764 nvlist_free(config);
772 765 config = NULL;
773 766 continue;
774 767 }
775 768
776 769 nvlist_free(config);
777 770 config = nvl;
778 771
779 772 /*
780 773 * Go through and update the paths for spares, now that we have
781 774 * them.
782 775 */
783 776 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
784 777 &nvroot) == 0);
785 778 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
786 779 &spares, &nspares) == 0) {
787 780 for (i = 0; i < nspares; i++) {
788 781 if (fix_paths(spares[i], pl->names) != 0)
789 782 goto nomem;
790 783 }
791 784 }
792 785
793 786 /*
794 787 * Update the paths for l2cache devices.
795 788 */
796 789 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
797 790 &l2cache, &nl2cache) == 0) {
798 791 for (i = 0; i < nl2cache; i++) {
799 792 if (fix_paths(l2cache[i], pl->names) != 0)
800 793 goto nomem;
801 794 }
802 795 }
803 796
804 797 /*
805 798 * Restore the original information read from the actual label.
806 799 */
807 800 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
808 801 DATA_TYPE_UINT64);
809 802 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
810 803 DATA_TYPE_STRING);
811 804 if (hostid != 0) {
812 805 verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
813 806 hostid) == 0);
814 807 verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
815 808 hostname) == 0);
816 809 }
817 810
818 811 add_pool:
819 812 /*
820 813 * Add this pool to the list of configs.
821 814 */
822 815 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
823 816 &name) == 0);
824 817 if (nvlist_add_nvlist(ret, name, config) != 0)
825 818 goto nomem;
826 819
827 820 found_one = B_TRUE;
828 821 nvlist_free(config);
829 822 config = NULL;
830 823 }
831 824
832 825 if (!found_one) {
833 826 nvlist_free(ret);
834 827 ret = NULL;
835 828 }
836 829
837 830 return (ret);
838 831
839 832 nomem:
840 833 (void) no_memory(hdl);
841 834 error:
842 835 nvlist_free(config);
843 836 nvlist_free(ret);
844 837 for (c = 0; c < children; c++)
845 838 nvlist_free(child[c]);
846 839 free(child);
847 840
848 841 return (NULL);
849 842 }
850 843
851 844 /*
852 845 * Return the offset of the given label.
853 846 */
854 847 static uint64_t
855 848 label_offset(uint64_t size, int l)
856 849 {
857 850 ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
858 851 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
859 852 0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
860 853 }
861 854
862 855 /*
863 856 * Given a file descriptor, read the label information and return an nvlist
864 857 * describing the configuration, if there is one.
865 858 */
866 859 int
867 860 zpool_read_label(int fd, nvlist_t **config)
868 861 {
869 862 struct stat64 statbuf;
870 863 int l;
871 864 vdev_label_t *label;
872 865 uint64_t state, txg, size;
873 866
874 867 *config = NULL;
875 868
876 869 if (fstat64(fd, &statbuf) == -1)
877 870 return (0);
878 871 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
879 872
880 873 if ((label = malloc(sizeof (vdev_label_t))) == NULL)
881 874 return (-1);
882 875
883 876 for (l = 0; l < VDEV_LABELS; l++) {
884 877 if (pread64(fd, label, sizeof (vdev_label_t),
885 878 label_offset(size, l)) != sizeof (vdev_label_t))
886 879 continue;
887 880
888 881 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
889 882 sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
890 883 continue;
891 884
892 885 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
893 886 &state) != 0 || state > POOL_STATE_L2CACHE) {
894 887 nvlist_free(*config);
895 888 continue;
896 889 }
897 890
898 891 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
899 892 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
900 893 &txg) != 0 || txg == 0)) {
901 894 nvlist_free(*config);
902 895 continue;
903 896 }
904 897
905 898 free(label);
906 899 return (0);
907 900 }
908 901
909 902 free(label);
910 903 *config = NULL;
911 904 return (0);
912 905 }
913 906
914 907 typedef struct rdsk_node {
915 908 char *rn_name;
916 909 int rn_dfd;
917 910 libzfs_handle_t *rn_hdl;
918 911 nvlist_t *rn_config;
919 912 avl_tree_t *rn_avl;
920 913 avl_node_t rn_node;
921 914 boolean_t rn_nozpool;
922 915 } rdsk_node_t;
923 916
924 917 static int
925 918 slice_cache_compare(const void *arg1, const void *arg2)
926 919 {
927 920 const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
928 921 const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
929 922 char *nm1slice, *nm2slice;
930 923 int rv;
931 924
932 925 /*
933 926 * slices zero and two are the most likely to provide results,
934 927 * so put those first
935 928 */
936 929 nm1slice = strstr(nm1, "s0");
937 930 nm2slice = strstr(nm2, "s0");
938 931 if (nm1slice && !nm2slice) {
939 932 return (-1);
940 933 }
941 934 if (!nm1slice && nm2slice) {
942 935 return (1);
943 936 }
944 937 nm1slice = strstr(nm1, "s2");
945 938 nm2slice = strstr(nm2, "s2");
946 939 if (nm1slice && !nm2slice) {
947 940 return (-1);
948 941 }
949 942 if (!nm1slice && nm2slice) {
950 943 return (1);
951 944 }
952 945
953 946 rv = strcmp(nm1, nm2);
954 947 if (rv == 0)
955 948 return (0);
956 949 return (rv > 0 ? 1 : -1);
957 950 }
958 951
959 952 static void
960 953 check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
961 954 diskaddr_t size, uint_t blksz)
962 955 {
963 956 rdsk_node_t tmpnode;
964 957 rdsk_node_t *node;
965 958 char sname[MAXNAMELEN];
966 959
967 960 tmpnode.rn_name = &sname[0];
968 961 (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
969 962 diskname, partno);
970 963 /*
971 964 * protect against division by zero for disk labels that
972 965 * contain a bogus sector size
973 966 */
974 967 if (blksz == 0)
975 968 blksz = DEV_BSIZE;
976 969 /* too small to contain a zpool? */
977 970 if ((size < (SPA_MINDEVSIZE / blksz)) &&
978 971 (node = avl_find(r, &tmpnode, NULL)))
979 972 node->rn_nozpool = B_TRUE;
980 973 }
981 974
982 975 static void
983 976 nozpool_all_slices(avl_tree_t *r, const char *sname)
984 977 {
985 978 char diskname[MAXNAMELEN];
986 979 char *ptr;
987 980 int i;
988 981
989 982 (void) strncpy(diskname, sname, MAXNAMELEN);
990 983 if (((ptr = strrchr(diskname, 's')) == NULL) &&
991 984 ((ptr = strrchr(diskname, 'p')) == NULL))
992 985 return;
993 986 ptr[0] = 's';
994 987 ptr[1] = '\0';
995 988 for (i = 0; i < NDKMAP; i++)
996 989 check_one_slice(r, diskname, i, 0, 1);
997 990 ptr[0] = 'p';
998 991 for (i = 0; i <= FD_NUMPART; i++)
999 992 check_one_slice(r, diskname, i, 0, 1);
1000 993 }
1001 994
1002 995 static void
1003 996 check_slices(avl_tree_t *r, int fd, const char *sname)
1004 997 {
1005 998 struct extvtoc vtoc;
1006 999 struct dk_gpt *gpt;
1007 1000 char diskname[MAXNAMELEN];
1008 1001 char *ptr;
1009 1002 int i;
1010 1003
1011 1004 (void) strncpy(diskname, sname, MAXNAMELEN);
1012 1005 if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
1013 1006 return;
1014 1007 ptr[1] = '\0';
1015 1008
1016 1009 if (read_extvtoc(fd, &vtoc) >= 0) {
1017 1010 for (i = 0; i < NDKMAP; i++)
1018 1011 check_one_slice(r, diskname, i,
1019 1012 vtoc.v_part[i].p_size, vtoc.v_sectorsz);
1020 1013 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
1021 1014 /*
1022 1015 * on x86 we'll still have leftover links that point
1023 1016 * to slices s[9-15], so use NDKMAP instead
1024 1017 */
1025 1018 for (i = 0; i < NDKMAP; i++)
1026 1019 check_one_slice(r, diskname, i,
1027 1020 gpt->efi_parts[i].p_size, gpt->efi_lbasize);
1028 1021 /* nodes p[1-4] are never used with EFI labels */
1029 1022 ptr[0] = 'p';
1030 1023 for (i = 1; i <= FD_NUMPART; i++)
1031 1024 check_one_slice(r, diskname, i, 0, 1);
1032 1025 efi_free(gpt);
1033 1026 }
1034 1027 }
1035 1028
/*
 * Thread-pool worker: open a single directory entry (rdsk_node_t) and
 * attempt to read a ZFS vdev label from it.  On success the unpacked
 * label nvlist is stored in rn->rn_config; ownership of that nvlist
 * passes to whoever later walks the slice cache (zpool_find_import_impl).
 */
static void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	struct stat64 statbuf;
	nvlist_t *config;
	int fd;

	/* a sibling task may already have ruled this slice out */
	if (rn->rn_nozpool)
		return;
	if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
		/* symlink to a device that's no longer there */
		if (errno == ENOENT)
			nozpool_all_slices(rn->rn_avl, rn->rn_name);
		return;
	}
	/*
	 * Ignore failed stats. We only want regular
	 * files, character devs and block devs.
	 */
	if (fstat64(fd, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) &&
	    !S_ISCHR(statbuf.st_mode) &&
	    !S_ISBLK(statbuf.st_mode))) {
		(void) close(fd);
		return;
	}
	/* this file is too small to hold a zpool */
	if (S_ISREG(statbuf.st_mode) &&
	    statbuf.st_size < SPA_MINDEVSIZE) {
		(void) close(fd);
		return;
	} else if (!S_ISREG(statbuf.st_mode)) {
		/*
		 * Try to read the disk label first so we don't have to
		 * open a bunch of minor nodes that can't have a zpool.
		 */
		check_slices(rn->rn_avl, fd, rn->rn_name);
	}

	/* a nonzero return here indicates allocation failure, not "no label" */
	if ((zpool_read_label(fd, &config)) != 0) {
		(void) close(fd);
		(void) no_memory(rn->rn_hdl);
		return;
	}
	(void) close(fd);

	/* config may be NULL when no valid label was found; caller checks */
	rn->rn_config = config;
}
1085 1078
1086 1079 /*
1087 1080 * Given a file descriptor, clear (zero) the label information. This function
1088 1081 * is currently only used in the appliance stack as part of the ZFS sysevent
1089 1082 * module.
1090 1083 */
1091 1084 int
1092 1085 zpool_clear_label(int fd)
1093 1086 {
1094 1087 struct stat64 statbuf;
1095 1088 int l;
1096 1089 vdev_label_t *label;
1097 1090 uint64_t size;
1098 1091
1099 1092 if (fstat64(fd, &statbuf) == -1)
1100 1093 return (0);
1101 1094 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1102 1095
1103 1096 if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
1104 1097 return (-1);
1105 1098
1106 1099 for (l = 0; l < VDEV_LABELS; l++) {
1107 1100 if (pwrite64(fd, label, sizeof (vdev_label_t),
1108 1101 label_offset(size, l)) != sizeof (vdev_label_t)) {
1109 1102 free(label);
1110 1103 return (-1);
1111 1104 }
1112 1105 }
1113 1106
1114 1107 free(label);
1115 1108 return (0);
1116 1109 }
1117 1110
1118 1111 /*
1119 1112 * Given a list of directories to search, find all pools stored on disk. This
1120 1113 * includes partial pools which are not available to import. If no args are
1121 1114 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1122 1115 * poolname or guid (but not both) are provided by the caller when trying
1123 1116 * to import a specific pool.
1124 1117 */
static nvlist_t *
zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
{
	int i, dirs = iarg->paths;
	struct dirent64 *dp;
	char path[MAXPATHLEN];
	char *end, **dir = iarg->path;
	size_t pathleft;
	nvlist_t *ret = NULL;
	static char *default_dir = "/dev/dsk";
	pool_list_t pools = { 0 };
	pool_entry_t *pe, *penext;
	vdev_entry_t *ve, *venext;
	config_entry_t *ce, *cenext;
	name_entry_t *ne, *nenext;
	avl_tree_t slice_cache;
	rdsk_node_t *slice;
	void *cookie;

	if (dirs == 0) {
		dirs = 1;
		dir = &default_dir;
	}

	/*
	 * Go through and read the label configuration information from every
	 * possible device, organizing the information according to pool GUID
	 * and toplevel GUID.
	 */
	for (i = 0; i < dirs; i++) {
		tpool_t *t;
		char *rdsk;
		int dfd;
		boolean_t config_failed = B_FALSE;
		DIR *dirp;

		/* use realpath to normalize the path */
		if (realpath(dir[i], path) == 0) {
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
			goto error;
		}
		/* leave 'end' pointing past the trailing slash for strlcpy */
		end = &path[strlen(path)];
		*end++ = '/';
		*end = 0;
		pathleft = &path[sizeof (path)] - end;

		/*
		 * Using raw devices instead of block devices when we're
		 * reading the labels skips a bunch of slow operations during
		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
		 */
		if (strcmp(path, "/dev/dsk/") == 0)
			rdsk = "/dev/rdsk/";
		else
			rdsk = path;

		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
		    (dirp = fdopendir(dfd)) == NULL) {
			if (dfd >= 0)
				(void) close(dfd);
			zfs_error_aux(hdl, strerror(errno));
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
			    rdsk);
			goto error;
		}

		avl_create(&slice_cache, slice_cache_compare,
		    sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
		/*
		 * This is not MT-safe, but we have no MT consumers of libzfs
		 */
		while ((dp = readdir64(dirp)) != NULL) {
			const char *name = dp->d_name;
			/* skip the "." and ".." entries */
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				continue;

			slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zfs_strdup(hdl, name);
			slice->rn_avl = &slice_cache;
			slice->rn_dfd = dfd;
			slice->rn_hdl = hdl;
			slice->rn_nozpool = B_FALSE;
			avl_add(&slice_cache, slice);
		}
		/*
		 * create a thread pool to do all of this in parallel;
		 * rn_nozpool is not protected, so this is racy in that
		 * multiple tasks could decide that the same slice can
		 * not hold a zpool, which is benign. Also choose
		 * double the number of processors; we hold a lot of
		 * locks in the kernel, so going beyond this doesn't
		 * buy us much.
		 */
		t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
		    0, NULL);
		for (slice = avl_first(&slice_cache); slice;
		    (slice = avl_walk(&slice_cache, slice,
		    AVL_AFTER)))
			(void) tpool_dispatch(t, zpool_open_func, slice);
		tpool_wait(t);
		tpool_destroy(t);

		/* harvest the configs the workers left in rn_config */
		cookie = NULL;
		while ((slice = avl_destroy_nodes(&slice_cache,
		    &cookie)) != NULL) {
			if (slice->rn_config != NULL && !config_failed) {
				nvlist_t *config = slice->rn_config;
				boolean_t matched = B_TRUE;

				if (iarg->poolname != NULL) {
					char *pname;

					matched = nvlist_lookup_string(config,
					    ZPOOL_CONFIG_POOL_NAME,
					    &pname) == 0 &&
					    strcmp(iarg->poolname, pname) == 0;
				} else if (iarg->guid != 0) {
					uint64_t this_guid;

					matched = nvlist_lookup_uint64(config,
					    ZPOOL_CONFIG_POOL_GUID,
					    &this_guid) == 0 &&
					    iarg->guid == this_guid;
				}
				if (!matched) {
					nvlist_free(config);
				} else {
					/*
					 * use the non-raw path for the config
					 */
					(void) strlcpy(end, slice->rn_name,
					    pathleft);
					/*
					 * add_config() only takes ownership of
					 * config on success; free it here on
					 * failure to avoid a leak (illumos
					 * 6120).
					 */
					if (add_config(hdl, &pools, path,
					    config) != 0) {
						nvlist_free(config);
						config_failed = B_TRUE;
					}
				}
			}
			free(slice->rn_name);
			free(slice);
		}
		avl_destroy(&slice_cache);

		(void) closedir(dirp);

		if (config_failed)
			goto error;
	}

	ret = get_configs(hdl, &pools, iarg->can_be_active);

error:
	/* tear down the pool/vdev/config bookkeeping built by add_config() */
	for (pe = pools.pools; pe != NULL; pe = penext) {
		penext = pe->pe_next;
		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
			venext = ve->ve_next;
			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
				cenext = ce->ce_next;
				if (ce->ce_config)
					nvlist_free(ce->ce_config);
				free(ce);
			}
			free(ve);
		}
		free(pe);
	}

	for (ne = pools.names; ne != NULL; ne = nenext) {
		nenext = ne->ne_next;
		free(ne->ne_name);
		free(ne);
	}

	return (ret);
}
1302 1297
1303 1298 nvlist_t *
1304 1299 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
1305 1300 {
1306 1301 importargs_t iarg = { 0 };
1307 1302
1308 1303 iarg.paths = argc;
1309 1304 iarg.path = argv;
1310 1305
1311 1306 return (zpool_find_import_impl(hdl, &iarg));
1312 1307 }
1313 1308
1314 1309 /*
1315 1310 * Given a cache file, return the contents as a list of importable pools.
1316 1311 * poolname or guid (but not both) are provided by the caller when trying
1317 1312 * to import a specific pool.
1318 1313 */
nvlist_t *
zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
    char *poolname, uint64_t guid)
{
	char *buf;
	int fd;
	struct stat64 statbuf;
	nvlist_t *raw, *src, *dst;
	nvlist_t *pools;
	nvpair_t *elem;
	char *name;
	uint64_t this_guid;
	boolean_t active;

	/* the caller may filter by pool name or by guid, but never both */
	verify(poolname == NULL || guid == 0);

	if ((fd = open(cachefile, O_RDONLY)) < 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
		return (NULL);
	}

	if (fstat64(fd, &statbuf) != 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) close(fd);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
		return (NULL);
	}

	/* slurp the whole packed nvlist into memory */
	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
		(void) close(fd);
		return (NULL);
	}

	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
		(void) close(fd);
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "failed to read cache file contents"));
		return (NULL);
	}

	(void) close(fd);

	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "invalid or corrupt cache file contents"));
		return (NULL);
	}

	free(buf);

	/*
	 * Go through and get the current state of the pools and refresh their
	 * state.
	 */
	if (nvlist_alloc(&pools, 0, 0) != 0) {
		(void) no_memory(hdl);
		nvlist_free(raw);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
		src = fnvpair_value_nvlist(elem);

		name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
		if (poolname != NULL && strcmp(poolname, name) != 0)
			continue;

		this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
		if (guid != 0 && guid != this_guid)
			continue;

		if (pool_active(hdl, name, this_guid, &active) != 0) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		/* pools that are already imported are not import candidates */
		if (active)
			continue;

		if ((dst = refresh_config(hdl, src)) == NULL) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		/* nvlist_add_nvlist() copies dst, so our copy is freed below */
		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
			(void) no_memory(hdl);
			nvlist_free(dst);
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}
		nvlist_free(dst);
	}

	nvlist_free(raw);
	return (pools);
}
1426 1421
1427 1422 static int
1428 1423 name_or_guid_exists(zpool_handle_t *zhp, void *data)
1429 1424 {
1430 1425 importargs_t *import = data;
1431 1426 int found = 0;
1432 1427
1433 1428 if (import->poolname != NULL) {
1434 1429 char *pool_name;
1435 1430
1436 1431 verify(nvlist_lookup_string(zhp->zpool_config,
1437 1432 ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
1438 1433 if (strcmp(pool_name, import->poolname) == 0)
1439 1434 found = 1;
1440 1435 } else {
1441 1436 uint64_t pool_guid;
1442 1437
1443 1438 verify(nvlist_lookup_uint64(zhp->zpool_config,
1444 1439 ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
1445 1440 if (pool_guid == import->guid)
1446 1441 found = 1;
1447 1442 }
1448 1443
1449 1444 zpool_close(zhp);
1450 1445 return (found);
1451 1446 }
1452 1447
1453 1448 nvlist_t *
1454 1449 zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
1455 1450 {
1456 1451 verify(import->poolname == NULL || import->guid == 0);
1457 1452
1458 1453 if (import->unique)
1459 1454 import->exists = zpool_iter(hdl, name_or_guid_exists, import);
1460 1455
1461 1456 if (import->cachefile != NULL)
1462 1457 return (zpool_find_import_cached(hdl, import->cachefile,
1463 1458 import->poolname, import->guid));
1464 1459
1465 1460 return (zpool_find_import_impl(hdl, import));
1466 1461 }
1467 1462
1468 1463 boolean_t
1469 1464 find_guid(nvlist_t *nv, uint64_t guid)
1470 1465 {
1471 1466 uint64_t tmp;
1472 1467 nvlist_t **child;
1473 1468 uint_t c, children;
1474 1469
1475 1470 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
1476 1471 if (tmp == guid)
1477 1472 return (B_TRUE);
1478 1473
1479 1474 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1480 1475 &child, &children) == 0) {
1481 1476 for (c = 0; c < children; c++)
1482 1477 if (find_guid(child[c], guid))
1483 1478 return (B_TRUE);
1484 1479 }
1485 1480
1486 1481 return (B_FALSE);
1487 1482 }
1488 1483
/*
 * Callback state for find_aux(): search every imported pool for an
 * auxiliary (spare or l2cache) vdev with a matching guid.
 */
typedef struct aux_cbdata {
	const char *cb_type;	/* ZPOOL_CONFIG_SPARES or ZPOOL_CONFIG_L2CACHE */
	uint64_t cb_guid;	/* vdev guid to search for */
	zpool_handle_t *cb_zhp;	/* pool owning the vdev, if found (left open) */
} aux_cbdata_t;
1494 1489
1495 1490 static int
1496 1491 find_aux(zpool_handle_t *zhp, void *data)
1497 1492 {
1498 1493 aux_cbdata_t *cbp = data;
1499 1494 nvlist_t **list;
1500 1495 uint_t i, count;
1501 1496 uint64_t guid;
1502 1497 nvlist_t *nvroot;
1503 1498
1504 1499 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1505 1500 &nvroot) == 0);
1506 1501
1507 1502 if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
1508 1503 &list, &count) == 0) {
1509 1504 for (i = 0; i < count; i++) {
1510 1505 verify(nvlist_lookup_uint64(list[i],
1511 1506 ZPOOL_CONFIG_GUID, &guid) == 0);
1512 1507 if (guid == cbp->cb_guid) {
1513 1508 cbp->cb_zhp = zhp;
1514 1509 return (1);
1515 1510 }
1516 1511 }
1517 1512 }
1518 1513
1519 1514 zpool_close(zhp);
1520 1515 return (0);
1521 1516 }
1522 1517
1523 1518 /*
1524 1519 * Determines if the pool is in use. If so, it returns true and the state of
1525 1520 * the pool as well as the name of the pool. Both strings are allocated and
1526 1521 * must be freed by the caller.
1527 1522 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	/* a nonzero return here indicates allocation failure, not "no label" */
	if (zpool_read_label(fd, &config) != 0) {
		(void) no_memory(hdl);
		return (-1);
	}

	/* no label at all: the device is not in use by ZFS */
	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	/* spare and l2cache labels carry no pool name or pool guid */
	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that currently
			 * in use within another pool.  Since we return B_TRUE,
			 * libdiskmgt will continue to prevent generic consumers
			 * from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			/* find_aux() left cb.cb_zhp open; closed below */
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:

		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			/* find_aux() left cb.cb_zhp open; closed below */
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}


	if (ret) {
		/* name must be strdup'd before config (which backs it) goes */
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
↓ open down ↓ |
428 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX