Print this page
patch first-pass
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
24 24 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
25 25 * Copyright (c) 2014 RackTop Systems.
26 26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27 27 * Copyright (c) 2014 Integros [integros.com]
28 28 * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
29 29 */
30 30
31 31 #include <sys/dmu_objset.h>
32 32 #include <sys/dsl_dataset.h>
33 33 #include <sys/dsl_dir.h>
34 34 #include <sys/dsl_prop.h>
35 35 #include <sys/dsl_synctask.h>
36 36 #include <sys/dmu_traverse.h>
37 37 #include <sys/dmu_impl.h>
38 38 #include <sys/dmu_tx.h>
39 39 #include <sys/arc.h>
40 40 #include <sys/zio.h>
41 41 #include <sys/zap.h>
42 42 #include <sys/zfeature.h>
43 43 #include <sys/unique.h>
44 44 #include <sys/zfs_context.h>
45 45 #include <sys/zfs_ioctl.h>
46 46 #include <sys/spa.h>
47 47 #include <sys/zfs_znode.h>
48 48 #include <sys/zfs_onexit.h>
49 49 #include <sys/zvol.h>
50 50 #include <sys/dsl_scan.h>
51 51 #include <sys/dsl_deadlist.h>
52 52 #include <sys/dsl_destroy.h>
53 53 #include <sys/dsl_userhold.h>
54 54 #include <sys/dsl_bookmark.h>
55 55 #include <sys/dmu_send.h>
56 56 #include <sys/zio_checksum.h>
57 57 #include <sys/zio_compress.h>
58 58 #include <zfs_fletcher.h>
59 59
60 60 /*
61 61 * The SPA supports block sizes up to 16MB. However, very large blocks
62 62 * can have an impact on i/o latency (e.g. tying up a spinning disk for
63 63 * ~300ms), and also potentially on the memory allocator. Therefore,
64 64 * we do not allow the recordsize to be set larger than zfs_max_recordsize
65 65 * (default 1MB). Larger blocks can be created by changing this tunable,
66 66 * and pools with larger blocks can always be imported and used, regardless
67 67 * of this setting.
68 68 */
69 69 int zfs_max_recordsize = 1 * 1024 * 1024;
70 70
71 71 #define SWITCH64(x, y) \
72 72 { \
73 73 uint64_t __tmp = (x); \
74 74 (x) = (y); \
75 75 (y) = __tmp; \
76 76 }
77 77
78 78 #define DS_REF_MAX (1ULL << 62)
79 79
80 80 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
81 81
82 82 extern int spa_asize_inflation;
83 83
84 84 /*
 85  85  * Figure out how much of this delta should be propagated to the dsl_dir
86 86 * layer. If there's a refreservation, that space has already been
87 87 * partially accounted for in our ancestors.
88 88 */
89 89 static int64_t
90 90 parent_delta(dsl_dataset_t *ds, int64_t delta)
91 91 {
92 92 dsl_dataset_phys_t *ds_phys;
93 93 uint64_t old_bytes, new_bytes;
94 94
95 95 if (ds->ds_reserved == 0)
96 96 return (delta);
97 97
98 98 ds_phys = dsl_dataset_phys(ds);
99 99 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
100 100 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
101 101
102 102 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
103 103 return (new_bytes - old_bytes);
104 104 }
105 105
106 106 void
107 107 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
108 108 {
109 109 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
110 110 int compressed = BP_GET_PSIZE(bp);
111 111 int uncompressed = BP_GET_UCSIZE(bp);
112 112 int64_t delta;
113 113
114 114 dprintf_bp(bp, "ds=%p", ds);
115 115
116 116 ASSERT(dmu_tx_is_syncing(tx));
117 117 /* It could have been compressed away to nothing */
118 118 if (BP_IS_HOLE(bp))
119 119 return;
120 120 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
121 121 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
122 122 if (ds == NULL) {
123 123 dsl_pool_mos_diduse_space(tx->tx_pool,
124 124 used, compressed, uncompressed);
125 125 return;
126 126 }
127 127
128 128 dmu_buf_will_dirty(ds->ds_dbuf, tx);
129 129 mutex_enter(&ds->ds_lock);
130 130 delta = parent_delta(ds, used);
131 131 dsl_dataset_phys(ds)->ds_referenced_bytes += used;
132 132 dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
133 133 dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
134 134 dsl_dataset_phys(ds)->ds_unique_bytes += used;
135 135
136 136 if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
137 137 ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
138 138 B_TRUE;
139 139 }
140 140
141 141 spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
142 142 if (f != SPA_FEATURE_NONE)
143 143 ds->ds_feature_activation_needed[f] = B_TRUE;
144 144
145 145 mutex_exit(&ds->ds_lock);
146 146 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
147 147 compressed, uncompressed, tx);
148 148 dsl_dir_transfer_space(ds->ds_dir, used - delta,
149 149 DD_USED_REFRSRV, DD_USED_HEAD, tx);
150 150 }
151 151
152 152 int
153 153 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
154 154 boolean_t async)
155 155 {
156 156 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
157 157 int compressed = BP_GET_PSIZE(bp);
158 158 int uncompressed = BP_GET_UCSIZE(bp);
159 159
160 160 if (BP_IS_HOLE(bp))
161 161 return (0);
162 162
163 163 ASSERT(dmu_tx_is_syncing(tx));
164 164 ASSERT(bp->blk_birth <= tx->tx_txg);
165 165
166 166 if (ds == NULL) {
167 167 dsl_free(tx->tx_pool, tx->tx_txg, bp);
168 168 dsl_pool_mos_diduse_space(tx->tx_pool,
169 169 -used, -compressed, -uncompressed);
170 170 return (used);
171 171 }
172 172 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
173 173
174 174 ASSERT(!ds->ds_is_snapshot);
175 175 dmu_buf_will_dirty(ds->ds_dbuf, tx);
176 176
177 177 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
178 178 int64_t delta;
179 179
180 180 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
181 181 dsl_free(tx->tx_pool, tx->tx_txg, bp);
182 182
183 183 mutex_enter(&ds->ds_lock);
184 184 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
185 185 !DS_UNIQUE_IS_ACCURATE(ds));
186 186 delta = parent_delta(ds, -used);
187 187 dsl_dataset_phys(ds)->ds_unique_bytes -= used;
188 188 mutex_exit(&ds->ds_lock);
189 189 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
190 190 delta, -compressed, -uncompressed, tx);
191 191 dsl_dir_transfer_space(ds->ds_dir, -used - delta,
192 192 DD_USED_REFRSRV, DD_USED_HEAD, tx);
193 193 } else {
194 194 dprintf_bp(bp, "putting on dead list: %s", "");
195 195 if (async) {
196 196 /*
197 197 * We are here as part of zio's write done callback,
198 198 * which means we're a zio interrupt thread. We can't
199 199 * call dsl_deadlist_insert() now because it may block
200 200 * waiting for I/O. Instead, put bp on the deferred
201 201 * queue and let dsl_pool_sync() finish the job.
202 202 */
203 203 bplist_append(&ds->ds_pending_deadlist, bp);
204 204 } else {
205 205 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
206 206 }
207 207 ASSERT3U(ds->ds_prev->ds_object, ==,
208 208 dsl_dataset_phys(ds)->ds_prev_snap_obj);
209 209 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0);
210 210 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
211 211 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
212 212 ds->ds_object && bp->blk_birth >
213 213 dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) {
214 214 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
215 215 mutex_enter(&ds->ds_prev->ds_lock);
216 216 dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used;
217 217 mutex_exit(&ds->ds_prev->ds_lock);
218 218 }
219 219 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
220 220 dsl_dir_transfer_space(ds->ds_dir, used,
221 221 DD_USED_HEAD, DD_USED_SNAP, tx);
222 222 }
223 223 }
224 224 mutex_enter(&ds->ds_lock);
225 225 ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used);
226 226 dsl_dataset_phys(ds)->ds_referenced_bytes -= used;
227 227 ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed);
228 228 dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed;
229 229 ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed);
230 230 dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed;
231 231 mutex_exit(&ds->ds_lock);
232 232
233 233 return (used);
234 234 }
235 235
236 236 uint64_t
237 237 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
238 238 {
239 239 uint64_t trysnap = 0;
240 240
241 241 if (ds == NULL)
242 242 return (0);
243 243 /*
244 244 * The snapshot creation could fail, but that would cause an
245 245 * incorrect FALSE return, which would only result in an
246 246 * overestimation of the amount of space that an operation would
247 247 * consume, which is OK.
248 248 *
249 249 * There's also a small window where we could miss a pending
250 250 * snapshot, because we could set the sync task in the quiescing
251 251 * phase. So this should only be used as a guess.
252 252 */
253 253 if (ds->ds_trysnap_txg >
254 254 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
255 255 trysnap = ds->ds_trysnap_txg;
256 256 return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
257 257 }
258 258
259 259 boolean_t
260 260 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
261 261 uint64_t blk_birth)
262 262 {
↓ open down ↓ |
262 lines elided |
↑ open up ↑ |
263 263 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
264 264 (bp != NULL && BP_IS_HOLE(bp)))
265 265 return (B_FALSE);
266 266
267 267 ddt_prefetch(dsl_dataset_get_spa(ds), bp);
268 268
269 269 return (B_TRUE);
270 270 }
271 271
272 272 static void
273 +dsl_dataset_evict_prep(void *dbu)
274 +{
275 + dsl_dataset_t *ds = dbu;
276 +
277 + ASSERT(ds->ds_owner == NULL);
278 +
279 + unique_remove(ds->ds_fsid_guid);
280 +}
281 +
282 +static void
273 283 dsl_dataset_evict(void *dbu)
274 284 {
275 285 dsl_dataset_t *ds = dbu;
276 286
277 287 ASSERT(ds->ds_owner == NULL);
278 288
279 289 ds->ds_dbuf = NULL;
280 290
281 - unique_remove(ds->ds_fsid_guid);
282 -
283 291 if (ds->ds_objset != NULL)
284 292 dmu_objset_evict(ds->ds_objset);
285 293
286 294 if (ds->ds_prev) {
287 295 dsl_dataset_rele(ds->ds_prev, ds);
288 296 ds->ds_prev = NULL;
289 297 }
290 298
291 299 bplist_destroy(&ds->ds_pending_deadlist);
292 300 if (ds->ds_deadlist.dl_os != NULL)
293 301 dsl_deadlist_close(&ds->ds_deadlist);
294 302 if (ds->ds_dir)
295 303 dsl_dir_async_rele(ds->ds_dir, ds);
296 304
297 305 ASSERT(!list_link_active(&ds->ds_synced_link));
298 306
299 307 list_destroy(&ds->ds_prop_cbs);
300 308 mutex_destroy(&ds->ds_lock);
301 309 mutex_destroy(&ds->ds_opening_lock);
302 310 mutex_destroy(&ds->ds_sendstream_lock);
303 311 refcount_destroy(&ds->ds_longholds);
304 312
305 313 kmem_free(ds, sizeof (dsl_dataset_t));
306 314 }
307 315
308 316 int
309 317 dsl_dataset_get_snapname(dsl_dataset_t *ds)
310 318 {
311 319 dsl_dataset_phys_t *headphys;
312 320 int err;
313 321 dmu_buf_t *headdbuf;
314 322 dsl_pool_t *dp = ds->ds_dir->dd_pool;
315 323 objset_t *mos = dp->dp_meta_objset;
316 324
317 325 if (ds->ds_snapname[0])
318 326 return (0);
319 327 if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0)
320 328 return (0);
321 329
322 330 err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
323 331 FTAG, &headdbuf);
324 332 if (err != 0)
325 333 return (err);
326 334 headphys = headdbuf->db_data;
327 335 err = zap_value_search(dp->dp_meta_objset,
328 336 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
329 337 dmu_buf_rele(headdbuf, FTAG);
330 338 return (err);
331 339 }
332 340
333 341 int
334 342 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
335 343 {
336 344 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
337 345 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
338 346 matchtype_t mt;
339 347 int err;
340 348
341 349 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
342 350 mt = MT_FIRST;
343 351 else
344 352 mt = MT_EXACT;
345 353
346 354 err = zap_lookup_norm(mos, snapobj, name, 8, 1,
347 355 value, mt, NULL, 0, NULL);
348 356 if (err == ENOTSUP && mt == MT_FIRST)
349 357 err = zap_lookup(mos, snapobj, name, 8, 1, value);
350 358 return (err);
351 359 }
352 360
353 361 int
354 362 dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
355 363 boolean_t adj_cnt)
356 364 {
357 365 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
358 366 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
359 367 matchtype_t mt;
360 368 int err;
361 369
362 370 dsl_dir_snap_cmtime_update(ds->ds_dir);
363 371
364 372 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
365 373 mt = MT_FIRST;
366 374 else
367 375 mt = MT_EXACT;
368 376
369 377 err = zap_remove_norm(mos, snapobj, name, mt, tx);
370 378 if (err == ENOTSUP && mt == MT_FIRST)
371 379 err = zap_remove(mos, snapobj, name, tx);
372 380
373 381 if (err == 0 && adj_cnt)
374 382 dsl_fs_ss_count_adjust(ds->ds_dir, -1,
375 383 DD_FIELD_SNAPSHOT_COUNT, tx);
376 384
377 385 return (err);
378 386 }
379 387
380 388 boolean_t
381 389 dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
382 390 {
383 391 dmu_buf_t *dbuf = ds->ds_dbuf;
384 392 boolean_t result = B_FALSE;
385 393
386 394 if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset,
387 395 ds->ds_object, DMU_BONUS_BLKID, tag)) {
388 396
389 397 if (ds == dmu_buf_get_user(dbuf))
390 398 result = B_TRUE;
391 399 else
392 400 dmu_buf_rele(dbuf, tag);
393 401 }
394 402
395 403 return (result);
396 404 }
397 405
398 406 int
399 407 dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
400 408 dsl_dataset_t **dsp)
401 409 {
402 410 objset_t *mos = dp->dp_meta_objset;
403 411 dmu_buf_t *dbuf;
404 412 dsl_dataset_t *ds;
405 413 int err;
406 414 dmu_object_info_t doi;
407 415
408 416 ASSERT(dsl_pool_config_held(dp));
409 417
410 418 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
411 419 if (err != 0)
412 420 return (err);
413 421
414 422 /* Make sure dsobj has the correct object type. */
415 423 dmu_object_info_from_db(dbuf, &doi);
416 424 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
417 425 dmu_buf_rele(dbuf, tag);
418 426 return (SET_ERROR(EINVAL));
419 427 }
420 428
421 429 ds = dmu_buf_get_user(dbuf);
422 430 if (ds == NULL) {
423 431 dsl_dataset_t *winner = NULL;
424 432
425 433 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
426 434 ds->ds_dbuf = dbuf;
427 435 ds->ds_object = dsobj;
428 436 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
429 437
430 438 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
431 439 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
432 440 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
433 441 refcount_create(&ds->ds_longholds);
434 442
435 443 bplist_create(&ds->ds_pending_deadlist);
436 444 dsl_deadlist_open(&ds->ds_deadlist,
437 445 mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
438 446
439 447 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
440 448 offsetof(dmu_sendarg_t, dsa_link));
441 449
442 450 list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),
443 451 offsetof(dsl_prop_cb_record_t, cbr_ds_node));
444 452
445 453 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
446 454 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
447 455 if (!(spa_feature_table[f].fi_flags &
448 456 ZFEATURE_FLAG_PER_DATASET))
449 457 continue;
450 458 err = zap_contains(mos, dsobj,
451 459 spa_feature_table[f].fi_guid);
452 460 if (err == 0) {
453 461 ds->ds_feature_inuse[f] = B_TRUE;
454 462 } else {
455 463 ASSERT3U(err, ==, ENOENT);
456 464 err = 0;
457 465 }
458 466 }
459 467 }
460 468
461 469 err = dsl_dir_hold_obj(dp,
462 470 dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir);
463 471 if (err != 0) {
464 472 mutex_destroy(&ds->ds_lock);
465 473 mutex_destroy(&ds->ds_opening_lock);
466 474 mutex_destroy(&ds->ds_sendstream_lock);
467 475 refcount_destroy(&ds->ds_longholds);
468 476 bplist_destroy(&ds->ds_pending_deadlist);
469 477 dsl_deadlist_close(&ds->ds_deadlist);
470 478 kmem_free(ds, sizeof (dsl_dataset_t));
471 479 dmu_buf_rele(dbuf, tag);
472 480 return (err);
473 481 }
474 482
475 483 if (!ds->ds_is_snapshot) {
476 484 ds->ds_snapname[0] = '\0';
477 485 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
478 486 err = dsl_dataset_hold_obj(dp,
479 487 dsl_dataset_phys(ds)->ds_prev_snap_obj,
480 488 ds, &ds->ds_prev);
481 489 }
482 490 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
483 491 int zaperr = zap_lookup(mos, ds->ds_object,
484 492 DS_FIELD_BOOKMARK_NAMES,
485 493 sizeof (ds->ds_bookmarks), 1,
486 494 &ds->ds_bookmarks);
487 495 if (zaperr != ENOENT)
488 496 VERIFY0(zaperr);
489 497 }
490 498 } else {
491 499 if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
492 500 err = dsl_dataset_get_snapname(ds);
493 501 if (err == 0 &&
494 502 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
495 503 err = zap_count(
496 504 ds->ds_dir->dd_pool->dp_meta_objset,
497 505 dsl_dataset_phys(ds)->ds_userrefs_obj,
498 506 &ds->ds_userrefs);
499 507 }
500 508 }
501 509
502 510 if (err == 0 && !ds->ds_is_snapshot) {
503 511 err = dsl_prop_get_int_ds(ds,
504 512 zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
↓ open down ↓ |
212 lines elided |
↑ open up ↑ |
505 513 &ds->ds_reserved);
506 514 if (err == 0) {
507 515 err = dsl_prop_get_int_ds(ds,
508 516 zfs_prop_to_name(ZFS_PROP_REFQUOTA),
509 517 &ds->ds_quota);
510 518 }
511 519 } else {
512 520 ds->ds_reserved = ds->ds_quota = 0;
513 521 }
514 522
515 - dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
523 + dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_prep,
524 + dsl_dataset_evict, &ds->ds_dbuf);
516 525 if (err == 0)
517 526 winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
518 527
519 528 if (err != 0 || winner != NULL) {
520 529 bplist_destroy(&ds->ds_pending_deadlist);
521 530 dsl_deadlist_close(&ds->ds_deadlist);
522 531 if (ds->ds_prev)
523 532 dsl_dataset_rele(ds->ds_prev, ds);
524 533 dsl_dir_rele(ds->ds_dir, ds);
525 534 mutex_destroy(&ds->ds_lock);
526 535 mutex_destroy(&ds->ds_opening_lock);
527 536 mutex_destroy(&ds->ds_sendstream_lock);
528 537 refcount_destroy(&ds->ds_longholds);
529 538 kmem_free(ds, sizeof (dsl_dataset_t));
530 539 if (err != 0) {
531 540 dmu_buf_rele(dbuf, tag);
532 541 return (err);
533 542 }
534 543 ds = winner;
535 544 } else {
536 545 ds->ds_fsid_guid =
537 546 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
538 547 }
539 548 }
540 549 ASSERT3P(ds->ds_dbuf, ==, dbuf);
541 550 ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
542 551 ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
543 552 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
544 553 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
545 554 *dsp = ds;
546 555 return (0);
547 556 }
548 557
549 558 int
550 559 dsl_dataset_hold(dsl_pool_t *dp, const char *name,
551 560 void *tag, dsl_dataset_t **dsp)
552 561 {
553 562 dsl_dir_t *dd;
554 563 const char *snapname;
555 564 uint64_t obj;
556 565 int err = 0;
557 566 dsl_dataset_t *ds;
558 567
559 568 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
560 569 if (err != 0)
561 570 return (err);
562 571
563 572 ASSERT(dsl_pool_config_held(dp));
564 573 obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
565 574 if (obj != 0)
566 575 err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
567 576 else
568 577 err = SET_ERROR(ENOENT);
569 578
570 579 /* we may be looking for a snapshot */
571 580 if (err == 0 && snapname != NULL) {
572 581 dsl_dataset_t *snap_ds;
573 582
574 583 if (*snapname++ != '@') {
575 584 dsl_dataset_rele(ds, tag);
576 585 dsl_dir_rele(dd, FTAG);
577 586 return (SET_ERROR(ENOENT));
578 587 }
579 588
580 589 dprintf("looking for snapshot '%s'\n", snapname);
581 590 err = dsl_dataset_snap_lookup(ds, snapname, &obj);
582 591 if (err == 0)
583 592 err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds);
584 593 dsl_dataset_rele(ds, tag);
585 594
586 595 if (err == 0) {
587 596 mutex_enter(&snap_ds->ds_lock);
588 597 if (snap_ds->ds_snapname[0] == 0)
589 598 (void) strlcpy(snap_ds->ds_snapname, snapname,
590 599 sizeof (snap_ds->ds_snapname));
591 600 mutex_exit(&snap_ds->ds_lock);
592 601 ds = snap_ds;
593 602 }
594 603 }
595 604 if (err == 0)
596 605 *dsp = ds;
597 606 dsl_dir_rele(dd, FTAG);
598 607 return (err);
599 608 }
600 609
601 610 int
602 611 dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
603 612 void *tag, dsl_dataset_t **dsp)
604 613 {
605 614 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
606 615 if (err != 0)
607 616 return (err);
608 617 if (!dsl_dataset_tryown(*dsp, tag)) {
609 618 dsl_dataset_rele(*dsp, tag);
610 619 *dsp = NULL;
611 620 return (SET_ERROR(EBUSY));
612 621 }
613 622 return (0);
614 623 }
615 624
616 625 int
617 626 dsl_dataset_own(dsl_pool_t *dp, const char *name,
618 627 void *tag, dsl_dataset_t **dsp)
619 628 {
620 629 int err = dsl_dataset_hold(dp, name, tag, dsp);
621 630 if (err != 0)
622 631 return (err);
623 632 if (!dsl_dataset_tryown(*dsp, tag)) {
624 633 dsl_dataset_rele(*dsp, tag);
625 634 return (SET_ERROR(EBUSY));
626 635 }
627 636 return (0);
628 637 }
629 638
630 639 /*
631 640 * See the comment above dsl_pool_hold() for details. In summary, a long
632 641 * hold is used to prevent destruction of a dataset while the pool hold
633 642 * is dropped, allowing other concurrent operations (e.g. spa_sync()).
634 643 *
635 644 * The dataset and pool must be held when this function is called. After it
636 645 * is called, the pool hold may be released while the dataset is still held
637 646 * and accessed.
638 647 */
639 648 void
640 649 dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
641 650 {
642 651 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
643 652 (void) refcount_add(&ds->ds_longholds, tag);
644 653 }
645 654
646 655 void
647 656 dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
648 657 {
649 658 (void) refcount_remove(&ds->ds_longholds, tag);
650 659 }
651 660
652 661 /* Return B_TRUE if there are any long holds on this dataset. */
653 662 boolean_t
654 663 dsl_dataset_long_held(dsl_dataset_t *ds)
655 664 {
656 665 return (!refcount_is_zero(&ds->ds_longholds));
657 666 }
658 667
659 668 void
660 669 dsl_dataset_name(dsl_dataset_t *ds, char *name)
661 670 {
662 671 if (ds == NULL) {
663 672 (void) strcpy(name, "mos");
664 673 } else {
665 674 dsl_dir_name(ds->ds_dir, name);
666 675 VERIFY0(dsl_dataset_get_snapname(ds));
667 676 if (ds->ds_snapname[0]) {
668 677 (void) strcat(name, "@");
669 678 /*
670 679 * We use a "recursive" mutex so that we
671 680 * can call dprintf_ds() with ds_lock held.
672 681 */
673 682 if (!MUTEX_HELD(&ds->ds_lock)) {
674 683 mutex_enter(&ds->ds_lock);
675 684 (void) strcat(name, ds->ds_snapname);
676 685 mutex_exit(&ds->ds_lock);
677 686 } else {
678 687 (void) strcat(name, ds->ds_snapname);
679 688 }
680 689 }
681 690 }
682 691 }
683 692
684 693 void
685 694 dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
686 695 {
687 696 dmu_buf_rele(ds->ds_dbuf, tag);
688 697 }
689 698
690 699 void
691 700 dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
692 701 {
693 702 ASSERT3P(ds->ds_owner, ==, tag);
694 703 ASSERT(ds->ds_dbuf != NULL);
695 704
696 705 mutex_enter(&ds->ds_lock);
697 706 ds->ds_owner = NULL;
698 707 mutex_exit(&ds->ds_lock);
699 708 dsl_dataset_long_rele(ds, tag);
700 709 dsl_dataset_rele(ds, tag);
701 710 }
702 711
703 712 boolean_t
704 713 dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
705 714 {
706 715 boolean_t gotit = FALSE;
707 716
708 717 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
709 718 mutex_enter(&ds->ds_lock);
710 719 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
711 720 ds->ds_owner = tag;
712 721 dsl_dataset_long_hold(ds, tag);
713 722 gotit = TRUE;
714 723 }
715 724 mutex_exit(&ds->ds_lock);
716 725 return (gotit);
717 726 }
718 727
719 728 boolean_t
720 729 dsl_dataset_has_owner(dsl_dataset_t *ds)
721 730 {
722 731 boolean_t rv;
723 732 mutex_enter(&ds->ds_lock);
724 733 rv = (ds->ds_owner != NULL);
725 734 mutex_exit(&ds->ds_lock);
726 735 return (rv);
727 736 }
728 737
729 738 static void
730 739 dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
731 740 {
732 741 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
733 742 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
734 743 uint64_t zero = 0;
735 744
736 745 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
737 746
738 747 spa_feature_incr(spa, f, tx);
739 748 dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
740 749
741 750 VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
742 751 sizeof (zero), 1, &zero, tx));
743 752 }
744 753
745 754 void
746 755 dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
747 756 {
748 757 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
749 758 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
750 759
751 760 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
752 761
753 762 VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));
754 763 spa_feature_decr(spa, f, tx);
755 764 }
756 765
757 766 uint64_t
758 767 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
759 768 uint64_t flags, dmu_tx_t *tx)
760 769 {
761 770 dsl_pool_t *dp = dd->dd_pool;
762 771 dmu_buf_t *dbuf;
763 772 dsl_dataset_phys_t *dsphys;
764 773 uint64_t dsobj;
765 774 objset_t *mos = dp->dp_meta_objset;
766 775
767 776 if (origin == NULL)
768 777 origin = dp->dp_origin_snap;
769 778
770 779 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
771 780 ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0);
772 781 ASSERT(dmu_tx_is_syncing(tx));
773 782 ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
774 783
775 784 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
776 785 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
777 786 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
778 787 dmu_buf_will_dirty(dbuf, tx);
779 788 dsphys = dbuf->db_data;
780 789 bzero(dsphys, sizeof (dsl_dataset_phys_t));
781 790 dsphys->ds_dir_obj = dd->dd_object;
782 791 dsphys->ds_flags = flags;
783 792 dsphys->ds_fsid_guid = unique_create();
784 793 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
785 794 sizeof (dsphys->ds_guid));
786 795 dsphys->ds_snapnames_zapobj =
787 796 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
788 797 DMU_OT_NONE, 0, tx);
789 798 dsphys->ds_creation_time = gethrestime_sec();
790 799 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
791 800
792 801 if (origin == NULL) {
793 802 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
794 803 } else {
795 804 dsl_dataset_t *ohds; /* head of the origin snapshot */
796 805
797 806 dsphys->ds_prev_snap_obj = origin->ds_object;
798 807 dsphys->ds_prev_snap_txg =
799 808 dsl_dataset_phys(origin)->ds_creation_txg;
800 809 dsphys->ds_referenced_bytes =
801 810 dsl_dataset_phys(origin)->ds_referenced_bytes;
802 811 dsphys->ds_compressed_bytes =
803 812 dsl_dataset_phys(origin)->ds_compressed_bytes;
804 813 dsphys->ds_uncompressed_bytes =
805 814 dsl_dataset_phys(origin)->ds_uncompressed_bytes;
806 815 dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;
807 816
808 817 /*
809 818 * Inherit flags that describe the dataset's contents
810 819 * (INCONSISTENT) or properties (Case Insensitive).
811 820 */
812 821 dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags &
813 822 (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);
814 823
815 824 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
816 825 if (origin->ds_feature_inuse[f])
817 826 dsl_dataset_activate_feature(dsobj, f, tx);
818 827 }
819 828
820 829 dmu_buf_will_dirty(origin->ds_dbuf, tx);
821 830 dsl_dataset_phys(origin)->ds_num_children++;
822 831
823 832 VERIFY0(dsl_dataset_hold_obj(dp,
824 833 dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj,
825 834 FTAG, &ohds));
826 835 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
827 836 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
828 837 dsl_dataset_rele(ohds, FTAG);
829 838
830 839 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
831 840 if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) {
832 841 dsl_dataset_phys(origin)->ds_next_clones_obj =
833 842 zap_create(mos,
834 843 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
835 844 }
836 845 VERIFY0(zap_add_int(mos,
837 846 dsl_dataset_phys(origin)->ds_next_clones_obj,
838 847 dsobj, tx));
839 848 }
840 849
841 850 dmu_buf_will_dirty(dd->dd_dbuf, tx);
842 851 dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object;
843 852 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
844 853 if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) {
845 854 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
846 855 dsl_dir_phys(origin->ds_dir)->dd_clones =
847 856 zap_create(mos,
848 857 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
849 858 }
850 859 VERIFY0(zap_add_int(mos,
851 860 dsl_dir_phys(origin->ds_dir)->dd_clones,
852 861 dsobj, tx));
853 862 }
854 863 }
855 864
856 865 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
857 866 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
858 867
859 868 dmu_buf_rele(dbuf, FTAG);
860 869
861 870 dmu_buf_will_dirty(dd->dd_dbuf, tx);
862 871 dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj;
863 872
864 873 return (dsobj);
865 874 }
866 875
867 876 static void
868 877 dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
869 878 {
870 879 objset_t *os;
871 880
872 881 VERIFY0(dmu_objset_from_ds(ds, &os));
873 882 bzero(&os->os_zil_header, sizeof (os->os_zil_header));
874 883 dsl_dataset_dirty(ds, tx);
875 884 }
876 885
877 886 uint64_t
878 887 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
879 888 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
880 889 {
881 890 dsl_pool_t *dp = pdd->dd_pool;
882 891 uint64_t dsobj, ddobj;
883 892 dsl_dir_t *dd;
884 893
885 894 ASSERT(dmu_tx_is_syncing(tx));
886 895 ASSERT(lastname[0] != '@');
887 896
888 897 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
889 898 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
890 899
891 900 dsobj = dsl_dataset_create_sync_dd(dd, origin,
892 901 flags & ~DS_CREATE_FLAG_NODIRTY, tx);
893 902
894 903 dsl_deleg_set_create_perms(dd, tx, cr);
895 904
896 905 /*
897 906 * Since we're creating a new node we know it's a leaf, so we can
898 907 * initialize the counts if the limit feature is active.
899 908 */
900 909 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
901 910 uint64_t cnt = 0;
902 911 objset_t *os = dd->dd_pool->dp_meta_objset;
903 912
904 913 dsl_dir_zapify(dd, tx);
905 914 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
906 915 sizeof (cnt), 1, &cnt, tx));
907 916 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
908 917 sizeof (cnt), 1, &cnt, tx));
909 918 }
910 919
911 920 dsl_dir_rele(dd, FTAG);
912 921
913 922 /*
914 923 * If we are creating a clone, make sure we zero out any stale
915 924  * data from the origin snapshot's zil header.
916 925 */
917 926 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
918 927 dsl_dataset_t *ds;
919 928
920 929 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
921 930 dsl_dataset_zero_zil(ds, tx);
922 931 dsl_dataset_rele(ds, FTAG);
923 932 }
924 933
925 934 return (dsobj);
926 935 }
927 936
928 937 /*
929 938 * The unique space in the head dataset can be calculated by subtracting
930 939 * the space used in the most recent snapshot, that is still being used
931 940 * in this file system, from the space currently in use. To figure out
932 941 * the space in the most recent snapshot still in use, we need to take
933 942 * the total space used in the snapshot and subtract out the space that
934 943 * has been freed up since the snapshot was taken.
935 944 */
936 945 void
937 946 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
938 947 {
939 948 uint64_t mrs_used;
940 949 uint64_t dlused, dlcomp, dluncomp;
941 950
942 951 ASSERT(!ds->ds_is_snapshot);
943 952
944 953 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
945 954 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
946 955 else
947 956 mrs_used = 0;
948 957
949 958 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
950 959
951 960 ASSERT3U(dlused, <=, mrs_used);
952 961 dsl_dataset_phys(ds)->ds_unique_bytes =
953 962 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);
954 963
955 964 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
956 965 SPA_VERSION_UNIQUE_ACCURATE)
957 966 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
958 967 }
959 968
/*
 * Remove dataset object 'obj' from the next_clones ZAP of snapshot 'ds'.
 * Tolerates a missing entry (ENOENT); see the comment below for why.
 */
void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/* 'ds' must have at least one clone besides 'obj'. */
	ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2);
	err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
	    obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT. However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	/* DEBUG-only sanity check of the remaining entry count. */
	ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2);
}
987 996
988 997
989 998 blkptr_t *
990 999 dsl_dataset_get_blkptr(dsl_dataset_t *ds)
991 1000 {
992 1001 return (&dsl_dataset_phys(ds)->ds_bp);
993 1002 }
994 1003
995 1004 void
996 1005 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
997 1006 {
998 1007 ASSERT(dmu_tx_is_syncing(tx));
999 1008 /* If it's the meta-objset, set dp_meta_rootbp */
1000 1009 if (ds == NULL) {
1001 1010 tx->tx_pool->dp_meta_rootbp = *bp;
1002 1011 } else {
1003 1012 dmu_buf_will_dirty(ds->ds_dbuf, tx);
1004 1013 dsl_dataset_phys(ds)->ds_bp = *bp;
1005 1014 }
1006 1015 }
1007 1016
1008 1017 spa_t *
1009 1018 dsl_dataset_get_spa(dsl_dataset_t *ds)
1010 1019 {
1011 1020 return (ds->ds_dir->dd_pool->dp_spa);
1012 1021 }
1013 1022
1014 1023 void
1015 1024 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1016 1025 {
1017 1026 dsl_pool_t *dp;
1018 1027
1019 1028 if (ds == NULL) /* this is the meta-objset */
1020 1029 return;
1021 1030
1022 1031 ASSERT(ds->ds_objset != NULL);
1023 1032
1024 1033 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0)
1025 1034 panic("dirtying snapshot!");
1026 1035
1027 1036 dp = ds->ds_dir->dd_pool;
1028 1037
1029 1038 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
1030 1039 /* up the hold count until we can be written out */
1031 1040 dmu_buf_add_ref(ds->ds_dbuf, ds);
1032 1041 }
1033 1042 }
1034 1043
1035 1044 boolean_t
1036 1045 dsl_dataset_is_dirty(dsl_dataset_t *ds)
1037 1046 {
1038 1047 for (int t = 0; t < TXG_SIZE; t++) {
1039 1048 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
1040 1049 ds, t))
1041 1050 return (B_TRUE);
1042 1051 }
1043 1052 return (B_FALSE);
1044 1053 }
1045 1054
1046 1055 static int
1047 1056 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1048 1057 {
1049 1058 uint64_t asize;
1050 1059
1051 1060 if (!dmu_tx_is_syncing(tx))
1052 1061 return (0);
1053 1062
1054 1063 /*
1055 1064 * If there's an fs-only reservation, any blocks that might become
1056 1065 * owned by the snapshot dataset must be accommodated by space
1057 1066 * outside of the reservation.
1058 1067 */
1059 1068 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
1060 1069 asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved);
1061 1070 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
1062 1071 return (SET_ERROR(ENOSPC));
1063 1072
1064 1073 /*
1065 1074 * Propagate any reserved space for this snapshot to other
1066 1075 * snapshot checks in this sync group.
1067 1076 */
1068 1077 if (asize > 0)
1069 1078 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
1070 1079
1071 1080 return (0);
1072 1081 }
1073 1082
/* Argument bundle for the dsl_dataset_snapshot() sync task. */
typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;	/* "fs@snap" names of snapshots to create */
	nvlist_t *ddsa_props;	/* properties to set on the new snapshots */
	nvlist_t *ddsa_errors;	/* out: per-name errnos (may be NULL) */
	cred_t *ddsa_cr;	/* credentials for snapshot-limit checks */
} dsl_dataset_snapshot_arg_t;
1080 1089
/*
 * Check whether a snapshot named 'snapname' can be created on 'ds' in
 * this txg.  'recv' indicates the snapshot is being created as part of
 * a receive (which permits DS_FLAG_INCONSISTENT datasets).  'cnt' and
 * 'cr' drive the snapshot-limit check; passing cnt == 0 or cr == NULL
 * skips it.  Returns 0 if the snapshot may be taken, else an errno.
 */
int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
	int error;
	uint64_t value;

	ds->ds_trysnap_txg = tx->tx_txg;

	/* The remaining checks only apply in syncing context. */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * We don't allow multiple snapshots of the same txg. If there
	 * is already one, try again.
	 */
	if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg)
		return (SET_ERROR(EAGAIN));

	/*
	 * Check for conflicting snapshot name.
	 */
	error = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (error == 0)
		return (SET_ERROR(EEXIST));
	if (error != ENOENT)
		return (error);

	/*
	 * We don't allow taking snapshots of inconsistent datasets, such as
	 * those into which we are currently receiving. However, if we are
	 * creating this snapshot as part of a receive, this check will be
	 * executed atomically with respect to the completion of the receive
	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
	 * case we ignore this, knowing it will be fixed up for us shortly in
	 * dmu_recv_end_sync().
	 */
	if (!recv && DS_IS_INCONSISTENT(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Skip the check for temporary snapshots or if we have already checked
	 * the counts in dsl_dataset_snapshot_check. This means we really only
	 * check the count here when we're receiving a stream.
	 */
	if (cnt != 0 && cr != NULL) {
		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
		if (error != 0)
			return (error);
	}

	error = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (error != 0)
		return (error);

	return (0);
}
1139 1148
/*
 * Check callback for the dsl_dataset_snapshot() sync task.  Validates
 * the aggregated snapshot limits and then each individual snapshot
 * name.  Per-name errors are recorded in ddsa_errors (if non-NULL);
 * the return value is the last error encountered (0 on success).
 */
static int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below. This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync. For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but its more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree but since it is validating each snapshot
	 * independently we need to be sure that we are validating the complete
	 * count for the entire set of snapshots. We do this by rolling up the
	 * counts for each component of the name into an nvlist and then
	 * checking each of those cases with the aggregated count.
	 *
	 * This approach properly handles not only the recursive snapshot
	 * case (where we get all of those on the ddsa_snaps list) but also
	 * the sibling case (e.g. snapshot a/b and a/c so that we will also
	 * validate the limit on 'a' using a count of 2).
	 *
	 * We validate the snapshot names in the third loop and only report
	 * name errors once.
	 */
	if (dmu_tx_is_syncing(tx)) {
		nvlist_t *cnt_track = NULL;
		cnt_track = fnvlist_alloc();

		/* Rollup aggregated counts into the cnt_track list */
		for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
		    pair != NULL;
		    pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
			char *pdelim;
			uint64_t val;
			char nm[MAXPATHLEN];

			(void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
			pdelim = strchr(nm, '@');
			if (pdelim == NULL)
				continue;
			*pdelim = '\0';

			/*
			 * Bump the count for the filesystem itself, then
			 * for each of its ancestors by truncating 'nm' one
			 * '/'-component at a time.
			 */
			do {
				if (nvlist_lookup_uint64(cnt_track, nm,
				    &val) == 0) {
					/* update existing entry */
					fnvlist_add_uint64(cnt_track, nm,
					    val + 1);
				} else {
					/* add to list */
					fnvlist_add_uint64(cnt_track, nm, 1);
				}

				pdelim = strrchr(nm, '/');
				if (pdelim != NULL)
					*pdelim = '\0';
			} while (pdelim != NULL);
		}

		/* Check aggregated counts at each level */
		for (pair = nvlist_next_nvpair(cnt_track, NULL);
		    pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
			int error = 0;
			char *name;
			uint64_t cnt = 0;
			dsl_dataset_t *ds;

			name = nvpair_name(pair);
			cnt = fnvpair_value_uint64(pair);
			ASSERT(cnt > 0);

			error = dsl_dataset_hold(dp, name, FTAG, &ds);
			if (error == 0) {
				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
				    ddsa->ddsa_cr);
				dsl_dataset_rele(ds, FTAG);
			}

			if (error != 0) {
				if (ddsa->ddsa_errors != NULL)
					fnvlist_add_int32(ddsa->ddsa_errors,
					    name, error);
				rv = error;
				/* only report one error for this check */
				break;
			}
		}
		nvlist_free(cnt_track);
	}

	/* Validate each snapshot name and its per-dataset preconditions. */
	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		if (strlen(name) >= MAXNAMELEN)
			error = SET_ERROR(ENAMETOOLONG);
		if (error == 0) {
			atp = strchr(name, '@');
			if (atp == NULL)
				error = SET_ERROR(EINVAL);
			if (error == 0)
				(void) strlcpy(dsname, name, atp - name + 1);
		}
		if (error == 0)
			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
		if (error == 0) {
			/* passing 0/NULL skips dsl_fs_ss_limit_check */
			error = dsl_dataset_snapshot_check_impl(ds,
			    atp + 1, tx, B_FALSE, 0, NULL);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			if (ddsa->ddsa_errors != NULL) {
				fnvlist_add_int32(ddsa->ddsa_errors,
				    name, error);
			}
			rv = error;
		}
	}

	return (rv);
}
1278 1287
/*
 * Create snapshot 'snapname' of 'ds' in syncing context: allocate and
 * fill in the snapshot's dsl_dataset_phys_t, splice it into the
 * snapshot chain (or the origin's next_clones list for a clone), move
 * the head's deadlist to the snapshot, reset the head's unique space,
 * and add the name to the head's snapnames ZAP.
 */
void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx)
{
	static zil_header_t zero_zil;

	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	objset_t *os;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * If we are on an old pool, the zil must not be active, in which
	 * case it will be zeroed. Usually zil_suspend() accomplishes this.
	 */
	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
	    dmu_objset_from_ds(ds, &os) != 0 ||
	    bcmp(&os->os_phys->os_zil_header, &zero_zil,
	    sizeof (zero_zil)) == 0);

	dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	/*
	 * Allocate the snapshot's dataset object and initialize its phys
	 * as a copy of the head's current state.
	 */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes =
	    dsl_dataset_phys(ds)->ds_uncompressed_bytes;
	dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;
	dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	/* The snapshot inherits the head's active feature flags. */
	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (ds->ds_feature_inuse[f])
			dsl_dataset_activate_feature(dsobj, f, tx);
	}

	/*
	 * Splice the new snapshot in between the previous snapshot and
	 * the head.  If the head is a clone whose origin has other
	 * children, update the origin's next_clones list instead.
	 */
	ASSERT3U(ds->ds_prev != 0, ==,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj;
		ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
		    ds->ds_object ||
		    dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj ==
		    ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
			    dsl_dataset_phys(ds->ds_prev)->ds_creation_txg);
			dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    dsphys->ds_next_snap_obj, tx);
			VERIFY0(zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t delta;
		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
		delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes,
		    ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    delta, 0, 0, tx);
	}

	/*
	 * The head gets a fresh deadlist; the old one now belongs to
	 * the snapshot (its object was copied into dsphys above).
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj, tx);
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj);
	dsl_deadlist_add_key(&ds->ds_deadlist,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);

	/* The new snapshot becomes the head's previous snapshot. */
	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg);
	dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj;
	dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg;
	dsl_dataset_phys(ds)->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx));

	/* Swap the in-core ds_prev hold over to the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_rele(ds->ds_prev, ds);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_scan_ds_snapshotted(ds, tx);

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
}
1410 1419
/*
 * Sync callback for the dsl_dataset_snapshot() sync task: create each
 * requested snapshot and apply any requested properties to it.  All
 * names were already validated by dsl_dataset_snapshot_check(), so
 * the '@' is known to be present and the holds cannot fail.
 */
static void
dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		atp = strchr(name, '@');
		/* copy just the filesystem portion of "fs@snap" */
		(void) strlcpy(dsname, name, atp - name + 1);
		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));

		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
		if (ddsa->ddsa_props != NULL) {
			/* ds_prev is the snapshot just created */
			dsl_props_set_sync_impl(ds->ds_prev,
			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
		}
		dsl_dataset_rele(ds, FTAG);
	}
}
1437 1446
1438 1447 /*
1439 1448 * The snapshots must all be in the same pool.
1440 1449 * All-or-nothing: if there are any failures, nothing will be modified.
1441 1450 */
int
dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
{
	dsl_dataset_snapshot_arg_t ddsa;
	nvpair_t *pair;
	boolean_t needsuspend;
	int error;
	spa_t *spa;
	char *firstname;
	nvlist_t *suspended = NULL;

	/* Nothing to do for an empty request. */
	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);
	firstname = nvpair_name(pair);

	error = spa_open(firstname, &spa, FTAG);
	if (error != 0)
		return (error);
	/* Pools older than FAST_SNAP need the ZILs quiesced first. */
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		/*
		 * Suspend the ZIL of each filesystem being snapshotted,
		 * remembering the cookies so we can resume them below.
		 */
		suspended = fnvlist_alloc();
		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(snaps, pair)) {
			char fsname[MAXNAMELEN];
			char *snapname = nvpair_name(pair);
			char *atp;
			void *cookie;

			atp = strchr(snapname, '@');
			if (atp == NULL) {
				error = SET_ERROR(EINVAL);
				break;
			}
			(void) strlcpy(fsname, snapname, atp - snapname + 1);

			error = zil_suspend(fsname, &cookie);
			if (error != 0)
				break;
			fnvlist_add_uint64(suspended, fsname,
			    (uintptr_t)cookie);
		}
	}

	ddsa.ddsa_snaps = snaps;
	ddsa.ddsa_props = props;
	ddsa.ddsa_errors = errors;
	ddsa.ddsa_cr = CRED();

	if (error == 0) {
		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
		    dsl_dataset_snapshot_sync, &ddsa,
		    fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);
	}

	/* Resume any ZILs we suspended, even on failure. */
	if (suspended != NULL) {
		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(suspended, pair)) {
			zil_resume((void *)(uintptr_t)
			    fnvpair_value_uint64(pair));
		}
		fnvlist_free(suspended);
	}

	return (error);
}
1510 1519
/* Argument bundle for the dsl_dataset_snapshot_tmp() sync task. */
typedef struct dsl_dataset_snapshot_tmp_arg {
	const char *ddsta_fsname;	/* filesystem to snapshot */
	const char *ddsta_snapname;	/* name of the temporary snapshot */
	minor_t ddsta_cleanup_minor;	/* minor for auto-cleanup of the hold */
	const char *ddsta_htag;		/* user-hold tag */
} dsl_dataset_snapshot_tmp_arg_t;
1517 1526
1518 1527 static int
1519 1528 dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
1520 1529 {
1521 1530 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
1522 1531 dsl_pool_t *dp = dmu_tx_pool(tx);
1523 1532 dsl_dataset_t *ds;
1524 1533 int error;
1525 1534
1526 1535 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
1527 1536 if (error != 0)
1528 1537 return (error);
1529 1538
1530 1539 /* NULL cred means no limit check for tmp snapshot */
1531 1540 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
1532 1541 tx, B_FALSE, 0, NULL);
1533 1542 if (error != 0) {
1534 1543 dsl_dataset_rele(ds, FTAG);
1535 1544 return (error);
1536 1545 }
1537 1546
1538 1547 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
1539 1548 dsl_dataset_rele(ds, FTAG);
1540 1549 return (SET_ERROR(ENOTSUP));
1541 1550 }
1542 1551 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
1543 1552 B_TRUE, tx);
1544 1553 if (error != 0) {
1545 1554 dsl_dataset_rele(ds, FTAG);
1546 1555 return (error);
1547 1556 }
1548 1557
1549 1558 dsl_dataset_rele(ds, FTAG);
1550 1559 return (0);
1551 1560 }
1552 1561
1553 1562 static void
1554 1563 dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
1555 1564 {
1556 1565 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
1557 1566 dsl_pool_t *dp = dmu_tx_pool(tx);
1558 1567 dsl_dataset_t *ds;
1559 1568
1560 1569 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));
1561 1570
1562 1571 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
1563 1572 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
1564 1573 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
1565 1574 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);
1566 1575
1567 1576 dsl_dataset_rele(ds, FTAG);
1568 1577 }
1569 1578
1570 1579 int
1571 1580 dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
1572 1581 minor_t cleanup_minor, const char *htag)
1573 1582 {
1574 1583 dsl_dataset_snapshot_tmp_arg_t ddsta;
1575 1584 int error;
1576 1585 spa_t *spa;
1577 1586 boolean_t needsuspend;
1578 1587 void *cookie;
1579 1588
1580 1589 ddsta.ddsta_fsname = fsname;
1581 1590 ddsta.ddsta_snapname = snapname;
1582 1591 ddsta.ddsta_cleanup_minor = cleanup_minor;
1583 1592 ddsta.ddsta_htag = htag;
1584 1593
1585 1594 error = spa_open(fsname, &spa, FTAG);
1586 1595 if (error != 0)
1587 1596 return (error);
1588 1597 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
1589 1598 spa_close(spa, FTAG);
1590 1599
1591 1600 if (needsuspend) {
1592 1601 error = zil_suspend(fsname, &cookie);
1593 1602 if (error != 0)
1594 1603 return (error);
1595 1604 }
1596 1605
1597 1606 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
1598 1607 dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED);
1599 1608
1600 1609 if (needsuspend)
1601 1610 zil_resume(cookie);
1602 1611 return (error);
1603 1612 }
1604 1613
1605 1614
/*
 * Write out this dataset's dirty state for the current txg: the fsid
 * guid, any pending resumable-receive bookkeeping, the objset itself,
 * and any newly needed feature activations.
 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_objset != NULL);
	/* snapshots (ds_next_snap_obj != 0) are never dirtied/synced */
	ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;

	/*
	 * Persist and then clear this txg's resumable-receive progress
	 * (object/offset/bytes) recorded by the receive path.
	 */
	if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,
		    &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,
		    &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));
		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
		    ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,
		    &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));
		ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;
		ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;
		ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
	}

	dmu_objset_sync(ds->ds_objset, zio, tx);

	/* Activate any features flagged as needed during this txg. */
	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (ds->ds_feature_activation_needed[f]) {
			if (ds->ds_feature_inuse[f])
				continue;
			dsl_dataset_activate_feature(ds->ds_object, f, tx);
			ds->ds_feature_inuse[f] = B_TRUE;
		}
	}
}
1646 1655
/*
 * Add the "clones" property (the list of clones of snapshot 'ds') to
 * 'nv', built from the snapshot's next_clones ZAP.  If the ZAP's entry
 * count doesn't match ds_num_children - 1, the list is untrustworthy
 * and the property is omitted.
 */
static void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t count = 0;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	nvlist_t *propval = fnvlist_alloc();
	nvlist_t *val = fnvlist_alloc();

	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));

	/*
	 * There may be missing entries in ds_next_clones_obj
	 * due to a bug in a previous version of the code.
	 * Only trust it if it has the right number of entries.
	 */
	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
		VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj,
		    &count));
	}
	if (count != dsl_dataset_phys(ds)->ds_num_children - 1)
		goto fail;
	/* Each ZAP entry's first integer is a clone's dataset object. */
	for (zap_cursor_init(&zc, mos,
	    dsl_dataset_phys(ds)->ds_next_clones_obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;
		char buf[ZFS_MAXNAMELEN];
		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		dsl_dir_name(clone->ds_dir, buf);
		fnvlist_add_boolean(val, buf);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
	fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
fail:
	nvlist_free(val);
	nvlist_free(propval);
}
1689 1698
/*
 * If 'ds' has resumable-receive state, build the receive_resume_token
 * property and add it to 'nv'.  The token is an nvlist of the saved
 * resume fields, packed, gzip-compressed, checksummed, and rendered as
 * "version-checksum-packedsize-hexpayload".
 */
static void
get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	if (dsl_dataset_has_resume_receive_state(ds)) {
		char *str;
		void *packed;
		uint8_t *compressed;
		uint64_t val;
		nvlist_t *token_nv = fnvlist_alloc();
		size_t packed_size, compressed_size;

		/* Gather whichever resume fields are present in the ZAP. */
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {
			fnvlist_add_uint64(token_nv, "fromguid", val);
		}
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {
			fnvlist_add_uint64(token_nv, "object", val);
		}
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {
			fnvlist_add_uint64(token_nv, "offset", val);
		}
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {
			fnvlist_add_uint64(token_nv, "bytes", val);
		}
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {
			fnvlist_add_uint64(token_nv, "toguid", val);
		}
		char buf[256];
		if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
			fnvlist_add_string(token_nv, "toname", buf);
		}
		if (zap_contains(dp->dp_meta_objset, ds->ds_object,
		    DS_FIELD_RESUME_EMBEDOK) == 0) {
			fnvlist_add_boolean(token_nv, "embedok");
		}
		packed = fnvlist_pack(token_nv, &packed_size);
		fnvlist_free(token_nv);
		/*
		 * 'compressed' is sized (and later freed) as packed_size:
		 * gzip output is bounded by its input size here.
		 */
		compressed = kmem_alloc(packed_size, KM_SLEEP);

		compressed_size = gzip_compress(packed, compressed,
		    packed_size, packed_size, 6);

		zio_cksum_t cksum;
		fletcher_4_native(compressed, compressed_size, NULL, &cksum);

		/* Hex-encode the compressed payload (2 chars per byte). */
		str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP);
		for (int i = 0; i < compressed_size; i++) {
			(void) sprintf(str + i * 2, "%02x", compressed[i]);
		}
		str[compressed_size * 2] = '\0';
		char *propval = kmem_asprintf("%u-%llx-%llx-%s",
		    ZFS_SEND_RESUME_TOKEN_VERSION,
		    (longlong_t)cksum.zc_word[0],
		    (longlong_t)packed_size, str);
		dsl_prop_nvlist_add_string(nv,
		    ZFS_PROP_RECEIVE_RESUME_TOKEN, propval);
		kmem_free(packed, packed_size);
		kmem_free(str, compressed_size * 2 + 1);
		kmem_free(compressed, packed_size);
		strfree(propval);
	}
}
1759 1768
1760 1769 void
1761 1770 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1762 1771 {
1763 1772 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1764 1773 uint64_t refd, avail, uobjs, aobjs, ratio;
1765 1774
1766 1775 ASSERT(dsl_pool_config_held(dp));
1767 1776
1768 1777 ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
1769 1778 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
1770 1779 dsl_dataset_phys(ds)->ds_compressed_bytes);
1771 1780
1772 1781 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1773 1782 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1774 1783 dsl_dataset_phys(ds)->ds_uncompressed_bytes);
1775 1784
1776 1785 if (ds->ds_is_snapshot) {
1777 1786 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1778 1787 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1779 1788 dsl_dataset_phys(ds)->ds_unique_bytes);
1780 1789 get_clones_stat(ds, nv);
1781 1790 } else {
1782 1791 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1783 1792 char buf[MAXNAMELEN];
1784 1793 dsl_dataset_name(ds->ds_prev, buf);
1785 1794 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1786 1795 }
1787 1796
1788 1797 dsl_dir_stats(ds->ds_dir, nv);
1789 1798 }
1790 1799
1791 1800 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1792 1801 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1793 1802 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1794 1803
1795 1804 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1796 1805 dsl_dataset_phys(ds)->ds_creation_time);
1797 1806 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1798 1807 dsl_dataset_phys(ds)->ds_creation_txg);
1799 1808 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1800 1809 ds->ds_quota);
1801 1810 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1802 1811 ds->ds_reserved);
1803 1812 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
1804 1813 dsl_dataset_phys(ds)->ds_guid);
1805 1814 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
1806 1815 dsl_dataset_phys(ds)->ds_unique_bytes);
1807 1816 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
1808 1817 ds->ds_object);
1809 1818 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
1810 1819 ds->ds_userrefs);
1811 1820 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
1812 1821 DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
1813 1822
1814 1823 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
1815 1824 uint64_t written, comp, uncomp;
1816 1825 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1817 1826 dsl_dataset_t *prev;
1818 1827
1819 1828 int err = dsl_dataset_hold_obj(dp,
1820 1829 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
1821 1830 if (err == 0) {
1822 1831 err = dsl_dataset_space_written(prev, ds, &written,
1823 1832 &comp, &uncomp);
1824 1833 dsl_dataset_rele(prev, FTAG);
1825 1834 if (err == 0) {
1826 1835 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1827 1836 written);
1828 1837 }
1829 1838 }
1830 1839 }
1831 1840
1832 1841 if (!dsl_dataset_is_snapshot(ds)) {
1833 1842 /*
1834 1843 * A failed "newfs" (e.g. full) resumable receive leaves
1835 1844 * the stats set on this dataset. Check here for the prop.
1836 1845 */
1837 1846 get_receive_resume_stats(ds, nv);
1838 1847
1839 1848 /*
1840 1849 * A failed incremental resumable receive leaves the
1841 1850 * stats set on our child named "%recv". Check the child
1842 1851 * for the prop.
1843 1852 */
1844 1853 char recvname[ZFS_MAXNAMELEN];
1845 1854 dsl_dataset_t *recv_ds;
1846 1855 dsl_dataset_name(ds, recvname);
1847 1856 (void) strcat(recvname, "/");
1848 1857 (void) strcat(recvname, recv_clone_name);
1849 1858 if (dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) {
1850 1859 get_receive_resume_stats(recv_ds, nv);
1851 1860 dsl_dataset_rele(recv_ds, FTAG);
1852 1861 }
1853 1862 }
1854 1863 }
1855 1864
1856 1865 void
1857 1866 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1858 1867 {
1859 1868 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1860 1869 ASSERT(dsl_pool_config_held(dp));
1861 1870
1862 1871 stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
1863 1872 stat->dds_inconsistent =
1864 1873 dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
1865 1874 stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
1866 1875 stat->dds_origin[0] = '\0';
1867 1876 if (ds->ds_is_snapshot) {
1868 1877 stat->dds_is_snapshot = B_TRUE;
1869 1878 stat->dds_num_clones =
1870 1879 dsl_dataset_phys(ds)->ds_num_children - 1;
1871 1880 } else {
1872 1881 stat->dds_is_snapshot = B_FALSE;
1873 1882 stat->dds_num_clones = 0;
1874 1883
1875 1884 if (dsl_dir_is_clone(ds->ds_dir)) {
1876 1885 dsl_dataset_t *ods;
1877 1886
1878 1887 VERIFY0(dsl_dataset_hold_obj(dp,
1879 1888 dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
1880 1889 FTAG, &ods));
1881 1890 dsl_dataset_name(ods, stat->dds_origin);
1882 1891 dsl_dataset_rele(ods, FTAG);
1883 1892 }
1884 1893 }
1885 1894 }
1886 1895
/*
 * Return the dataset's in-core ds_fsid_guid.
 */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}
1892 1901
1893 1902 void
1894 1903 dsl_dataset_space(dsl_dataset_t *ds,
1895 1904 uint64_t *refdbytesp, uint64_t *availbytesp,
1896 1905 uint64_t *usedobjsp, uint64_t *availobjsp)
1897 1906 {
1898 1907 *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes;
1899 1908 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1900 1909 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes)
1901 1910 *availbytesp +=
1902 1911 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes;
1903 1912 if (ds->ds_quota != 0) {
1904 1913 /*
1905 1914 * Adjust available bytes according to refquota
1906 1915 */
1907 1916 if (*refdbytesp < ds->ds_quota)
1908 1917 *availbytesp = MIN(*availbytesp,
1909 1918 ds->ds_quota - *refdbytesp);
1910 1919 else
1911 1920 *availbytesp = 0;
1912 1921 }
1913 1922 *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);
1914 1923 *availobjsp = DN_MAX_OBJECT - *usedobjsp;
1915 1924 }
1916 1925
1917 1926 boolean_t
1918 1927 dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
1919 1928 {
1920 1929 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1921 1930
1922 1931 ASSERT(dsl_pool_config_held(dp));
1923 1932 if (snap == NULL)
1924 1933 return (B_FALSE);
1925 1934 if (dsl_dataset_phys(ds)->ds_bp.blk_birth >
1926 1935 dsl_dataset_phys(snap)->ds_creation_txg) {
1927 1936 objset_t *os, *os_snap;
1928 1937 /*
1929 1938 * It may be that only the ZIL differs, because it was
1930 1939 * reset in the head. Don't count that as being
1931 1940 * modified.
1932 1941 */
1933 1942 if (dmu_objset_from_ds(ds, &os) != 0)
1934 1943 return (B_TRUE);
1935 1944 if (dmu_objset_from_ds(snap, &os_snap) != 0)
1936 1945 return (B_TRUE);
1937 1946 return (bcmp(&os->os_phys->os_meta_dnode,
1938 1947 &os_snap->os_phys->os_meta_dnode,
1939 1948 sizeof (os->os_phys->os_meta_dnode)) != 0);
1940 1949 }
1941 1950 return (B_FALSE);
1942 1951 }
1943 1952
/*
 * Arguments for the rename-snapshot sync task.  ddrsa_tx is filled in
 * by the sync function before iterating over the affected datasets.
 */
typedef struct dsl_dataset_rename_snapshot_arg {
	const char *ddrsa_fsname;	/* filesystem containing the snap */
	const char *ddrsa_oldsnapname;	/* current snapshot name */
	const char *ddrsa_newsnapname;	/* desired snapshot name */
	boolean_t ddrsa_recursive;	/* also rename in descendants */
	dmu_tx_t *ddrsa_tx;		/* open transaction (sync side) */
} dsl_dataset_rename_snapshot_arg_t;
1951 1960
1952 1961 /* ARGSUSED */
1953 1962 static int
1954 1963 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
1955 1964 dsl_dataset_t *hds, void *arg)
1956 1965 {
1957 1966 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1958 1967 int error;
1959 1968 uint64_t val;
1960 1969
1961 1970 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
1962 1971 if (error != 0) {
1963 1972 /* ignore nonexistent snapshots */
1964 1973 return (error == ENOENT ? 0 : error);
1965 1974 }
1966 1975
1967 1976 /* new name should not exist */
1968 1977 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
1969 1978 if (error == 0)
1970 1979 error = SET_ERROR(EEXIST);
1971 1980 else if (error == ENOENT)
1972 1981 error = 0;
1973 1982
1974 1983 /* dataset name + 1 for the "@" + the new snapshot name must fit */
1975 1984 if (dsl_dir_namelen(hds->ds_dir) + 1 +
1976 1985 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
1977 1986 error = SET_ERROR(ENAMETOOLONG);
1978 1987
1979 1988 return (error);
1980 1989 }
1981 1990
1982 1991 static int
1983 1992 dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
1984 1993 {
1985 1994 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
1986 1995 dsl_pool_t *dp = dmu_tx_pool(tx);
1987 1996 dsl_dataset_t *hds;
1988 1997 int error;
1989 1998
1990 1999 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
1991 2000 if (error != 0)
1992 2001 return (error);
1993 2002
1994 2003 if (ddrsa->ddrsa_recursive) {
1995 2004 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
1996 2005 dsl_dataset_rename_snapshot_check_impl, ddrsa,
1997 2006 DS_FIND_CHILDREN);
1998 2007 } else {
1999 2008 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
2000 2009 }
2001 2010 dsl_dataset_rele(hds, FTAG);
2002 2011 return (error);
2003 2012 }
2004 2013
/*
 * Apply a snapshot rename on a single filesystem "hds": look the
 * snapshot up by its old name, remove the old snapnames-ZAP entry,
 * update the in-core name, and add an entry under the new name.
 * Filesystems that don't have the old snapshot are skipped.
 */
static int
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_dataset_t *ds;
	uint64_t val;
	dmu_tx_t *tx = ddrsa->ddrsa_tx;
	int error;

	/* Other failures were already ruled out by the check function. */
	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	ASSERT(error == 0 || error == ENOENT);
	if (error == ENOENT) {
		/* ignore nonexistent snapshots */
		return (0);
	}

	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));

	/* log before we change the name */
	spa_history_log_internal_ds(ds, "rename", tx,
	    "-> @%s", ddrsa->ddrsa_newsnapname);

	/*
	 * The in-core name is updated (under ds_lock) between removing
	 * the old ZAP entry and adding the new one, because the
	 * zap_add() below reads ds->ds_snapname as the new key.
	 */
	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
	    B_FALSE));
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
	mutex_exit(&ds->ds_lock);
	VERIFY0(zap_add(dp->dp_meta_objset,
	    dsl_dataset_phys(hds)->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));

	dsl_dataset_rele(ds, FTAG);
	return (0);
}
2040 2049
2041 2050 static void
2042 2051 dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
2043 2052 {
2044 2053 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
2045 2054 dsl_pool_t *dp = dmu_tx_pool(tx);
2046 2055 dsl_dataset_t *hds;
2047 2056
2048 2057 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
2049 2058 ddrsa->ddrsa_tx = tx;
2050 2059 if (ddrsa->ddrsa_recursive) {
2051 2060 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
2052 2061 dsl_dataset_rename_snapshot_sync_impl, ddrsa,
2053 2062 DS_FIND_CHILDREN));
2054 2063 } else {
2055 2064 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
2056 2065 }
2057 2066 dsl_dataset_rele(hds, FTAG);
2058 2067 }
2059 2068
2060 2069 int
2061 2070 dsl_dataset_rename_snapshot(const char *fsname,
2062 2071 const char *oldsnapname, const char *newsnapname, boolean_t recursive)
2063 2072 {
2064 2073 dsl_dataset_rename_snapshot_arg_t ddrsa;
2065 2074
2066 2075 ddrsa.ddrsa_fsname = fsname;
2067 2076 ddrsa.ddrsa_oldsnapname = oldsnapname;
2068 2077 ddrsa.ddrsa_newsnapname = newsnapname;
2069 2078 ddrsa.ddrsa_recursive = recursive;
2070 2079
2071 2080 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
2072 2081 dsl_dataset_rename_snapshot_sync, &ddrsa,
2073 2082 1, ZFS_SPACE_CHECK_RESERVED));
2074 2083 }
2075 2084
2076 2085 /*
2077 2086 * If we're doing an ownership handoff, we need to make sure that there is
2078 2087 * only one long hold on the dataset. We're not allowed to change anything here
2079 2088 * so we don't permanently release the long hold or regular hold here. We want
2080 2089 * to do this only when syncing to avoid the dataset unexpectedly going away
2081 2090 * when we release the long hold.
2082 2091 */
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
	boolean_t held;

	/* Only enforce this in syncing context (see comment above). */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (owner != NULL) {
		/*
		 * Temporarily drop the owner's own long hold so it is
		 * not counted below; it is re-taken before returning.
		 */
		VERIFY3P(ds->ds_owner, ==, owner);
		dsl_dataset_long_rele(ds, owner);
	}

	held = dsl_dataset_long_held(ds);

	if (owner != NULL)
		dsl_dataset_long_hold(ds, owner);

	/* Any remaining long hold belongs to someone else. */
	if (held)
		return (SET_ERROR(EBUSY));

	return (0);
}
2106 2115
/* Arguments for the rollback sync task. */
typedef struct dsl_dataset_rollback_arg {
	const char *ddra_fsname;	/* filesystem/volume to roll back */
	void *ddra_owner;		/* expected owner, or NULL */
	nvlist_t *ddra_result;		/* out: "target" snapshot name */
} dsl_dataset_rollback_arg_t;
2112 2121
2113 2122 static int
2114 2123 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
2115 2124 {
2116 2125 dsl_dataset_rollback_arg_t *ddra = arg;
2117 2126 dsl_pool_t *dp = dmu_tx_pool(tx);
2118 2127 dsl_dataset_t *ds;
2119 2128 int64_t unused_refres_delta;
2120 2129 int error;
2121 2130
2122 2131 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
2123 2132 if (error != 0)
2124 2133 return (error);
2125 2134
2126 2135 /* must not be a snapshot */
2127 2136 if (ds->ds_is_snapshot) {
2128 2137 dsl_dataset_rele(ds, FTAG);
2129 2138 return (SET_ERROR(EINVAL));
2130 2139 }
2131 2140
2132 2141 /* must have a most recent snapshot */
2133 2142 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
2134 2143 dsl_dataset_rele(ds, FTAG);
2135 2144 return (SET_ERROR(EINVAL));
2136 2145 }
2137 2146
2138 2147 /* must not have any bookmarks after the most recent snapshot */
2139 2148 nvlist_t *proprequest = fnvlist_alloc();
2140 2149 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
2141 2150 nvlist_t *bookmarks = fnvlist_alloc();
2142 2151 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
2143 2152 fnvlist_free(proprequest);
2144 2153 if (error != 0)
2145 2154 return (error);
2146 2155 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
2147 2156 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
2148 2157 nvlist_t *valuenv =
2149 2158 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair),
2150 2159 zfs_prop_to_name(ZFS_PROP_CREATETXG));
2151 2160 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value");
2152 2161 if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
2153 2162 fnvlist_free(bookmarks);
2154 2163 dsl_dataset_rele(ds, FTAG);
2155 2164 return (SET_ERROR(EEXIST));
2156 2165 }
2157 2166 }
2158 2167 fnvlist_free(bookmarks);
2159 2168
2160 2169 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
2161 2170 if (error != 0) {
2162 2171 dsl_dataset_rele(ds, FTAG);
2163 2172 return (error);
2164 2173 }
2165 2174
2166 2175 /*
2167 2176 * Check if the snap we are rolling back to uses more than
2168 2177 * the refquota.
2169 2178 */
2170 2179 if (ds->ds_quota != 0 &&
2171 2180 dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) {
2172 2181 dsl_dataset_rele(ds, FTAG);
2173 2182 return (SET_ERROR(EDQUOT));
2174 2183 }
2175 2184
2176 2185 /*
2177 2186 * When we do the clone swap, we will temporarily use more space
2178 2187 * due to the refreservation (the head will no longer have any
2179 2188 * unique space, so the entire amount of the refreservation will need
2180 2189 * to be free). We will immediately destroy the clone, freeing
2181 2190 * this space, but the freeing happens over many txg's.
2182 2191 */
2183 2192 unused_refres_delta = (int64_t)MIN(ds->ds_reserved,
2184 2193 dsl_dataset_phys(ds)->ds_unique_bytes);
2185 2194
2186 2195 if (unused_refres_delta > 0 &&
2187 2196 unused_refres_delta >
2188 2197 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) {
2189 2198 dsl_dataset_rele(ds, FTAG);
2190 2199 return (SET_ERROR(ENOSPC));
2191 2200 }
2192 2201
2193 2202 dsl_dataset_rele(ds, FTAG);
2194 2203 return (0);
2195 2204 }
2196 2205
/*
 * Sync function for rollback: revert the head dataset to its most
 * recent snapshot by creating a temporary clone ("%rollback") of that
 * snapshot, swapping the clone's contents with the head, and then
 * destroying the clone.  The name of the snapshot rolled back to is
 * returned under key "target" in ddra_result.
 */
static void
dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rollback_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds, *clone;
	uint64_t cloneobj;
	char namebuf[ZFS_MAXNAMELEN];

	VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));

	/* Report which snapshot we are rolling back to. */
	dsl_dataset_name(ds->ds_prev, namebuf);
	fnvlist_add_string(ddra->ddra_result, "target", namebuf);

	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);

	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));

	/* Swap head and clone contents; discard the old head's ZIL. */
	dsl_dataset_clone_swap_sync_impl(clone, ds, tx);
	dsl_dataset_zero_zil(ds, tx);

	/* The clone now holds the pre-rollback state; destroy it. */
	dsl_destroy_head_sync_impl(clone, tx);

	dsl_dataset_rele(clone, FTAG);
	dsl_dataset_rele(ds, FTAG);
}
2224 2233
2225 2234 /*
2226 2235 * Rolls back the given filesystem or volume to the most recent snapshot.
2227 2236 * The name of the most recent snapshot will be returned under key "target"
2228 2237 * in the result nvlist.
2229 2238 *
2230 2239 * If owner != NULL:
2231 2240 * - The existing dataset MUST be owned by the specified owner at entry
2232 2241 * - Upon return, dataset will still be held by the same owner, whether we
2233 2242 * succeed or not.
2234 2243 *
2235 2244 * This mode is required any time the existing filesystem is mounted. See
2236 2245 * notes above zfs_suspend_fs() for further details.
2237 2246 */
2238 2247 int
2239 2248 dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result)
2240 2249 {
2241 2250 dsl_dataset_rollback_arg_t ddra;
2242 2251
2243 2252 ddra.ddra_fsname = fsname;
2244 2253 ddra.ddra_owner = owner;
2245 2254 ddra.ddra_result = result;
2246 2255
2247 2256 return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
2248 2257 dsl_dataset_rollback_sync, &ddra,
2249 2258 1, ZFS_SPACE_CHECK_RESERVED));
2250 2259 }
2251 2260
/* One held snapshot in a promote snaplist (built by snaplist_make()). */
struct promotenode {
	list_node_t link;	/* linkage in the snaplist */
	dsl_dataset_t *ds;	/* held snapshot dataset */
};
2256 2265
/*
 * Arguments and intermediate state for the promote sync task.  The
 * space totals (used/comp/uncomp/...) are computed by the check
 * function in syncing context and consumed by the sync function.
 */
typedef struct dsl_dataset_promote_arg {
	const char *ddpa_clonename;	/* name of the clone to promote */
	dsl_dataset_t *ddpa_clone;	/* held clone (see promote_hold()) */
	list_t shared_snaps, origin_snaps, clone_snaps;
	dsl_dataset_t *origin_origin; /* origin of the origin */
	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
	char *err_ds;		/* out: name of a conflicting snapshot */
	cred_t *cr;		/* credentials for the space-transfer check */
} dsl_dataset_promote_arg_t;
2266 2275
2267 2276 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2268 2277 static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp,
2269 2278 void *tag);
2270 2279 static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag);
2271 2280
/*
 * Check function for the promote sync task: validates that the clone
 * can be promoted and, in syncing context only, precomputes the space
 * accounting (used/comp/uncomp, new unique space, and per-dir
 * snapshot-space totals) that dsl_dataset_promote_sync() will apply.
 */
static int
dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_promote_arg_t *ddpa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	struct promotenode *snap;
	dsl_dataset_t *origin_ds;
	int err;
	uint64_t unused;
	uint64_t ss_mv_cnt;
	size_t max_snap_len;

	err = promote_hold(ddpa, dp, FTAG);
	if (err != 0)
		return (err);

	hds = ddpa->ddpa_clone;
	/* Moved snap names must fit after the clone's name plus "@". */
	max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1;

	if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) {
		promote_rele(ddpa, FTAG);
		return (SET_ERROR(EXDEV));
	}

	/*
	 * Compute and check the amount of space to transfer.  Since this is
	 * so expensive, don't do the preliminary check.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		promote_rele(ddpa, FTAG);
		return (0);
	}

	snap = list_head(&ddpa->shared_snaps);
	origin_ds = snap->ds;

	/* compute origin's new unique space */
	snap = list_tail(&ddpa->clone_snaps);
	ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
	    dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX,
	    &ddpa->unique, &unused, &unused);

	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used by each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
	ss_mv_cnt = 0;
	ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes;
	ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes;
	ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes;
	for (snap = list_head(&ddpa->shared_snaps); snap;
	    snap = list_next(&ddpa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		ss_mv_cnt++;

		/*
		 * If there are long holds, we won't be able to evict
		 * the objset.
		 */
		if (dsl_dataset_long_held(ds)) {
			err = SET_ERROR(EBUSY);
			goto out;
		}

		/* Check that the snapshot name does not conflict */
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (strlen(ds->ds_snapname) >= max_snap_len) {
			err = SET_ERROR(ENAMETOOLONG);
			goto out;
		}
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0) {
			/* Report which snapshot name collided. */
			(void) strcpy(ddpa->err_ds, snap->ds->ds_snapname);
			err = SET_ERROR(EEXIST);
			goto out;
		}
		if (err != ENOENT)
			goto out;

		/* The very first snapshot does not have a deadlist */
		if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0)
			continue;

		dsl_deadlist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp);
		ddpa->used += dlused;
		ddpa->comp += dlcomp;
		ddpa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (ddpa->origin_origin) {
		ddpa->used -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes;
		ddpa->comp -=
		    dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes;
		ddpa->uncomp -=
		    dsl_dataset_phys(ddpa->origin_origin)->
		    ds_uncompressed_bytes;
	}

	/* Check that there is enough space and limit headroom here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    0, ss_mv_cnt, ddpa->used, ddpa->cr);
	if (err != 0)
		goto out;

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so dd_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> dsl_deadlist_space_range()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&ddpa->origin_snaps);
		err = snaplist_space(&ddpa->shared_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
		if (err != 0)
			goto out;

		err = snaplist_space(&ddpa->clone_snaps,
		    snap->ds->ds_dir->dd_origin_txg, &space);
		if (err != 0)
			goto out;
		ddpa->cloneusedsnap += space;
	}
	if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags &
	    DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&ddpa->origin_snaps,
		    dsl_dataset_phys(origin_ds)->ds_creation_txg,
		    &ddpa->originusedsnap);
		if (err != 0)
			goto out;
	}

out:
	promote_rele(ddpa, FTAG);
	return (err);
}
2439 2448
/*
 * Sync function for promote: swap the roles of the clone ("hds") and
 * the filesystem containing its origin snapshot ("origin_head").  The
 * snapshots older than the clone's branch point move to the clone's
 * dsl_dir, origin linkage and dd_clones bookkeeping are rewired, and
 * the space accounting precomputed by the check function is applied.
 * Everything here must succeed; failures were ruled out in the check.
 */
static void
dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_promote_arg_t *ddpa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	struct promotenode *snap;
	dsl_dataset_t *origin_ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	VERIFY0(promote_hold(ddpa, dp, FTAG));
	hds = ddpa->ddpa_clone;

	ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE);

	snap = list_head(&ddpa->shared_snaps);
	origin_ds = snap->ds;
	dd = hds->ds_dir;

	snap = list_head(&ddpa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;
	snap = list_tail(&ddpa->clone_snaps);
	ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) {
		dsl_dataset_remove_from_next_clones(origin_ds,
		    snap->ds->ds_object, tx);
		VERIFY0(zap_add_int(dp->dp_meta_objset,
		    dsl_dataset_phys(origin_ds)->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object);
	dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj;
	dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_dir->dd_origin_txg =
	    dsl_dataset_phys(origin_ds)->ds_creation_txg;

	/* change dd_clone entries */
	/*
	 * NOTE(review): the two ZAP operations below dereference
	 * ddpa->origin_origin unconditionally, but the check function's
	 * comment implies origin_origin may be NULL when the origin is
	 * not itself a clone -- confirm this path is unreachable (or
	 * otherwise guarded) in that case.
	 */
	if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
		VERIFY0(zap_remove_int(dp->dp_meta_objset,
		    dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx));
		VERIFY0(zap_add_int(dp->dp_meta_objset,
		    dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,
		    hds->ds_object, tx));

		VERIFY0(zap_remove_int(dp->dp_meta_objset,
		    dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones,
		    origin_head->ds_object, tx));
		if (dsl_dir_phys(dd)->dd_clones == 0) {
			dsl_dir_phys(dd)->dd_clones =
			    zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES,
			    DMU_OT_NONE, 0, tx);
		}
		VERIFY0(zap_add_int(dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx));
	}

	/* move snapshots to this dir */
	for (snap = list_head(&ddpa->shared_snaps); snap;
	    snap = list_next(&ddpa->shared_snaps, snap)) {
		dsl_dataset_t *ds = snap->ds;

		/*
		 * Property callbacks are registered to a particular
		 * dsl_dir.  Since ours is changing, evict the objset
		 * so that they will be unregistered from the old dsl_dir.
		 */
		if (ds->ds_objset) {
			dmu_objset_evict(ds->ds_objset);
			ds->ds_objset = NULL;
		}

		/* move snap name entry */
		VERIFY0(dsl_dataset_get_snapname(ds));
		VERIFY0(dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx, B_TRUE));
		VERIFY0(zap_add(dp->dp_meta_objset,
		    dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));
		dsl_fs_ss_count_adjust(hds->ds_dir, 1,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object);
		dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		dsl_dir_rele(ds->ds_dir, ds);
		VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		/* move any clone references */
		if (dsl_dataset_phys(ds)->ds_next_clones_obj &&
		    spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			zap_cursor_t zc;
			zap_attribute_t za;

			for (zap_cursor_init(&zc, dp->dp_meta_objset,
			    dsl_dataset_phys(ds)->ds_next_clones_obj);
			    zap_cursor_retrieve(&zc, &za) == 0;
			    zap_cursor_advance(&zc)) {
				dsl_dataset_t *cnds;
				uint64_t o;

				if (za.za_first_integer == oldnext_obj) {
					/*
					 * We've already moved the
					 * origin's reference.
					 */
					continue;
				}

				VERIFY0(dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &cnds));
				o = dsl_dir_phys(cnds->ds_dir)->
				    dd_head_dataset_obj;

				VERIFY0(zap_remove_int(dp->dp_meta_objset,
				    dsl_dir_phys(odd)->dd_clones, o, tx));
				VERIFY0(zap_add_int(dp->dp_meta_objset,
				    dsl_dir_phys(dd)->dd_clones, o, tx));
				dsl_dataset_rele(cnds, FTAG);
			}
			zap_cursor_fini(&zc);
		}

		ASSERT(!dsl_prop_hascb(ds));
	}

	/*
	 * Change space accounting.
	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */

	delta = ddpa->cloneusedsnap -
	    dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(ddpa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);

	delta = ddpa->originusedsnap -
	    dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(ddpa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);

	dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique;

	/* log history record */
	spa_history_log_internal_ds(hds, "promote", tx, "");

	dsl_dir_rele(odd, FTAG);
	promote_rele(ddpa, FTAG);
}
2623 2632
/*
 * Make a list of dsl_dataset_t's for the snapshots between first_obj
 * (exclusive) and last_obj (inclusive).  The list will be in reverse
 * order (last_obj will be the list_head()).  If first_obj == 0, do all
 * snapshots back to this dataset's origin.
 *
 * Each node holds a reference on its dataset (with "tag"); on error the
 * partially-built list is returned as-is and the caller is expected to
 * clean up with snaplist_destroy().
 */
static int
snaplist_make(dsl_pool_t *dp,
    uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag)
{
	uint64_t obj = last_obj;

	list_create(l, sizeof (struct promotenode),
	    offsetof(struct promotenode, link));

	while (obj != first_obj) {
		dsl_dataset_t *ds;
		struct promotenode *snap;
		int err;

		err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
		/* Snapshot chains should never reference a missing object. */
		ASSERT(err != ENOENT);
		if (err != 0)
			return (err);

		/*
		 * first_obj == 0 means "walk back to the origin": resolve
		 * it to the origin object on the first iteration.
		 */
		if (first_obj == 0)
			first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj;

		snap = kmem_alloc(sizeof (*snap), KM_SLEEP);
		snap->ds = ds;
		list_insert_tail(l, snap);
		/* Step to the next-older snapshot. */
		obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
	}

	return (0);
}
2660 2669
2661 2670 static int
2662 2671 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
2663 2672 {
2664 2673 struct promotenode *snap;
2665 2674
2666 2675 *spacep = 0;
2667 2676 for (snap = list_head(l); snap; snap = list_next(l, snap)) {
2668 2677 uint64_t used, comp, uncomp;
2669 2678 dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2670 2679 mintxg, UINT64_MAX, &used, &comp, &uncomp);
2671 2680 *spacep += used;
2672 2681 }
2673 2682 return (0);
2674 2683 }
2675 2684
2676 2685 static void
2677 2686 snaplist_destroy(list_t *l, void *tag)
2678 2687 {
2679 2688 struct promotenode *snap;
2680 2689
2681 2690 if (l == NULL || !list_link_active(&l->list_head))
2682 2691 return;
2683 2692
2684 2693 while ((snap = list_tail(l)) != NULL) {
2685 2694 list_remove(l, snap);
2686 2695 dsl_dataset_rele(snap->ds, tag);
2687 2696 kmem_free(snap, sizeof (*snap));
2688 2697 }
2689 2698 list_destroy(l);
2690 2699 }
2691 2700
/*
 * Acquire the holds needed to promote ddpa->ddpa_clonename: the clone
 * itself, three snapshot lists (shared_snaps, clone_snaps, origin_snaps),
 * and, when the origin is itself a clone, its origin snapshot
 * (origin_origin).  On failure, everything acquired so far is released
 * via promote_rele() before returning.
 */
static int
promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
{
	int error;
	dsl_dir_t *dd;
	struct promotenode *snap;

	error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
	    &ddpa->ddpa_clone);
	if (error != 0)
		return (error);
	dd = ddpa->ddpa_clone->ds_dir;

	/* Only a non-snapshot dataset that is a clone can be promoted. */
	if (ddpa->ddpa_clone->ds_is_snapshot ||
	    !dsl_dir_is_clone(dd)) {
		dsl_dataset_rele(ddpa->ddpa_clone, tag);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Snapshots shared with the origin: from the clone's branch point
	 * (dd_origin_obj) back to the origin's own origin (first_obj == 0).
	 */
	error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
	    &ddpa->shared_snaps, tag);
	if (error != 0)
		goto out;

	/* All snapshots of the clone itself. */
	error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
	    &ddpa->clone_snaps, tag);
	if (error != 0)
		goto out;

	/*
	 * Snapshots of the origin filesystem newer than the branch point:
	 * from the origin's head dataset back to dd_origin_obj (exclusive).
	 * The head of shared_snaps is the branch-point snapshot itself.
	 */
	snap = list_head(&ddpa->shared_snaps);
	ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
	error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
	    dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
	    &ddpa->origin_snaps, tag);
	if (error != 0)
		goto out;

	/* If the origin is itself a clone, also hold its origin snapshot. */
	if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) {
		error = dsl_dataset_hold_obj(dp,
		    dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj,
		    tag, &ddpa->origin_origin);
		if (error != 0)
			goto out;
	}
out:
	if (error != 0)
		promote_rele(ddpa, tag);
	return (error);
}
2741 2750
/*
 * Release everything acquired by promote_hold(): the three snapshot
 * lists, the origin's origin (if held), and the clone itself.
 */
static void
promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
{
	snaplist_destroy(&ddpa->shared_snaps, tag);
	snaplist_destroy(&ddpa->clone_snaps, tag);
	snaplist_destroy(&ddpa->origin_snaps, tag);
	if (ddpa->origin_origin != NULL)
		dsl_dataset_rele(ddpa->origin_origin, tag);
	dsl_dataset_rele(ddpa->ddpa_clone, tag);
}
2752 2761
/*
 * Promote a clone.
 *
 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
 * in with the name.  (It must be at least MAXNAMELEN bytes long.)
 */
int
dsl_dataset_promote(const char *name, char *conflsnap)
{
	dsl_dataset_promote_arg_t ddpa = { 0 };
	uint64_t numsnaps;
	int error;
	objset_t *os;

	/*
	 * We will modify space proportional to the number of
	 * snapshots.  Compute numsnaps.
	 */
	error = dmu_objset_hold(name, FTAG, &os);
	if (error != 0)
		return (error);
	/* Count entries in the dataset's snapshot-names ZAP. */
	error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
	    dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj,
	    &numsnaps);
	dmu_objset_rele(os, FTAG);
	if (error != 0)
		return (error);

	ddpa.ddpa_clonename = name;
	ddpa.err_ds = conflsnap;
	ddpa.cr = CRED();

	/*
	 * The sync task's space estimate scales with the snapshot count
	 * (2 + numsnaps "units" of modified space).
	 */
	return (dsl_sync_task(name, dsl_dataset_promote_check,
	    dsl_dataset_promote_sync, &ddpa,
	    2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
}
2789 2798
/*
 * Check whether the contents of "clone" and "origin_head" may be swapped
 * (see dsl_dataset_clone_swap_sync_impl()).  Returns 0 if the swap is
 * permissible, or an errno describing why it is not.
 */
int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
	/*
	 * "slack" factor for received datasets with refquota set on them.
	 * See the bottom of this function for details on its use.
	 */
	uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation;
	int64_t unused_refres_delta;

	/* they should both be heads */
	if (clone->ds_is_snapshot ||
	    origin_head->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	/* if we are not forcing, the branch point should be just before them */
	if (!force && clone->ds_prev != origin_head->ds_prev)
		return (SET_ERROR(EINVAL));

	/* clone should be the clone (unless they are unrelated) */
	if (clone->ds_prev != NULL &&
	    clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
	    origin_head->ds_dir != clone->ds_prev->ds_dir)
		return (SET_ERROR(EINVAL));

	/* the clone should be a child of the origin */
	if (clone->ds_dir->dd_parent != origin_head->ds_dir)
		return (SET_ERROR(EINVAL));

	/* origin_head shouldn't be modified unless 'force' */
	if (!force &&
	    dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
		return (SET_ERROR(ETXTBSY));

	/* origin_head should have no long holds (e.g. is not mounted) */
	if (dsl_dataset_handoff_check(origin_head, owner, tx))
		return (SET_ERROR(EBUSY));

	/* check amount of any unconsumed refreservation */
	unused_refres_delta =
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(origin_head)->ds_unique_bytes) -
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(clone)->ds_unique_bytes);

	if (unused_refres_delta > 0 &&
	    unused_refres_delta >
	    dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * The clone can't be too much over the head's refquota.
	 *
	 * To ensure that the entire refquota can be used, we allow one
	 * transaction to exceed the refquota.  Therefore, this check
	 * needs to also allow for the space referenced to be more than the
	 * refquota.  The maximum amount of space that one transaction can use
	 * on disk is DMU_MAX_ACCESS * spa_asize_inflation.  Allowing this
	 * overage ensures that we are able to receive a filesystem that
	 * exceeds the refquota on the source system.
	 *
	 * So that overage is the refquota_slack we use below.
	 */
	if (origin_head->ds_quota != 0 &&
	    dsl_dataset_phys(clone)->ds_referenced_bytes >
	    origin_head->ds_quota + refquota_slack)
		return (SET_ERROR(EDQUOT));

	return (0);
}
2861 2870
/*
 * Swap the contents of "clone" and "origin_head": per-dataset feature
 * flags, block pointers, space accounting (dd_*_bytes and ds_*_bytes),
 * refreservation deltas, and deadlists.  The caller supplies the open
 * transaction "tx"; dsl_dataset_clone_swap_check_impl() is expected to
 * have validated the operation.
 */
void
dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	int64_t unused_refres_delta;

	ASSERT(clone->ds_reserved == 0);
	/*
	 * NOTE: On DEBUG kernels there could be a race between this and
	 * the check function if spa_asize_inflation is adjusted...
	 */
	ASSERT(origin_head->ds_quota == 0 ||
	    dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota +
	    DMU_MAX_ACCESS * spa_asize_inflation);
	ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);

	/*
	 * Swap per-dataset feature flags.
	 */
	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (!(spa_feature_table[f].fi_flags &
		    ZFEATURE_FLAG_PER_DATASET)) {
			ASSERT(!clone->ds_feature_inuse[f]);
			ASSERT(!origin_head->ds_feature_inuse[f]);
			continue;
		}

		boolean_t clone_inuse = clone->ds_feature_inuse[f];
		boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f];

		/* Deactivate on both sides first, then cross-activate. */
		if (clone_inuse) {
			dsl_dataset_deactivate_feature(clone->ds_object, f, tx);
			clone->ds_feature_inuse[f] = B_FALSE;
		}
		if (origin_head_inuse) {
			dsl_dataset_deactivate_feature(origin_head->ds_object,
			    f, tx);
			origin_head->ds_feature_inuse[f] = B_FALSE;
		}
		if (clone_inuse) {
			dsl_dataset_activate_feature(origin_head->ds_object,
			    f, tx);
			origin_head->ds_feature_inuse[f] = B_TRUE;
		}
		if (origin_head_inuse) {
			dsl_dataset_activate_feature(clone->ds_object, f, tx);
			clone->ds_feature_inuse[f] = B_TRUE;
		}
	}

	dmu_buf_will_dirty(clone->ds_dbuf, tx);
	dmu_buf_will_dirty(origin_head->ds_dbuf, tx);

	/* Evict any cached objsets; they will be re-opened from the new bp. */
	if (clone->ds_objset != NULL) {
		dmu_objset_evict(clone->ds_objset);
		clone->ds_objset = NULL;
	}

	if (origin_head->ds_objset != NULL) {
		dmu_objset_evict(origin_head->ds_objset);
		origin_head->ds_objset = NULL;
	}

	unused_refres_delta =
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(origin_head)->ds_unique_bytes) -
	    (int64_t)MIN(origin_head->ds_reserved,
	    dsl_dataset_phys(clone)->ds_unique_bytes);

	/*
	 * Reset origin's unique bytes, if it exists.
	 */
	if (clone->ds_prev) {
		dsl_dataset_t *origin = clone->ds_prev;
		uint64_t comp, uncomp;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		dsl_deadlist_space_range(&clone->ds_deadlist,
		    dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX,
		    &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp);
	}

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = dsl_dataset_phys(origin_head)->ds_bp;
		dsl_dataset_phys(origin_head)->ds_bp =
		    dsl_dataset_phys(clone)->ds_bp;
		dsl_dataset_phys(clone)->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(dsl_dir_phys(clone->ds_dir)->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		dsl_deadlist_space(&clone->ds_deadlist,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space(&origin_head->ds_deadlist,
		    &odl_used, &odl_comp, &odl_uncomp);

		/* Net change = (clone referenced+deadlist) - (head's). */
		dused = dsl_dataset_phys(clone)->ds_referenced_bytes +
		    cdl_used -
		    (dsl_dataset_phys(origin_head)->ds_referenced_bytes +
		    odl_used);
		dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes +
		    cdl_comp -
		    (dsl_dataset_phys(origin_head)->ds_compressed_bytes +
		    odl_comp);
		duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes +
		    odl_uncomp);

		dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		dsl_deadlist_space_range(&clone->ds_deadlist,
		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space_range(&origin_head->ds_deadlist,
		    origin_head->ds_dir->dd_origin_txg, UINT64_MAX,
		    &odl_used, &odl_comp, &odl_uncomp);
		dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}

	/* swap ds_*_bytes */
	SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes,
	    dsl_dataset_phys(clone)->ds_referenced_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes,
	    dsl_dataset_phys(clone)->ds_compressed_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes,
	    dsl_dataset_phys(clone)->ds_uncompressed_bytes);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes,
	    dsl_dataset_phys(clone)->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV,
	    unused_refres_delta, 0, 0, tx);

	/*
	 * Swap deadlists.
	 */
	dsl_deadlist_close(&clone->ds_deadlist);
	dsl_deadlist_close(&origin_head->ds_deadlist);
	SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj,
	    dsl_dataset_phys(clone)->ds_deadlist_obj);
	dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset,
	    dsl_dataset_phys(clone)->ds_deadlist_obj);
	dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset,
	    dsl_dataset_phys(origin_head)->ds_deadlist_obj);

	/* Let the scrub/resilver code know the two datasets traded places. */
	dsl_scan_ds_clone_swapped(origin_head, clone, tx);

	spa_history_log_internal_ds(clone, "clone swap", tx,
	    "parent=%s", origin_head->ds_dir->dd_myname);
}
3033 3042
3034 3043 /*
3035 3044 * Given a pool name and a dataset object number in that pool,
3036 3045 * return the name of that dataset.
3037 3046 */
3038 3047 int
3039 3048 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
3040 3049 {
3041 3050 dsl_pool_t *dp;
3042 3051 dsl_dataset_t *ds;
3043 3052 int error;
3044 3053
3045 3054 error = dsl_pool_hold(pname, FTAG, &dp);
3046 3055 if (error != 0)
3047 3056 return (error);
3048 3057
3049 3058 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
3050 3059 if (error == 0) {
3051 3060 dsl_dataset_name(ds, buf);
3052 3061 dsl_dataset_rele(ds, FTAG);
3053 3062 }
3054 3063 dsl_pool_rele(dp, FTAG);
3055 3064
3056 3065 return (error);
3057 3066 }
3058 3067
/*
 * Check a pending allocation of "asize" bytes (with "inflight" bytes
 * already in flight) against the dataset's refquota.  On entry *used is
 * the caller's space estimate; it is adjusted downward for any unconsumed
 * refreservation.  *ref_rsrv is set to the portion of asize that will be
 * satisfied from refreservation.  Returns 0, ERESTART (estimate over
 * quota but pending changes may free space), or EDQUOT.
 */
int
dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
    uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
{
	int error = 0;

	ASSERT3S(asize, >, 0);

	/*
	 * *ref_rsrv is the portion of asize that will come from any
	 * unconsumed refreservation space.
	 */
	*ref_rsrv = 0;

	mutex_enter(&ds->ds_lock);
	/*
	 * Make a space adjustment for reserved bytes.
	 */
	if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) {
		ASSERT3U(*used, >=,
		    ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);
		*used -=
		    (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes);
		*ref_rsrv =
		    asize - MIN(asize, parent_delta(ds, asize + inflight));
	}

	if (!check_quota || ds->ds_quota == 0) {
		mutex_exit(&ds->ds_lock);
		return (0);
	}
	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
	if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >=
	    ds->ds_quota) {
		if (inflight > 0 ||
		    dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota)
			error = SET_ERROR(ERESTART);
		else
			error = SET_ERROR(EDQUOT);
	}
	mutex_exit(&ds->ds_lock);

	return (error);
}
3108 3117
/*
 * Argument block shared by the refquota and refreservation sync tasks.
 */
typedef struct dsl_dataset_set_qr_arg {
	const char *ddsqra_name;	/* name of the target dataset */
	zprop_source_t ddsqra_source;	/* source passed to dsl_prop_* calls */
	uint64_t ddsqra_value;		/* new property value */
} dsl_dataset_set_qr_arg_t;
3114 3123
3115 3124
/* ARGSUSED */
/*
 * Check half of the set-refquota sync task: verify pool support, that the
 * target is not a snapshot, and that the predicted new value is not below
 * current referenced bytes or the refreservation.
 */
static int
dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;
	uint64_t newval;

	if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
		return (SET_ERROR(ENOTSUP));

	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
	if (error != 0)
		return (error);

	/* refquota only applies to filesystems/volumes, not snapshots */
	if (ds->ds_is_snapshot) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EINVAL));
	}

	error = dsl_prop_predict(ds->ds_dir,
	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	/* newval == 0 means "no refquota"; nothing more to validate */
	if (newval == 0) {
		dsl_dataset_rele(ds, FTAG);
		return (0);
	}

	/* the new limit must cover current usage and the refreservation */
	if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
	    newval < ds->ds_reserved) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOSPC));
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}
3160 3169
/*
 * Sync half of the set-refquota sync task: store the property, then read
 * back the effective value and update the in-core ds_quota if it changed.
 */
static void
dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	uint64_t newval;

	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));

	dsl_prop_set_sync_impl(ds,
	    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
	    &ddsqra->ddsqra_value, tx);

	/* re-read so inheritance/receive semantics are reflected */
	VERIFY0(dsl_prop_get_int_ds(ds,
	    zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval));

	if (ds->ds_quota != newval) {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_quota = newval;
	}
	dsl_dataset_rele(ds, FTAG);
}
3185 3194
3186 3195 int
3187 3196 dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
3188 3197 uint64_t refquota)
3189 3198 {
3190 3199 dsl_dataset_set_qr_arg_t ddsqra;
3191 3200
3192 3201 ddsqra.ddsqra_name = dsname;
3193 3202 ddsqra.ddsqra_source = source;
3194 3203 ddsqra.ddsqra_value = refquota;
3195 3204
3196 3205 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
3197 3206 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
3198 3207 }
3199 3208
/*
 * Check half of the set-refreservation sync task: verify pool support,
 * that the target is not a snapshot, and (in syncing context only) that
 * enough space is available for any increase in reserved space.
 */
static int
dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;
	uint64_t newval, unique;

	if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
		return (SET_ERROR(ENOTSUP));

	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
	if (error != 0)
		return (error);

	/* refreservation cannot be set on snapshots */
	if (ds->ds_is_snapshot) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EINVAL));
	}

	error = dsl_prop_predict(ds->ds_dir,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	/*
	 * If we are doing the preliminary check in open context, the
	 * space estimates may be inaccurate.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		dsl_dataset_rele(ds, FTAG);
		return (0);
	}

	mutex_enter(&ds->ds_lock);
	if (!DS_UNIQUE_IS_ACCURATE(ds))
		dsl_dataset_recalc_head_uniq(ds);
	unique = dsl_dataset_phys(ds)->ds_unique_bytes;
	mutex_exit(&ds->ds_lock);

	/* only an increase in effective reservation needs free space */
	if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) {
		uint64_t delta = MAX(unique, newval) -
		    MAX(unique, ds->ds_reserved);

		if (delta >
		    dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) ||
		    (ds->ds_quota > 0 && newval > ds->ds_quota)) {
			dsl_dataset_rele(ds, FTAG);
			return (SET_ERROR(ENOSPC));
		}
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}
3259 3268
/*
 * Store the refreservation property and update in-core state: set
 * ds_reserved to the effective value and charge/credit the difference
 * in unconsumed reservation to DD_USED_REFRSRV.  Note the lock order:
 * dd_lock is taken before ds_lock and held across the space adjustment.
 */
void
dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
    zprop_source_t source, uint64_t value, dmu_tx_t *tx)
{
	uint64_t newval;
	uint64_t unique;
	int64_t delta;

	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
	    source, sizeof (value), 1, &value, tx);

	/* re-read so the effective (possibly inherited) value is used */
	VERIFY0(dsl_prop_get_int_ds(ds,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval));

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
	unique = dsl_dataset_phys(ds)->ds_unique_bytes;
	/* change in reservation not already consumed by unique bytes */
	delta = MAX(0, (int64_t)(newval - unique)) -
	    MAX(0, (int64_t)(ds->ds_reserved - unique));
	ds->ds_reserved = newval;
	mutex_exit(&ds->ds_lock);

	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}
3287 3296
/*
 * Sync half of the set-refreservation sync task: look up the dataset by
 * name and delegate to dsl_dataset_set_refreservation_sync_impl().
 */
static void
dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
	dsl_dataset_set_refreservation_sync_impl(ds,
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx);
	dsl_dataset_rele(ds, FTAG);
}
3300 3309
3301 3310 int
3302 3311 dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
3303 3312 uint64_t refreservation)
3304 3313 {
3305 3314 dsl_dataset_set_qr_arg_t ddsqra;
3306 3315
3307 3316 ddsqra.ddsqra_name = dsname;
3308 3317 ddsqra.ddsqra_source = source;
3309 3318 ddsqra.ddsqra_value = refreservation;
3310 3319
3311 3320 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check,
3312 3321 dsl_dataset_set_refreservation_sync, &ddsqra,
3313 3322 0, ZFS_SPACE_CHECK_NONE));
3314 3323 }
3315 3324
/*
 * Return (in *usedp) the amount of space written in new that is not
 * present in oldsnap.  New may be a snapshot or the head.  Old must be
 * a snapshot before new, in new's filesystem (or its origin).  If not then
 * fail and return EINVAL.
 *
 * The written space is calculated by considering two components: First, we
 * ignore any freed space, and calculate the written as new's used space
 * minus old's used space.  Next, we add in the amount of space that was freed
 * between the two snapshots, thus reducing new's used space relative to old's.
 * Specifically, this is the space that was born before old->ds_creation_txg,
 * and freed before new (ie. on new's deadlist or a previous deadlist).
 *
 * space freed                         [---------------------]
 * snapshots                       ---O-------O--------O-------O------
 *                                         oldsnap            new
 *
 * *compp and *uncompp are filled in analogously with compressed and
 * uncompressed byte counts.
 */
int
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	int err = 0;
	uint64_t snapobj;
	dsl_pool_t *dp = new->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));

	/* Start with the difference in used/comp/uncomp bytes. */
	*usedp = 0;
	*usedp += dsl_dataset_phys(new)->ds_referenced_bytes;
	*usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes;

	*compp = 0;
	*compp += dsl_dataset_phys(new)->ds_compressed_bytes;
	*compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes;

	*uncompp = 0;
	*uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;
	*uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes;

	/* Walk the snapshot chain from new back to oldsnap. */
	snapobj = new->ds_object;
	while (snapobj != oldsnap->ds_object) {
		dsl_dataset_t *snap;
		uint64_t used, comp, uncomp;

		if (snapobj == new->ds_object) {
			snap = new;
		} else {
			err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
			if (err != 0)
				break;
		}

		if (dsl_dataset_phys(snap)->ds_prev_snap_txg ==
		    dsl_dataset_phys(oldsnap)->ds_creation_txg) {
			/*
			 * The blocks in the deadlist can not be born after
			 * ds_prev_snap_txg, so get the whole deadlist space,
			 * which is more efficient (especially for old-format
			 * deadlists).  Unfortunately the deadlist code
			 * doesn't have enough information to make this
			 * optimization itself.
			 */
			dsl_deadlist_space(&snap->ds_deadlist,
			    &used, &comp, &uncomp);
		} else {
			dsl_deadlist_space_range(&snap->ds_deadlist,
			    0, dsl_dataset_phys(oldsnap)->ds_creation_txg,
			    &used, &comp, &uncomp);
		}
		*usedp += used;
		*compp += comp;
		*uncompp += uncomp;

		/*
		 * If we get to the beginning of the chain of snapshots
		 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
		 * was not a snapshot of/before new.
		 */
		snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
		if (snap != new)
			dsl_dataset_rele(snap, FTAG);
		if (snapobj == 0) {
			err = SET_ERROR(EINVAL);
			break;
		}

	}
	return (err);
}
3405 3414
/*
 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
 * lastsnap, and all snapshots in between are deleted.
 *
 * blocks that would be freed            [---------------------------]
 * snapshots                       ---O-------O--------O-------O--------O
 *                                        firstsnap        lastsnap
 *
 * This is the set of blocks that were born after the snap before firstsnap,
 * (birth > firstsnap->prev_snap_txg) and died before the snap after the
 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
 * We calculate this by iterating over the relevant deadlists (from the snap
 * after lastsnap, backward to the snap after firstsnap), summing up the
 * space on the deadlist that was born after the snap before firstsnap.
 *
 * *compp and *uncompp are filled in analogously with compressed and
 * uncompressed byte counts.
 */
int
dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
    dsl_dataset_t *lastsnap,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	int err = 0;
	uint64_t snapobj;
	dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;

	ASSERT(firstsnap->ds_is_snapshot);
	ASSERT(lastsnap->ds_is_snapshot);

	/*
	 * Check that the snapshots are in the same dsl_dir, and firstsnap
	 * is before lastsnap.
	 */
	if (firstsnap->ds_dir != lastsnap->ds_dir ||
	    dsl_dataset_phys(firstsnap)->ds_creation_txg >
	    dsl_dataset_phys(lastsnap)->ds_creation_txg)
		return (SET_ERROR(EINVAL));

	*usedp = *compp = *uncompp = 0;

	/* Walk from the snap after lastsnap back to (but not past) firstsnap */
	snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
	while (snapobj != firstsnap->ds_object) {
		dsl_dataset_t *ds;
		uint64_t used, comp, uncomp;

		err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
		if (err != 0)
			break;

		/* Only count blocks born after the snap before firstsnap. */
		dsl_deadlist_space_range(&ds->ds_deadlist,
		    dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		*usedp += used;
		*compp += comp;
		*uncompp += uncomp;

		snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
		/* the chain cannot end before we reach firstsnap */
		ASSERT3U(snapobj, !=, 0);
		dsl_dataset_rele(ds, FTAG);
	}
	return (err);
}
3465 3474 }
3466 3475
/*
 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
 * For example, they could both be snapshots of the same filesystem, and
 * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
 * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
 * filesystem.  Or 'earlier' could be the origin's origin.
 *
 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
 */
boolean_t
dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
    uint64_t earlier_txg)
{
	dsl_pool_t *dp = later->ds_dir->dd_pool;
	int error;
	boolean_t ret;

	ASSERT(dsl_pool_config_held(dp));
	ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);

	if (earlier_txg == 0)
		earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;

	/* a snapshot cannot come before something created at/after it */
	if (later->ds_is_snapshot &&
	    earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
		return (B_FALSE);

	/* same filesystem and the txg check above passed */
	if (later->ds_dir == earlier->ds_dir)
		return (B_TRUE);
	if (!dsl_dir_is_clone(later->ds_dir))
		return (B_FALSE);

	if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
		return (B_TRUE);
	/* recurse up the origin chain */
	dsl_dataset_t *origin;
	error = dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
	if (error != 0)
		return (B_FALSE);
	ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
	dsl_dataset_rele(origin, FTAG);
	return (ret);
}
3510 3519
3511 3520 void
3512 3521 dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
3513 3522 {
3514 3523 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3515 3524 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
3516 3525 }
3517 3526
3518 3527 boolean_t
3519 3528 dsl_dataset_is_zapified(dsl_dataset_t *ds)
3520 3529 {
3521 3530 dmu_object_info_t doi;
3522 3531
3523 3532 dmu_object_info_from_db(ds->ds_dbuf, &doi);
3524 3533 return (doi.doi_type == DMU_OTN_ZAP_METADATA);
3525 3534 }
3526 3535
3527 3536 boolean_t
3528 3537 dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
3529 3538 {
3530 3539 return (dsl_dataset_is_zapified(ds) &&
3531 3540 zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,
3532 3541 ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
3533 3542 }
↓ open down ↓ |
3008 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX