Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/fssnap.c
+++ new/usr/src/uts/common/io/fssnap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26
27 27 #include <sys/debug.h>
28 28 #include <sys/types.h>
29 29 #include <sys/file.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/uio.h>
32 32 #include <sys/open.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/kmem.h>
35 35 #include <sys/conf.h>
36 36 #include <sys/cmn_err.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/disp.h>
39 39 #include <sys/atomic.h>
40 40 #include <sys/filio.h>
41 41 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
42 42 #include <sys/kstat.h>
43 43
44 44 #include <sys/ddi.h>
45 45 #include <sys/devops.h>
46 46 #include <sys/sunddi.h>
47 47 #include <sys/esunddi.h>
48 48 #include <sys/priv_names.h>
49 49
50 50 #include <sys/fssnap.h>
51 51 #include <sys/fssnap_if.h>
52 52
53 53 /*
54 54 * This module implements the file system snapshot code, which provides a
55 55 * point-in-time image of a file system for the purposes of online backup.
56 56 * There are essentially two parts to this project: the driver half and the
57 57 * file system half. The driver half is a pseudo device driver called
58 58 * "fssnap" that represents the snapshot. Each snapshot is assigned a
59 59 * number that corresponds to the minor number of the device, and a control
60 60 * device with a high minor number is used to initiate snapshot creation and
61 61 * deletion. For all practical purposes the driver half acts like a
62 62 * read-only disk device whose contents are exactly the same as the master
63 63 * file system at the time the snapshot was created.
64 64 *
65 65 * The file system half provides interfaces necessary for performing the
66 66 * file system dependent operations required to create and delete snapshots
67 67 * and a special driver strategy routine that must always be used by the file
68 68 * system for snapshots to work correctly.
69 69 *
70 70 * When a snapshot is to be created, the user utility will send an ioctl to
71 71 * the control device of the driver half specifying the file system to be
72 72 * snapshotted, the file descriptor of a backing-store file which is used to
73 73 * hold old data before it is overwritten, and other snapshot parameters.
74 74 * This ioctl is passed on to the file system specified in the original
75 75 * ioctl request. The file system is expected to be able to flush
76 76 * everything out to make the file system consistent and lock it to ensure
77 77 * no changes occur while the snapshot is being created. It then calls
78 78 * fssnap_create() to create state for a new snapshot, from which an opaque
79 79 * handle is returned with the snapshot locked. Next, the file system must
80 80 * populate the "candidate bitmap", which tells the snapshot code which
81 81 * "chunks" should be considered for copy-on-write (a chunk is the unit of
82 82 * granularity used for copy-on-write, which is independent of the device
83 83 * and file system block sizes). This is typically done by scanning the
84 84 * file system allocation bitmaps to determine which chunks contain
85 85 * allocated blocks in the file system at the time the snapshot was created.
86 86 * If a chunk has no allocated blocks, it does not need to be copied before
87 87 * being written to. Once the candidate bitmap is populated with
88 88 * fssnap_set_candidate(), the file system calls fssnap_create_done() to
89 89 * complete the snapshot creation and unlock the snapshot. The file system
90 90 * may now be unlocked and modifications to it resumed.
91 91 *
92 92 * Once a snapshot is created, the file system must perform all writes
93 93 * through a special strategy routine, fssnap_strategy(). This strategy
94 94 * routine determines whether the chunks contained by the write must be
95 95 * copied before being overwritten by consulting the candidate bitmap
96 96 * described above, and the "hastrans bitmap" which tells it whether the chunk
97 97 * has been copied already or not. If the chunk is a candidate but has not
98 98 * been copied, it reads the old data in and adds it to a queue. The
99 99 * old data can then be overwritten with the new data. An asynchronous
100 100 * task queue is dispatched for each old chunk read in which writes the old
101 101 * data to the backing file specified at snapshot creation time. The
102 102 * backing file is a sparse file the same size as the file system that
103 103 * contains the old data at the offset that data originally had in the
104 104 * file system. If the queue containing in-memory chunks gets too large,
105 105 * writes to the file system may be throttled by a semaphore until the
106 106 * task queues have a chance to push some of the chunks to the backing file.
107 107 *
108 108 * With the candidate bitmap, the hastrans bitmap, the data on the master
109 109 * file system, and the old data in memory and in the backing file, the
110 110 * snapshot pseudo-driver can piece together the original file system
111 111 * information to satisfy read requests. If the requested chunk is not a
112 112 * candidate, it returns a zeroed buffer. If the chunk is a candidate but
113 113 * has not been copied it reads it from the master file system. If it is a
114 114 * candidate and has been copied, it either copies the data from the
115 115 * in-memory queue or it reads it in from the backing file. The result is
116 116 * a replication of the original file system that can be backed up, mounted,
117 117 * or manipulated by other file system utilities that work on a read-only
118 118 * device.
119 119 *
120 120 * This module is divided into three roughly logical sections:
121 121 *
122 122 * - The snapshot driver, which is a character/block driver
123 123 * representing the snapshot itself. These routines are
124 124 * prefixed with "snap_".
125 125 *
126 126 * - The library routines that are defined in fssnap_if.h that
127 127 * are used by file systems that use this snapshot implementation.
128 128 * These functions are prefixed with "fssnap_" and are called through
129 129 * a function vector from the file system.
130 130 *
131 131 * - The helper routines used by the snapshot driver and the fssnap
132 132 * library routines for managing the translation table and other
133 133 * useful functions. These routines are all static and are
134 134 * prefixed with either "fssnap_" or "transtbl_" if they
135 135 * are specifically used for translation table activities.
136 136 */
137 137
138 138 static dev_info_t *fssnap_dip = NULL;
139 139 static struct snapshot_id *snapshot = NULL;
140 140 static struct snapshot_id snap_ctl;
141 141 static int num_snapshots = 0;
142 142 static kmutex_t snapshot_mutex;
143 143 static char snapname[] = SNAP_NAME;
144 144
145 145 /* "tunable" parameters */
146 146 static int fssnap_taskq_nthreads = FSSNAP_TASKQ_THREADS;
147 147 static uint_t fssnap_max_mem_chunks = FSSNAP_MAX_MEM_CHUNKS;
148 148 static int fssnap_taskq_maxtasks = FSSNAP_TASKQ_MAXTASKS;
149 149
150 150 /* static function prototypes */
151 151
152 152 /* snapshot driver */
153 153 static int snap_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
154 154 static int snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
155 155 static int snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
156 156 static int snap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
157 157 static int snap_close(dev_t dev, int flag, int otyp, cred_t *cred);
158 158 static int snap_strategy(struct buf *bp);
159 159 static int snap_read(dev_t dev, struct uio *uiop, cred_t *credp);
160 160 static int snap_print(dev_t dev, char *str);
161 161 static int snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
162 162 cred_t *credp, int *rvalp);
163 163 static int snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
164 164 int flags, char *name, caddr_t valuep, int *lengthp);
165 165 static int snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk,
166 166 int offset, int len, char *buffer);
167 167
168 168
169 169 /* fssnap interface implementations (see fssnap_if.h) */
170 170 static void fssnap_strategy_impl(void *, struct buf *);
171 171 static void *fssnap_create_impl(chunknumber_t, uint_t, u_offset_t,
172 172 struct vnode *, int, struct vnode **, char *, u_offset_t);
173 173 static void fssnap_set_candidate_impl(void *, chunknumber_t);
174 174 static int fssnap_is_candidate_impl(void *, u_offset_t);
175 175 static int fssnap_create_done_impl(void *);
176 176 static int fssnap_delete_impl(void *);
177 177
178 178 /* fssnap interface support routines */
179 179 static int fssnap_translate(struct snapshot_id **, struct buf *);
180 180 static void fssnap_write_taskq(void *);
181 181 static void fssnap_create_kstats(snapshot_id_t *, int, const char *,
182 182 const char *);
183 183 static int fssnap_update_kstat_num(kstat_t *, int);
184 184 static void fssnap_delete_kstats(struct cow_info *);
185 185
186 186 /* translation table prototypes */
187 187 static cow_map_node_t *transtbl_add(cow_map_t *, chunknumber_t, caddr_t);
188 188 static cow_map_node_t *transtbl_get(cow_map_t *, chunknumber_t);
189 189 static void transtbl_delete(cow_map_t *, cow_map_node_t *);
190 190 static void transtbl_free(cow_map_t *);
191 191
192 192 static kstat_t *fssnap_highwater_kstat;
193 193
194 194 /* ************************************************************************ */
195 195
196 196 /* Device and Module Structures */
197 197
198 198 static struct cb_ops snap_cb_ops = {
199 199 snap_open,
200 200 snap_close,
201 201 snap_strategy,
202 202 snap_print,
203 203 nodev, /* no snap_dump */
204 204 snap_read,
205 205 nodev, /* no snap_write */
206 206 snap_ioctl,
207 207 nodev, /* no snap_devmap */
208 208 nodev, /* no snap_mmap */
209 209 nodev, /* no snap_segmap */
210 210 nochpoll,
211 211 snap_prop_op,
212 212 NULL, /* streamtab */
213 213 D_64BIT | D_NEW | D_MP, /* driver compatibility */
214 214 CB_REV,
215 215 nodev, /* async I/O read entry point */
216 216 nodev /* async I/O write entry point */
217 217 };
218 218
219 219 static struct dev_ops snap_ops = {
220 220 DEVO_REV,
221 221 0, /* ref count */
222 222 snap_getinfo,
223 223 nulldev, /* snap_identify obsolete */
224 224 nulldev, /* no snap_probe */
225 225 snap_attach,
226 226 snap_detach,
227 227 nodev, /* no snap_reset */
228 228 &snap_cb_ops,
229 229 (struct bus_ops *)NULL,
230 230 nulldev, /* no snap_power() */
231 231 ddi_quiesce_not_needed, /* quiesce */
232 232 };
233 233
234 234 extern struct mod_ops mod_driverops;
235 235
236 236 static struct modldrv md = {
237 237 &mod_driverops, /* Type of module. This is a driver */
238 238 "snapshot driver", /* Name of the module */
239 239 &snap_ops,
240 240 };
241 241
242 242 static struct modlinkage ml = {
243 243 MODREV_1,
244 244 &md,
245 245 NULL
246 246 };
247 247
248 248 static void *statep;
249 249
250 250 int
251 251 _init(void)
252 252 {
253 253 int error;
254 254 kstat_t *ksp;
255 255 kstat_named_t *ksdata;
256 256
257 257 error = ddi_soft_state_init(&statep, sizeof (struct snapshot_id *), 1);
258 258 if (error) {
259 259 cmn_err(CE_WARN, "_init: failed to init ddi_soft_state.");
260 260 return (error);
261 261 }
262 262
263 263 error = mod_install(&ml);
264 264
265 265 if (error) {
266 266 cmn_err(CE_WARN, "_init: failed to mod_install.");
267 267 ddi_soft_state_fini(&statep);
268 268 return (error);
269 269 }
270 270
271 271 /*
272 272 * Fill in the snapshot operations vector for file systems
273 273 * (defined in fssnap_if.c)
274 274 */
275 275
276 276 snapops.fssnap_create = fssnap_create_impl;
277 277 snapops.fssnap_set_candidate = fssnap_set_candidate_impl;
278 278 snapops.fssnap_is_candidate = fssnap_is_candidate_impl;
279 279 snapops.fssnap_create_done = fssnap_create_done_impl;
280 280 snapops.fssnap_delete = fssnap_delete_impl;
281 281 snapops.fssnap_strategy = fssnap_strategy_impl;
282 282
283 283 mutex_init(&snapshot_mutex, NULL, MUTEX_DEFAULT, NULL);
284 284
285 285 /*
286 286 * Initialize the fssnap highwater kstat
287 287 */
288 288 ksp = kstat_create(snapname, 0, FSSNAP_KSTAT_HIGHWATER, "misc",
289 289 KSTAT_TYPE_NAMED, 1, 0);
290 290 if (ksp != NULL) {
291 291 ksdata = (kstat_named_t *)ksp->ks_data;
292 292 kstat_named_init(ksdata, FSSNAP_KSTAT_HIGHWATER,
293 293 KSTAT_DATA_UINT32);
294 294 ksdata->value.ui32 = 0;
295 295 kstat_install(ksp);
296 296 } else {
297 297 cmn_err(CE_WARN, "_init: failed to create highwater kstat.");
298 298 }
299 299 fssnap_highwater_kstat = ksp;
300 300
301 301 return (0);
302 302 }
303 303
304 304 int
305 305 _info(struct modinfo *modinfop)
306 306 {
307 307 return (mod_info(&ml, modinfop));
308 308 }
309 309
310 310 int
311 311 _fini(void)
312 312 {
313 313 int error;
314 314
315 315 error = mod_remove(&ml);
316 316 if (error)
317 317 return (error);
318 318 ddi_soft_state_fini(&statep);
319 319
320 320 /*
321 321 * delete the fssnap highwater kstat
322 322 */
323 323 kstat_delete(fssnap_highwater_kstat);
324 324
325 325 mutex_destroy(&snapshot_mutex);
326 326
327 327 /* Clear out the file system operations vector */
328 328 snapops.fssnap_create = NULL;
329 329 snapops.fssnap_set_candidate = NULL;
330 330 snapops.fssnap_create_done = NULL;
331 331 snapops.fssnap_delete = NULL;
332 332 snapops.fssnap_strategy = NULL;
333 333
334 334 return (0);
335 335 }
336 336
337 337 /* ************************************************************************ */
338 338
339 339 /*
340 340 * Snapshot Driver Routines
341 341 *
342 342 * This section implements the snapshot character and block drivers. The
343 343 * device will appear to be a consistent read-only file system to
344 344 * applications that wish to back it up or mount it. The snapshot driver
345 345 * communicates with the file system through the translation table, which
346 346 * tells the snapshot driver where to find the data necessary to piece
347 347 * together the frozen file system. The data may either be on the master
348 348 * device (no translation exists), in memory (a translation exists but has
349 349 * not been flushed to the backing store), or in the backing store file.
350 350 * The read request may require the snapshot driver to retrieve data from
351 351 * several different places and piece it together to look like a single
352 352 * contiguous read.
353 353 *
354 354 * The device minor number corresponds to the snapshot number in the list of
355 355 * snapshot identifiers. The soft state for each minor number is simply a
356 356 * pointer to the snapshot id, which holds all of the snapshot state. One
357 357 * minor number is designated as the control device. All snapshot create
358 358 * and delete requests go through the control device to ensure this module
359 359 * is properly loaded and attached before the file system starts calling
360 360 * routines defined here.
361 361 */
362 362
363 363
364 364 /*
365 365 * snap_getinfo() - snapshot driver getinfo(9E) routine
366 366 *
367 367 */
368 368 /*ARGSUSED*/
369 369 static int
370 370 snap_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
371 371 {
372 372 switch (infocmd) {
373 373 case DDI_INFO_DEVT2DEVINFO:
374 374 *result = fssnap_dip;
375 375 return (DDI_SUCCESS);
376 376 case DDI_INFO_DEVT2INSTANCE:
377 377 *result = 0; /* we only have one instance */
378 378 return (DDI_SUCCESS);
379 379 }
380 380 return (DDI_FAILURE);
381 381 }
382 382
383 383 /*
384 384 * snap_attach() - snapshot driver attach(9E) routine
385 385 *
386 386 * sets up snapshot control device and control state. The control state
387 387 * is a pointer to an "anonymous" snapshot_id for tracking opens and closes
388 388 */
389 389 static int
390 390 snap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
391 391 {
392 392 int error;
393 393
394 394 switch (cmd) {
395 395 case DDI_ATTACH:
396 396 /* create the control device */
397 397 error = ddi_create_priv_minor_node(dip, SNAP_CTL_NODE, S_IFCHR,
398 398 SNAP_CTL_MINOR, DDI_PSEUDO, PRIVONLY_DEV,
399 399 PRIV_SYS_CONFIG, PRIV_SYS_CONFIG, 0666);
400 400 if (error == DDI_FAILURE) {
401 401 return (DDI_FAILURE);
402 402 }
403 403
404 404 rw_init(&snap_ctl.sid_rwlock, NULL, RW_DEFAULT, NULL);
405 405 rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
406 406 fssnap_dip = dip;
407 407 snap_ctl.sid_snapnumber = SNAP_CTL_MINOR;
408 408 /* the control sid is not linked into the snapshot list */
409 409 snap_ctl.sid_next = NULL;
410 410 snap_ctl.sid_cowinfo = NULL;
411 411 snap_ctl.sid_flags = 0;
412 412 rw_exit(&snap_ctl.sid_rwlock);
413 413 ddi_report_dev(dip);
414 414
415 415 return (DDI_SUCCESS);
416 416 case DDI_PM_RESUME:
417 417 return (DDI_SUCCESS);
418 418
419 419 case DDI_RESUME:
420 420 return (DDI_SUCCESS);
421 421
422 422 default:
423 423 return (DDI_FAILURE);
424 424 }
425 425 }
426 426
427 427 /*
428 428 * snap_detach() - snapshot driver detach(9E) routine
429 429 *
430 430 * destroys snapshot control device and control state. If any snapshots
431 431 * are active (ie. num_snapshots != 0), the device will refuse to detach.
432 432 */
433 433 static int
434 434 snap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
435 435 {
436 436 struct snapshot_id *sidp, *sidnextp;
437 437
438 438 switch (cmd) {
439 439 case DDI_DETACH:
440 440 /* do not detach if the device is active */
441 441 mutex_enter(&snapshot_mutex);
442 442 if ((num_snapshots != 0) ||
443 443 ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0)) {
444 444 mutex_exit(&snapshot_mutex);
445 445 return (DDI_FAILURE);
446 446 }
447 447
448 448 /* free up the snapshot list */
449 449 for (sidp = snapshot; sidp != NULL; sidp = sidnextp) {
450 450 ASSERT(SID_AVAILABLE(sidp) &&
451 451 !RW_LOCK_HELD(&sidp->sid_rwlock));
452 452 sidnextp = sidp->sid_next;
453 453 rw_destroy(&sidp->sid_rwlock);
454 454 kmem_free(sidp, sizeof (struct snapshot_id));
455 455 }
456 456 snapshot = NULL;
457 457
458 458 /* delete the control device */
459 459 ddi_remove_minor_node(dip, SNAP_CTL_NODE);
460 460 fssnap_dip = NULL;
461 461
462 462 ASSERT((snap_ctl.sid_flags & SID_CHAR_BUSY) == 0);
463 463 rw_destroy(&snap_ctl.sid_rwlock);
464 464 mutex_exit(&snapshot_mutex);
465 465
466 466 return (DDI_SUCCESS);
467 467
468 468 default:
469 469 return (DDI_FAILURE);
470 470 }
471 471 }
472 472
473 473 /*
474 474 * snap_open() - snapshot driver open(9E) routine
475 475 *
476 476 * marks the snapshot id as busy so it will not be recycled when deleted
477 477 * until the snapshot is closed.
478 478 */
479 479 /* ARGSUSED */
480 480 static int
481 481 snap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
482 482 {
483 483 minor_t minor;
484 484 struct snapshot_id **sidpp, *sidp;
485 485
486 486 /* snapshots are read-only */
487 487 if (flag & FWRITE)
488 488 return (EROFS);
489 489
490 490 minor = getminor(*devp);
491 491
492 492 if (minor == SNAP_CTL_MINOR) {
493 493 /* control device must be opened exclusively */
494 494 if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR))
495 495 return (EINVAL);
496 496
497 497 rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
498 498 if ((snap_ctl.sid_flags & SID_CHAR_BUSY) != 0) {
499 499 rw_exit(&snap_ctl.sid_rwlock);
500 500 return (EBUSY);
501 501 }
502 502
503 503 snap_ctl.sid_flags |= SID_CHAR_BUSY;
504 504 rw_exit(&snap_ctl.sid_rwlock);
505 505
506 506 return (0);
507 507 }
508 508
509 509 sidpp = ddi_get_soft_state(statep, minor);
510 510 if (sidpp == NULL || *sidpp == NULL)
511 511 return (ENXIO);
512 512 sidp = *sidpp;
513 513 rw_enter(&sidp->sid_rwlock, RW_WRITER);
514 514
515 515 if ((flag & FEXCL) && SID_BUSY(sidp)) {
516 516 rw_exit(&sidp->sid_rwlock);
517 517 return (EAGAIN);
518 518 }
519 519
520 520 ASSERT(sidpp != NULL && sidp != NULL);
521 521 /* check to see if this snapshot has been killed on us */
522 522 if (SID_INACTIVE(sidp)) {
523 523 cmn_err(CE_WARN, "snap_open: snapshot %d does not exist.",
524 524 minor);
525 525 rw_exit(&sidp->sid_rwlock);
526 526 return (ENXIO);
527 527 }
528 528
529 529 switch (otyp) {
530 530 case OTYP_CHR:
531 531 sidp->sid_flags |= SID_CHAR_BUSY;
532 532 break;
533 533 case OTYP_BLK:
534 534 sidp->sid_flags |= SID_BLOCK_BUSY;
535 535 break;
536 536 default:
537 537 rw_exit(&sidp->sid_rwlock);
538 538 return (EINVAL);
539 539 }
540 540
541 541 rw_exit(&sidp->sid_rwlock);
542 542
543 543 /*
544 544 * at this point if a valid snapshot was found then it has
545 545 * been marked busy and we can use it.
546 546 */
547 547 return (0);
548 548 }
549 549
550 550 /*
551 551 * snap_close() - snapshot driver close(9E) routine
552 552 *
553 553 * unsets the busy bits in the snapshot id. If the snapshot has been
554 554 * deleted while the snapshot device was open, the close call will clean
555 555 * up the remaining state information.
556 556 */
557 557 /* ARGSUSED */
558 558 static int
559 559 snap_close(dev_t dev, int flag, int otyp, cred_t *cred)
560 560 {
561 561 struct snapshot_id **sidpp, *sidp;
562 562 minor_t minor;
563 563 char name[20];
564 564
565 565 minor = getminor(dev);
566 566
567 567 /* if this is the control device, close it and return */
568 568 if (minor == SNAP_CTL_MINOR) {
569 569 rw_enter(&snap_ctl.sid_rwlock, RW_WRITER);
570 570 snap_ctl.sid_flags &= ~(SID_CHAR_BUSY);
571 571 rw_exit(&snap_ctl.sid_rwlock);
572 572 return (0);
573 573 }
574 574
575 575 sidpp = ddi_get_soft_state(statep, minor);
576 576 if (sidpp == NULL || *sidpp == NULL) {
577 577 cmn_err(CE_WARN, "snap_close: could not find state for "
578 578 "snapshot %d.", minor);
579 579 return (ENXIO);
580 580 }
581 581 sidp = *sidpp;
582 582 mutex_enter(&snapshot_mutex);
583 583 rw_enter(&sidp->sid_rwlock, RW_WRITER);
584 584
585 585 /* Mark the snapshot as not being busy anymore */
586 586 switch (otyp) {
587 587 case OTYP_CHR:
588 588 sidp->sid_flags &= ~(SID_CHAR_BUSY);
589 589 break;
590 590 case OTYP_BLK:
591 591 sidp->sid_flags &= ~(SID_BLOCK_BUSY);
592 592 break;
593 593 default:
594 594 mutex_exit(&snapshot_mutex);
595 595 rw_exit(&sidp->sid_rwlock);
596 596 return (EINVAL);
597 597 }
598 598
599 599 if (SID_AVAILABLE(sidp)) {
600 600 /*
601 601 * if this is the last close on a snapshot that has been
602 602 * deleted, then free up the soft state. The snapdelete
603 603 * ioctl does not free this when the device is in use so
604 604 * we do it here after the last reference goes away.
605 605 */
606 606
607 607 /* remove the device nodes */
608 608 ASSERT(fssnap_dip != NULL);
609 609 (void) snprintf(name, sizeof (name), "%d",
610 610 sidp->sid_snapnumber);
611 611 ddi_remove_minor_node(fssnap_dip, name);
612 612 (void) snprintf(name, sizeof (name), "%d,raw",
613 613 sidp->sid_snapnumber);
614 614 ddi_remove_minor_node(fssnap_dip, name);
615 615
616 616 /* delete the state structure */
617 617 ddi_soft_state_free(statep, sidp->sid_snapnumber);
618 618 num_snapshots--;
619 619 }
620 620
621 621 mutex_exit(&snapshot_mutex);
622 622 rw_exit(&sidp->sid_rwlock);
623 623
624 624 return (0);
625 625 }
626 626
627 627 /*
628 628 * snap_read() - snapshot driver read(9E) routine
629 629 *
630 630 * reads data from the snapshot by calling snap_strategy() through physio()
631 631 */
632 632 /* ARGSUSED */
633 633 static int
634 634 snap_read(dev_t dev, struct uio *uiop, cred_t *credp)
635 635 {
636 636 minor_t minor;
637 637 struct snapshot_id **sidpp;
638 638
639 639 minor = getminor(dev);
640 640 sidpp = ddi_get_soft_state(statep, minor);
641 641 if (sidpp == NULL || *sidpp == NULL) {
642 642 cmn_err(CE_WARN,
643 643 "snap_read: could not find state for snapshot %d.", minor);
644 644 return (ENXIO);
645 645 }
646 646 return (physio(snap_strategy, NULL, dev, B_READ, minphys, uiop));
647 647 }
648 648
649 649 /*
650 650 * snap_strategy() - snapshot driver strategy(9E) routine
651 651 *
652 652 * cycles through each chunk in the requested buffer and calls
653 653 * snap_getchunk() on each chunk to retrieve it from the appropriate
654 654 * place. Once all of the parts are put together the requested buffer
655 655 * is returned. The snapshot driver is read-only, so a write is invalid.
656 656 */
657 657 static int
658 658 snap_strategy(struct buf *bp)
659 659 {
660 660 struct snapshot_id **sidpp, *sidp;
661 661 minor_t minor;
662 662 chunknumber_t chunk;
663 663 int off, len;
664 664 u_longlong_t reqptr;
665 665 int error = 0;
666 666 size_t chunksz;
667 667 caddr_t buf;
668 668
669 669 /* snapshot device is read-only */
670 670 if (bp->b_flags & B_WRITE) {
671 671 bioerror(bp, EROFS);
672 672 bp->b_resid = bp->b_bcount;
673 673 biodone(bp);
674 674 return (0);
675 675 }
676 676
677 677 minor = getminor(bp->b_edev);
678 678 sidpp = ddi_get_soft_state(statep, minor);
679 679 if (sidpp == NULL || *sidpp == NULL) {
680 680 cmn_err(CE_WARN,
681 681 "snap_strategy: could not find state for snapshot %d.",
682 682 minor);
683 683 bioerror(bp, ENXIO);
684 684 bp->b_resid = bp->b_bcount;
685 685 biodone(bp);
686 686 return (0);
687 687 }
688 688 sidp = *sidpp;
689 689 ASSERT(sidp);
690 690 rw_enter(&sidp->sid_rwlock, RW_READER);
691 691
692 692 if (SID_INACTIVE(sidp)) {
693 693 bioerror(bp, ENXIO);
694 694 bp->b_resid = bp->b_bcount;
695 695 biodone(bp);
696 696 rw_exit(&sidp->sid_rwlock);
697 697 return (0);
698 698 }
699 699
700 700 if (bp->b_flags & (B_PAGEIO|B_PHYS))
701 701 bp_mapin(bp);
702 702
703 703 bp->b_resid = bp->b_bcount;
704 704 ASSERT(bp->b_un.b_addr);
705 705 buf = bp->b_un.b_addr;
706 706
707 707 chunksz = sidp->sid_cowinfo->cow_map.cmap_chunksz;
708 708
709 709 /* reqptr is the current DEV_BSIZE offset into the device */
710 710 /* chunk is the chunk containing reqptr */
711 711 /* len is the length of the request (in the current chunk) in bytes */
712 712 /* off is the byte offset into the current chunk */
713 713 reqptr = bp->b_lblkno;
714 714 while (bp->b_resid > 0) {
715 715 chunk = dbtocowchunk(&sidp->sid_cowinfo->cow_map, reqptr);
716 716 off = (reqptr % (chunksz >> DEV_BSHIFT)) << DEV_BSHIFT;
717 717 len = min(chunksz - off, bp->b_resid);
718 718 ASSERT((off + len) <= chunksz);
719 719
720 720 if ((error = snap_getchunk(sidp, chunk, off, len, buf)) != 0) {
721 721 /*
722 722 * EINVAL means the user tried to go out of range.
723 723 * Anything else means it's likely that we're
724 724 * confused.
725 725 */
726 726 if (error != EINVAL) {
727 727 cmn_err(CE_WARN, "snap_strategy: error "
728 728 "calling snap_getchunk, chunk = %llu, "
729 729 "offset = %d, len = %d, resid = %lu, "
730 730 "error = %d.",
731 731 chunk, off, len, bp->b_resid, error);
732 732 }
733 733 bioerror(bp, error);
734 734 biodone(bp);
735 735 rw_exit(&sidp->sid_rwlock);
736 736 return (0);
737 737 }
738 738 bp->b_resid -= len;
739 739 reqptr += (len >> DEV_BSHIFT);
740 740 buf += len;
741 741 }
742 742
743 743 ASSERT(bp->b_resid == 0);
744 744 biodone(bp);
745 745
746 746 rw_exit(&sidp->sid_rwlock);
747 747 return (0);
748 748 }
749 749
750 750 /*
751 751 * snap_getchunk() - helper function for snap_strategy()
752 752 *
753 753 * gets the requested data from the appropriate place and fills in the
754 754 * buffer. chunk is the chunk number of the request, offset is the
755 755 * offset into that chunk and must be less than the chunk size. len is
756 756 * the length of the request starting at offset, and must not exceed a
757 757 * chunk boundary. buffer is the address to copy the data to. len
758 758 * bytes are copied into the buffer starting at the location specified.
759 759 *
760 760 * A chunk is located according to the following algorithm:
761 761 * - If the chunk does not have a translation or is not a candidate
762 762 * for translation, it is read straight from the master device.
763 763 * - If the chunk does have a translation, then it is either on
764 764 * disk or in memory:
765 765 * o If it is in memory the requested data is simply copied out
766 766 * of the in-memory buffer.
767 767 * o If it is in the backing store, it is read from there.
768 768 *
769 769 * This function does the real work of the snapshot driver.
770 770 */
771 771 static int
772 772 snap_getchunk(struct snapshot_id *sidp, chunknumber_t chunk, int offset,
773 773 int len, char *buffer)
774 774 {
775 775 cow_map_t *cmap = &sidp->sid_cowinfo->cow_map;
776 776 cow_map_node_t *cmn;
777 777 struct buf *snapbuf;
778 778 int error = 0;
779 779 char *newbuffer;
780 780 int newlen = 0;
781 781 int partial = 0;
782 782
783 783 ASSERT(RW_READ_HELD(&sidp->sid_rwlock));
784 784 ASSERT(offset + len <= cmap->cmap_chunksz);
785 785
786 786 /*
787 787 * Check if the chunk number is out of range and if so bail out
788 788 */
789 789 if (chunk >= (cmap->cmap_bmsize * NBBY)) {
790 790 return (EINVAL);
791 791 }
792 792
793 793 /*
794 794 * If the chunk is not a candidate for translation, then the chunk
795 795 * was not allocated when the snapshot was taken. Since it does
796 796 * not contain data associated with this snapshot, just return a
797 797 * zero buffer instead.
798 798 */
799 799 if (isclr(cmap->cmap_candidate, chunk)) {
800 800 bzero(buffer, len);
801 801 return (0);
802 802 }
803 803
804 804 /*
805 805 * if the chunk is a candidate for translation but a
806 806 * translation does not exist, then read through to the
807 807 * original file system. The rwlock is held until the read
808 808 * completes if it hasn't been translated to make sure the
809 809 * file system does not translate the block before we
810 810 * access it. If it has already been translated we don't
811 811 * need the lock, because the translation will never go away.
812 812 */
813 813 rw_enter(&cmap->cmap_rwlock, RW_READER);
814 814 if (isclr(cmap->cmap_hastrans, chunk)) {
815 815 snapbuf = getrbuf(KM_SLEEP);
816 816 /*
817 817 * Reading into the buffer saves having to do a copy,
818 818 * but gets tricky if the request size is not a
819 819 * multiple of DEV_BSIZE. However, we are filling the
820 820 * buffer left to right, so future reads will write
821 821 * over any extra data we might have read.
822 822 */
823 823
824 824 partial = len % DEV_BSIZE;
825 825
826 826 snapbuf->b_bcount = len;
827 827 snapbuf->b_lblkno = lbtodb(chunk * cmap->cmap_chunksz + offset);
828 828 snapbuf->b_un.b_addr = buffer;
829 829
830 830 snapbuf->b_iodone = NULL;
831 831 snapbuf->b_proc = NULL; /* i.e. the kernel */
832 832 snapbuf->b_flags = B_READ | B_BUSY;
833 833 snapbuf->b_edev = sidp->sid_fvp->v_vfsp->vfs_dev;
834 834
835 835 if (partial) {
836 836 /*
837 837 * Partial block read in progress.
838 838 * This is bad as modules further down the line
839 839 * assume buf's are exact multiples of DEV_BSIZE
840 840 * and we end up with fewer, or zero, bytes read.
841 841 * To get round this we need to round up to the
842 842 * nearest full block read and then return only
843 843 * len bytes.
844 844 */
845 845 newlen = (len - partial) + DEV_BSIZE;
846 846 newbuffer = kmem_alloc(newlen, KM_SLEEP);
847 847
848 848 snapbuf->b_bcount = newlen;
849 849 snapbuf->b_un.b_addr = newbuffer;
850 850 }
851 851
852 852 (void) bdev_strategy(snapbuf);
853 853 (void) biowait(snapbuf);
854 854
855 855 error = geterror(snapbuf);
856 856
857 857 if (partial) {
858 858 /*
859 859 * Partial block read. Now we need to bcopy the
860 860 * correct number of bytes back into the
861 861 * supplied buffer, and tidy up our temp
862 862 * buffer.
863 863 */
864 864 bcopy(newbuffer, buffer, len);
865 865 kmem_free(newbuffer, newlen);
866 866 }
867 867
868 868 freerbuf(snapbuf);
869 869 rw_exit(&cmap->cmap_rwlock);
870 870
871 871 return (error);
872 872 }
873 873
874 874 /*
875 875 * finally, if the chunk is a candidate for translation and it
876 876 * has been translated, then we clone the chunk of the buffer
877 877 * that was copied aside by the file system.
878 878 * The cmap_rwlock does not need to be held after we know the
879 879 * data has already been copied. Once a chunk has been copied
880 880 * to the backing file, it is stable read only data.
881 881 */
882 882 cmn = transtbl_get(cmap, chunk);
883 883
884 884 /* check whether the data is in memory or in the backing file */
885 885 if (cmn != NULL) {
886 886 ASSERT(cmn->cmn_buf);
887 887 /* already in memory */
888 888 bcopy(cmn->cmn_buf + offset, buffer, len);
889 889 rw_exit(&cmap->cmap_rwlock);
890 890 } else {
891 891 ssize_t resid = len;
892 892 int bf_index;
893 893 /*
894 894 * can cause deadlock with writer if we don't drop the
895 895 * cmap_rwlock before trying to get the backing store file
896 896 * vnode rwlock.
897 897 */
898 898 rw_exit(&cmap->cmap_rwlock);
899 899
900 900 bf_index = chunk / cmap->cmap_chunksperbf;
901 901
902 902 /* read buffer from backing file */
903 903 error = vn_rdwr(UIO_READ,
904 904 (sidp->sid_cowinfo->cow_backfile_array)[bf_index],
905 905 buffer, len, ((chunk % cmap->cmap_chunksperbf) *
906 906 cmap->cmap_chunksz) + offset, UIO_SYSSPACE, 0,
907 907 RLIM64_INFINITY, kcred, &resid);
908 908 }
909 909
910 910 return (error);
911 911 }
912 912
913 913 /*
914 914 * snap_print() - snapshot driver print(9E) routine
915 915 *
916 916 * prints the device identification string.
917 917 */
918 918 static int
919 919 snap_print(dev_t dev, char *str)
920 920 {
921 921 struct snapshot_id **sidpp;
922 922 minor_t minor;
923 923
924 924 minor = getminor(dev);
925 925 sidpp = ddi_get_soft_state(statep, minor);
926 926 if (sidpp == NULL || *sidpp == NULL) {
927 927 cmn_err(CE_WARN,
928 928 "snap_print: could not find state for snapshot %d.", minor);
929 929 return (ENXIO);
930 930 }
931 931
932 932 cmn_err(CE_NOTE, "snap_print: snapshot %d: %s", minor, str);
933 933
934 934 return (0);
935 935 }
936 936
937 937 /*
938 938 * snap_prop_op() - snapshot driver prop_op(9E) routine
939 939 *
940 940 * get 32-bit and 64-bit values for size (character driver) and nblocks
941 941 * (block driver).
942 942 */
943 943 static int
944 944 snap_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
945 945 int flags, char *name, caddr_t valuep, int *lengthp)
946 946 {
947 947 int minor;
948 948 struct snapshot_id **sidpp;
949 949 dev_t mdev;
950 950 dev_info_t *mdip;
951 951 int error;
952 952
953 953 minor = getminor(dev);
954 954
955 955 /*
956 956 * If this is the control device just check for .conf properties,
957 957 * if the wildcard DDI_DEV_T_ANY was passed in via the dev_t
958 958 * just fall back to the defaults.
959 959 */
960 960 if ((minor == SNAP_CTL_MINOR) || (dev == DDI_DEV_T_ANY))
961 961 return (ddi_prop_op(dev, dip, prop_op, flags, name,
962 962 valuep, lengthp));
963 963
964 964 /* check to see if there is a master device plumbed */
965 965 sidpp = ddi_get_soft_state(statep, minor);
966 966 if (sidpp == NULL || *sidpp == NULL) {
967 967 cmn_err(CE_WARN,
968 968 "snap_prop_op: could not find state for "
969 969 "snapshot %d.", minor);
970 970 return (DDI_PROP_NOT_FOUND);
971 971 }
972 972
973 973 if (((*sidpp)->sid_fvp == NULL) || ((*sidpp)->sid_fvp->v_vfsp == NULL))
974 974 return (ddi_prop_op(dev, dip, prop_op, flags, name,
975 975 valuep, lengthp));
976 976
977 977 /* hold master device and pass operation down */
978 978 mdev = (*sidpp)->sid_fvp->v_vfsp->vfs_dev;
979 979 if (mdip = e_ddi_hold_devi_by_dev(mdev, 0)) {
980 980
981 981 /* get size information from the master device. */
982 982 error = cdev_prop_op(mdev, mdip,
983 983 prop_op, flags, name, valuep, lengthp);
984 984 ddi_release_devi(mdip);
985 985 if (error == DDI_PROP_SUCCESS)
986 986 return (error);
987 987 }
988 988
989 989 /* master device did not service the request, try framework */
990 990 return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp));
991 991
992 992 }
993 993
994 994 /*
995 995 * snap_ioctl() - snapshot driver ioctl(9E) routine
996 996 *
997 997 * only applies to the control device. The control device accepts two
998 998 * ioctl requests: create a snapshot or delete a snapshot. In either
999 999 * case, the vnode for the requested file system is extracted, and the
1000 1000 * request is passed on to the file system via the same ioctl. The file
1001 1001 * system is responsible for doing the things necessary for creating or
1002 1002 * destroying a snapshot, including any file system specific operations
1003 1003 * that must be performed as well as setting up and deleting the snapshot
1004 1004 * state through the fssnap interfaces.
1005 1005 */
1006 1006 static int
1007 1007 snap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1008 1008 int *rvalp)
1009 1009 {
1010 1010 minor_t minor;
1011 1011 int error = 0;
1012 1012
1013 1013 minor = getminor(dev);
1014 1014
1015 1015 if (minor != SNAP_CTL_MINOR) {
1016 1016 return (EINVAL);
1017 1017 }
1018 1018
1019 1019 switch (cmd) {
1020 1020 case _FIOSNAPSHOTCREATE:
1021 1021 {
1022 1022 struct fiosnapcreate fc;
1023 1023 struct file *fp;
1024 1024 struct vnode *vp;
1025 1025
1026 1026 if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
1027 1027 return (EFAULT);
1028 1028
1029 1029 /* get vnode for file system mount point */
1030 1030 if ((fp = getf(fc.rootfiledesc)) == NULL)
1031 1031 return (EBADF);
1032 1032
1033 1033 ASSERT(fp->f_vnode);
1034 1034 vp = fp->f_vnode;
1035 1035 VN_HOLD(vp);
1036 1036 releasef(fc.rootfiledesc);
1037 1037
1038 1038 /* pass ioctl request to file system */
1039 1039 error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
1040 1040 VN_RELE(vp);
1041 1041 break;
1042 1042 }
1043 1043 case _FIOSNAPSHOTCREATE_MULTI:
1044 1044 {
1045 1045 struct fiosnapcreate_multi fc;
1046 1046 struct file *fp;
1047 1047 struct vnode *vp;
1048 1048
1049 1049 if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
1050 1050 return (EFAULT);
1051 1051
1052 1052 /* get vnode for file system mount point */
1053 1053 if ((fp = getf(fc.rootfiledesc)) == NULL)
1054 1054 return (EBADF);
1055 1055
1056 1056 ASSERT(fp->f_vnode);
1057 1057 vp = fp->f_vnode;
1058 1058 VN_HOLD(vp);
1059 1059 releasef(fc.rootfiledesc);
1060 1060
1061 1061 /* pass ioctl request to file system */
1062 1062 error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
1063 1063 VN_RELE(vp);
1064 1064 break;
1065 1065 }
1066 1066 case _FIOSNAPSHOTDELETE:
1067 1067 {
1068 1068 major_t major;
1069 1069 struct fiosnapdelete fc;
1070 1070 snapshot_id_t *sidp = NULL;
1071 1071 snapshot_id_t *sidnextp = NULL;
1072 1072 struct file *fp = NULL;
1073 1073 struct vnode *vp = NULL;
1074 1074 struct vfs *vfsp = NULL;
1075 1075 vfsops_t *vfsops = EIO_vfsops;
1076 1076
1077 1077 if (ddi_copyin((void *)arg, &fc, sizeof (fc), mode))
1078 1078 return (EFAULT);
1079 1079
1080 1080 /* get vnode for file system mount point */
1081 1081 if ((fp = getf(fc.rootfiledesc)) == NULL)
1082 1082 return (EBADF);
1083 1083
1084 1084 ASSERT(fp->f_vnode);
1085 1085 vp = fp->f_vnode;
1086 1086 VN_HOLD(vp);
1087 1087 releasef(fc.rootfiledesc);
1088 1088 /*
1089 1089 * Test for two formats of delete and set correct minor/vp:
1090 1090 * pseudo device:
1091 1091 * fssnap -d [/dev/fssnap/x]
1092 1092 * or
1093 1093 * mount point:
1094 1094 * fssnap -d [/mntpt]
1095 1095 * Note that minor is verified to be equal to SNAP_CTL_MINOR
1096 1096 * at this point which is an invalid minor number.
1097 1097 */
1098 1098 ASSERT(fssnap_dip != NULL);
1099 1099 major = ddi_driver_major(fssnap_dip);
1100 1100 mutex_enter(&snapshot_mutex);
1101 1101 for (sidp = snapshot; sidp != NULL; sidp = sidnextp) {
1102 1102 rw_enter(&sidp->sid_rwlock, RW_READER);
1103 1103 sidnextp = sidp->sid_next;
1104 1104 /* pseudo device: */
1105 1105 if (major == getmajor(vp->v_rdev)) {
1106 1106 minor = getminor(vp->v_rdev);
1107 1107 if (sidp->sid_snapnumber == (uint_t)minor &&
1108 1108 sidp->sid_fvp) {
1109 1109 VN_RELE(vp);
1110 1110 vp = sidp->sid_fvp;
1111 1111 VN_HOLD(vp);
1112 1112 rw_exit(&sidp->sid_rwlock);
1113 1113 break;
1114 1114 }
1115 1115 /* Mount point: */
1116 1116 } else {
1117 1117 if (sidp->sid_fvp == vp) {
1118 1118 minor = sidp->sid_snapnumber;
1119 1119 rw_exit(&sidp->sid_rwlock);
1120 1120 break;
1121 1121 }
1122 1122 }
1123 1123 rw_exit(&sidp->sid_rwlock);
1124 1124 }
1125 1125 mutex_exit(&snapshot_mutex);
1126 1126 /* Verify minor got set correctly above */
1127 1127 if (minor == SNAP_CTL_MINOR) {
1128 1128 VN_RELE(vp);
1129 1129 return (EINVAL);
1130 1130 }
1131 1131 dev = makedevice(major, minor);
1132 1132 /*
1133 1133 * Create dummy vfs entry
1134 1134 * to use as a locking semaphore across the IOCTL
1135 1135 * for mount in progress cases...
1136 1136 */
1137 1137 vfsp = vfs_alloc(KM_SLEEP);
1138 1138 VFS_INIT(vfsp, vfsops, NULL);
1139 1139 VFS_HOLD(vfsp);
1140 1140 vfs_addmip(dev, vfsp);
1141 1141 if ((vfs_devmounting(dev, vfsp)) ||
1142 1142 (vfs_devismounted(dev))) {
1143 1143 vfs_delmip(vfsp);
1144 1144 VFS_RELE(vfsp);
1145 1145 VN_RELE(vp);
1146 1146 return (EBUSY);
1147 1147 }
1148 1148 /*
1149 1149 * Nobody mounted but do not release mount in progress lock
1150 1150 * until IOCTL complete to prohibit a mount sneaking
1151 1151 * in
1152 1152 */
1153 1153 error = VOP_IOCTL(vp, cmd, arg, 0, credp, rvalp, NULL);
1154 1154 vfs_delmip(vfsp);
1155 1155 VFS_RELE(vfsp);
1156 1156 VN_RELE(vp);
1157 1157 break;
1158 1158 }
1159 1159 default:
1160 1160 cmn_err(CE_WARN, "snap_ioctl: Invalid ioctl cmd %d, minor %d.",
1161 1161 cmd, minor);
1162 1162 return (EINVAL);
1163 1163 }
1164 1164
1165 1165 return (error);
1166 1166 }
1167 1167
1168 1168
1169 1169 /* ************************************************************************ */
1170 1170
1171 1171 /*
1172 1172 * Translation Table Routines
1173 1173 *
1174 1174 * These support routines implement a simple doubly linked list
1175 1175 * to keep track of chunks that are currently in memory. The maximum
1176 1176 * size of the list is determined by the fssnap_max_mem_chunks variable.
1177 1177 * The cmap_rwlock is used to protect the linkage of the list.
1178 1178 */
1179 1179
1180 1180 /*
1181 1181 * transtbl_add() - add a node to the translation table
1182 1182 *
1183 1183 * allocates a new node and points it at the buffer passed in. The node
1184 1184 * is added to the beginning of the doubly linked list and the head of
1185 1185 * the list is moved. The cmap_rwlock must be held as a writer through
1186 1186 * this operation.
1187 1187 */
1188 1188 static cow_map_node_t *
1189 1189 transtbl_add(cow_map_t *cmap, chunknumber_t chunk, caddr_t buf)
1190 1190 {
1191 1191 cow_map_node_t *cmnode;
1192 1192
1193 1193 ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock));
1194 1194
1195 1195 cmnode = kmem_alloc(sizeof (cow_map_node_t), KM_SLEEP);
1196 1196
1197 1197 /*
1198 1198 * insert new translations at the beginning so cmn_table is always
1199 1199 * the first node.
1200 1200 */
1201 1201 cmnode->cmn_chunk = chunk;
1202 1202 cmnode->cmn_buf = buf;
1203 1203 cmnode->cmn_prev = NULL;
1204 1204 cmnode->cmn_next = cmap->cmap_table;
1205 1205 if (cmnode->cmn_next)
1206 1206 cmnode->cmn_next->cmn_prev = cmnode;
1207 1207 cmap->cmap_table = cmnode;
1208 1208
1209 1209 return (cmnode);
1210 1210 }
1211 1211
1212 1212 /*
1213 1213 * transtbl_get() - look up a node in the translation table
1214 1214 *
1215 1215 * called by the snapshot driver to find data that has been translated.
1216 1216 * The lookup is done by the chunk number, and the node is returned.
1217 1217 * If the node was not found, NULL is returned.
1218 1218 */
1219 1219 static cow_map_node_t *
1220 1220 transtbl_get(cow_map_t *cmap, chunknumber_t chunk)
1221 1221 {
1222 1222 cow_map_node_t *cmn;
1223 1223
1224 1224 ASSERT(RW_READ_HELD(&cmap->cmap_rwlock));
1225 1225 ASSERT(cmap);
1226 1226
1227 1227 /* search the translation table */
1228 1228 for (cmn = cmap->cmap_table; cmn != NULL; cmn = cmn->cmn_next) {
1229 1229 if (cmn->cmn_chunk == chunk)
1230 1230 return (cmn);
1231 1231 }
1232 1232
1233 1233 /* not found */
1234 1234 return (NULL);
1235 1235 }
1236 1236
1237 1237 /*
1238 1238 * transtbl_delete() - delete a node from the translation table
1239 1239 *
1240 1240 * called when a node's data has been written out to disk. The
1241 1241 * cmap_rwlock must be held as a writer for this operation. If the node
1242 1242 * being deleted is the head of the list, then the head is moved to the
1243 1243 * next node. Both the node's data and the node itself are freed.
1244 1244 */
1245 1245 static void
1246 1246 transtbl_delete(cow_map_t *cmap, cow_map_node_t *cmn)
1247 1247 {
1248 1248 ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock));
1249 1249 ASSERT(cmn);
1250 1250 ASSERT(cmap->cmap_table);
1251 1251
1252 1252 /* if the head of the list is being deleted, then move the head up */
1253 1253 if (cmap->cmap_table == cmn) {
1254 1254 ASSERT(cmn->cmn_prev == NULL);
1255 1255 cmap->cmap_table = cmn->cmn_next;
1256 1256 }
1257 1257
1258 1258
1259 1259 /* make previous node's next pointer skip over current node */
1260 1260 if (cmn->cmn_prev != NULL) {
1261 1261 ASSERT(cmn->cmn_prev->cmn_next == cmn);
1262 1262 cmn->cmn_prev->cmn_next = cmn->cmn_next;
1263 1263 }
1264 1264
1265 1265 /* make next node's previous pointer skip over current node */
1266 1266 if (cmn->cmn_next != NULL) {
1267 1267 ASSERT(cmn->cmn_next->cmn_prev == cmn);
1268 1268 cmn->cmn_next->cmn_prev = cmn->cmn_prev;
1269 1269 }
1270 1270
1271 1271 /* free the data and the node */
1272 1272 ASSERT(cmn->cmn_buf);
1273 1273 kmem_free(cmn->cmn_buf, cmap->cmap_chunksz);
1274 1274 kmem_free(cmn, sizeof (cow_map_node_t));
1275 1275 }
1276 1276
1277 1277 /*
1278 1278 * transtbl_free() - free the entire translation table
1279 1279 *
1280 1280 * called when the snapshot is deleted. This frees all of the nodes in
1281 1281 * the translation table (but not the bitmaps).
1282 1282 */
1283 1283 static void
1284 1284 transtbl_free(cow_map_t *cmap)
1285 1285 {
1286 1286 cow_map_node_t *curnode;
1287 1287 cow_map_node_t *tempnode;
1288 1288
1289 1289 for (curnode = cmap->cmap_table; curnode != NULL; curnode = tempnode) {
1290 1290 tempnode = curnode->cmn_next;
1291 1291
1292 1292 kmem_free(curnode->cmn_buf, cmap->cmap_chunksz);
1293 1293 kmem_free(curnode, sizeof (cow_map_node_t));
1294 1294 }
1295 1295 }
1296 1296
1297 1297
1298 1298 /* ************************************************************************ */
1299 1299
1300 1300 /*
1301 1301 * Interface Implementation Routines
1302 1302 *
1303 1303 * The following functions implement snapshot interface routines that are
1304 1304 * called by the file system to create, delete, and use a snapshot. The
1305 1305 * interfaces are defined in fssnap_if.c and are filled in by this driver
1306 1306 * when it is loaded. This technique allows the file system to depend on
1307 1307 * the interface module without having to load the full implementation and
1308 1308 * snapshot device drivers.
1309 1309 */
1310 1310
1311 1311 /*
1312 1312 * fssnap_strategy_impl() - strategy routine called by the file system
1313 1313 *
1314 1314 * called by the file system to handle copy-on-write when necessary. All
1315 1315 * reads and writes that the file system performs should go through this
1316 1316 * function. If the file system calls the underlying device's strategy
1317 1317 * routine without going through fssnap_strategy() (eg. by calling
1318 1318 * bdev_strategy()), the snapshot may not be consistent.
1319 1319 *
1320 1320 * This function starts by doing significant sanity checking to insure
1321 1321 * the snapshot was not deleted out from under it or deleted and then
1322 1322 * recreated. To do this, it checks the actual pointer passed into it
1323 1323 * (ie. the handle held by the file system). NOTE that the parameter is
1324 1324 * a POINTER TO A POINTER to the snapshot id. Once the snapshot id is
1325 1325 * locked, it knows things are ok and that this snapshot is really for
1326 1326 * this file system.
1327 1327 *
1328 1328 * If the request is a write, fssnap_translate() is called to determine
1329 1329 * whether a copy-on-write is required. If it is a read, the read is
1330 1330 * simply passed on to the underlying device.
1331 1331 */
1332 1332 static void
1333 1333 fssnap_strategy_impl(void *snapshot_id, buf_t *bp)
1334 1334 {
1335 1335 struct snapshot_id **sidpp;
1336 1336 struct snapshot_id *sidp;
1337 1337 int error;
1338 1338
1339 1339 /* read requests are always passed through */
1340 1340 if (bp->b_flags & B_READ) {
1341 1341 (void) bdev_strategy(bp);
1342 1342 return;
1343 1343 }
1344 1344
1345 1345 /*
1346 1346 * Because we were not able to take the snapshot read lock BEFORE
1347 1347 * checking for a snapshot back in the file system, things may have
1348 1348 * drastically changed out from under us. For instance, the snapshot
1349 1349 * may have been deleted, deleted and recreated, or worse yet, deleted
1350 1350 * for this file system but now the snapshot number is in use by another
1351 1351 * file system.
1352 1352 *
1353 1353 * Having a pointer to the file system's snapshot id pointer allows us
1354 1354 * to sanity check most of this, though it assumes the file system is
1355 1355 * keeping track of a pointer to the snapshot_id somewhere.
1356 1356 */
1357 1357 sidpp = (struct snapshot_id **)snapshot_id;
1358 1358 sidp = *sidpp;
1359 1359
1360 1360 /*
1361 1361 * if this file system's snapshot was disabled, just pass the
1362 1362 * request through.
1363 1363 */
1364 1364 if (sidp == NULL) {
1365 1365 (void) bdev_strategy(bp);
1366 1366 return;
1367 1367 }
1368 1368
1369 1369 /*
1370 1370 * Once we have the reader lock the snapshot will not magically go
1371 1371 * away. But things may have changed on us before this so double check.
1372 1372 */
1373 1373 rw_enter(&sidp->sid_rwlock, RW_READER);
1374 1374
1375 1375 /*
1376 1376 * if an error was founds somewhere the DELETE flag will be
1377 1377 * set to indicate the snapshot should be deleted and no new
1378 1378 * translations should occur.
1379 1379 */
1380 1380 if (sidp->sid_flags & SID_DELETE) {
1381 1381 rw_exit(&sidp->sid_rwlock);
1382 1382 (void) fssnap_delete_impl(sidpp);
1383 1383 (void) bdev_strategy(bp);
1384 1384 return;
1385 1385 }
1386 1386
1387 1387 /*
1388 1388 * If the file system is no longer pointing to the snapshot we were
1389 1389 * called with, then it should not attempt to translate this buffer as
1390 1390 * it may be going to a snapshot for a different file system.
1391 1391 * Even if the file system snapshot pointer is still the same, the
1392 1392 * snapshot may have been disabled before we got the reader lock.
1393 1393 */
1394 1394 if (sidp != *sidpp || SID_INACTIVE(sidp)) {
1395 1395 rw_exit(&sidp->sid_rwlock);
1396 1396 (void) bdev_strategy(bp);
1397 1397 return;
1398 1398 }
1399 1399
1400 1400 /*
1401 1401 * At this point we're sure the snapshot will not go away while the
1402 1402 * reader lock is held, and we are reasonably certain that we are
1403 1403 * writing to the correct snapshot.
1404 1404 */
1405 1405 if ((error = fssnap_translate(sidpp, bp)) != 0) {
1406 1406 /*
1407 1407 * fssnap_translate can release the reader lock if it
1408 1408 * has to wait for a semaphore. In this case it is possible
1409 1409 * for the snapshot to be deleted in this time frame. If this
1410 1410 * happens just sent the buf thru to the filesystems device.
1411 1411 */
1412 1412 if (sidp != *sidpp || SID_INACTIVE(sidp)) {
1413 1413 rw_exit(&sidp->sid_rwlock);
1414 1414 (void) bdev_strategy(bp);
1415 1415 return;
1416 1416 }
1417 1417 bioerror(bp, error);
1418 1418 biodone(bp);
1419 1419 }
1420 1420 rw_exit(&sidp->sid_rwlock);
1421 1421 }
1422 1422
1423 1423 /*
1424 1424 * fssnap_translate() - helper function for fssnap_strategy()
1425 1425 *
1426 1426 * performs the actual copy-on-write for write requests, if required.
1427 1427 * This function does the real work of the file system side of things.
1428 1428 *
1429 1429 * It first checks the candidate bitmap to quickly determine whether any
1430 1430 * action is necessary. If the candidate bitmap indicates the chunk was
1431 1431 * allocated when the snapshot was created, then it checks to see whether
1432 1432 * a translation already exists. If a translation already exists then no
1433 1433 * action is required. If the chunk is a candidate for copy-on-write,
1434 1434 * and a translation does not already exist, then the chunk is read in
1435 1435 * and a node is added to the translation table.
1436 1436 *
1437 1437 * Once all of the chunks in the request range have been copied (if they
1438 1438 * needed to be), then the original request can be satisfied and the old
1439 1439 * data can be overwritten.
1440 1440 */
1441 1441 static int
1442 1442 fssnap_translate(struct snapshot_id **sidpp, struct buf *wbp)
1443 1443 {
1444 1444 snapshot_id_t *sidp = *sidpp;
1445 1445 struct buf *oldbp; /* buffer to store old data in */
1446 1446 struct cow_info *cowp = sidp->sid_cowinfo;
1447 1447 cow_map_t *cmap = &cowp->cow_map;
1448 1448 cow_map_node_t *cmn;
1449 1449 chunknumber_t cowchunk, startchunk, endchunk;
1450 1450 int error;
1451 1451 int throttle_write = 0;
1452 1452
1453 1453 /* make sure the snapshot is active */
1454 1454 ASSERT(RW_READ_HELD(&sidp->sid_rwlock));
1455 1455
1456 1456 startchunk = dbtocowchunk(cmap, wbp->b_lblkno);
1457 1457 endchunk = dbtocowchunk(cmap, wbp->b_lblkno +
1458 1458 ((wbp->b_bcount-1) >> DEV_BSHIFT));
1459 1459
1460 1460 /*
1461 1461 * Do not throttle the writes of the fssnap taskq thread and
1462 1462 * the log roll (trans_roll) thread. Furthermore the writes to
1463 1463 * the on-disk log are also not subject to throttling.
1464 1464 * The fssnap_write_taskq thread's write can block on the throttling
1465 1465 * semaphore which leads to self-deadlock as this same thread
1466 1466 * releases the throttling semaphore after completing the IO.
1467 1467 * If the trans_roll thread's write is throttled then we can deadlock
1468 1468 * because the fssnap_taskq_thread which releases the throttling
1469 1469 * semaphore can block waiting for log space which can only be
1470 1470 * released by the trans_roll thread.
1471 1471 */
1472 1472
1473 1473 throttle_write = !(taskq_member(cowp->cow_taskq, curthread) ||
1474 1474 tsd_get(bypass_snapshot_throttle_key));
1475 1475
1476 1476 /*
1477 1477 * Iterate through all chunks covered by this write and perform the
1478 1478 * copy-aside if necessary. Once all chunks have been safely
1479 1479 * stowed away, the new data may be written in a single sweep.
1480 1480 *
1481 1481 * For each chunk in the range, the following sequence is performed:
1482 1482 * - Is the chunk a candidate for translation?
1483 1483 * o If not, then no translation is necessary, continue
1484 1484 * - If it is a candidate, then does it already have a translation?
1485 1485 * o If so, then no translation is necessary, continue
1486 1486 * - If it is a candidate, but does not yet have a translation,
1487 1487 * then read the old data and schedule an asynchronous taskq
1488 1488 * to write the old data to the backing file.
1489 1489 *
1490 1490 * Once this has been performed over the entire range of chunks, then
1491 1491 * it is safe to overwrite the data that is there.
1492 1492 *
1493 1493 * Note that no lock is required to check the candidate bitmap because
1494 1494 * it never changes once the snapshot is created. The reader lock is
1495 1495 * taken to check the hastrans bitmap since it may change. If it
1496 1496 * turns out a copy is required, then the lock is upgraded to a
1497 1497 * writer, and the bitmap is re-checked as it may have changed while
1498 1498 * the lock was released. Finally, the write lock is held while
1499 1499 * reading the old data to make sure it is not translated out from
1500 1500 * under us.
1501 1501 *
1502 1502 * This locking mechanism should be sufficient to handle multiple
1503 1503 * threads writing to overlapping chunks simultaneously.
1504 1504 */
1505 1505 for (cowchunk = startchunk; cowchunk <= endchunk; cowchunk++) {
1506 1506 /*
1507 1507 * If the cowchunk is outside of the range of our
1508 1508 * candidate maps, then simply break out of the
1509 1509 * loop and pass the I/O through to bdev_strategy.
1510 1510 * This would occur if the file system has grown
1511 1511 * larger since the snapshot was taken.
1512 1512 */
1513 1513 if (cowchunk >= (cmap->cmap_bmsize * NBBY))
1514 1514 break;
1515 1515
1516 1516 /*
1517 1517 * If no disk blocks were allocated in this chunk when the
1518 1518 * snapshot was created then no copy-on-write will be
1519 1519 * required. Since this bitmap is read-only no locks are
1520 1520 * necessary.
1521 1521 */
1522 1522 if (isclr(cmap->cmap_candidate, cowchunk)) {
1523 1523 continue;
1524 1524 }
1525 1525
1526 1526 /*
1527 1527 * If a translation already exists, the data can be written
1528 1528 * through since the old data has already been saved off.
1529 1529 */
1530 1530 if (isset(cmap->cmap_hastrans, cowchunk)) {
1531 1531 continue;
1532 1532 }
1533 1533
1534 1534
1535 1535 /*
1536 1536 * Throttle translations if there are too many outstanding
↓ open down ↓ |
1536 lines elided |
↑ open up ↑ |
1537 1537 * chunks in memory. The semaphore is sema_v'd by the taskq.
1538 1538 *
1539 1539 * You can't keep the sid_rwlock if you would go to sleep.
1540 1540 * This will result in deadlock when someone tries to delete
1541 1541 * the snapshot (wants the sid_rwlock as a writer, but can't
1542 1542 * get it).
1543 1543 */
1544 1544 if (throttle_write) {
1545 1545 if (sema_tryp(&cmap->cmap_throttle_sem) == 0) {
1546 1546 rw_exit(&sidp->sid_rwlock);
1547 - atomic_add_32(&cmap->cmap_waiters, 1);
1547 + atomic_inc_32(&cmap->cmap_waiters);
1548 1548 sema_p(&cmap->cmap_throttle_sem);
1549 - atomic_add_32(&cmap->cmap_waiters, -1);
1549 + atomic_dec_32(&cmap->cmap_waiters);
1550 1550 rw_enter(&sidp->sid_rwlock, RW_READER);
1551 1551
1552 1552 /*
1553 1553 * Now since we released the sid_rwlock the state may
1554 1554 * have transitioned underneath us. so check that again.
1555 1555 */
1556 1556 if (sidp != *sidpp || SID_INACTIVE(sidp)) {
1557 1557 sema_v(&cmap->cmap_throttle_sem);
1558 1558 return (ENXIO);
1559 1559 }
1560 1560 }
1561 1561 }
1562 1562
1563 1563 /*
1564 1564 * Acquire the lock as a writer and check to see if a
1565 1565 * translation has been added in the meantime.
1566 1566 */
1567 1567 rw_enter(&cmap->cmap_rwlock, RW_WRITER);
1568 1568 if (isset(cmap->cmap_hastrans, cowchunk)) {
1569 1569 if (throttle_write)
1570 1570 sema_v(&cmap->cmap_throttle_sem);
1571 1571 rw_exit(&cmap->cmap_rwlock);
1572 1572 continue; /* go to the next chunk */
1573 1573 }
1574 1574
1575 1575 /*
1576 1576 * read a full chunk of data from the requested offset rounded
1577 1577 * down to the nearest chunk size.
1578 1578 */
1579 1579 oldbp = getrbuf(KM_SLEEP);
1580 1580 oldbp->b_lblkno = cowchunktodb(cmap, cowchunk);
1581 1581 oldbp->b_edev = wbp->b_edev;
1582 1582 oldbp->b_bcount = cmap->cmap_chunksz;
1583 1583 oldbp->b_bufsize = cmap->cmap_chunksz;
1584 1584 oldbp->b_iodone = NULL;
1585 1585 oldbp->b_proc = NULL;
1586 1586 oldbp->b_flags = B_READ;
1587 1587 oldbp->b_un.b_addr = kmem_alloc(cmap->cmap_chunksz, KM_SLEEP);
1588 1588
1589 1589 (void) bdev_strategy(oldbp);
1590 1590 (void) biowait(oldbp);
1591 1591
1592 1592 /*
1593 1593 * It's ok to bail in the middle of translating the range
1594 1594 * because the extra copy-asides will not hurt anything
1595 1595 * (except by using extra space in the backing store).
1596 1596 */
1597 1597 if ((error = geterror(oldbp)) != 0) {
1598 1598 cmn_err(CE_WARN, "fssnap_translate: error reading "
1599 1599 "old data for snapshot %d, chunk %llu, disk block "
1600 1600 "%lld, size %lu, error %d.", sidp->sid_snapnumber,
1601 1601 cowchunk, oldbp->b_lblkno, oldbp->b_bcount, error);
1602 1602 kmem_free(oldbp->b_un.b_addr, cmap->cmap_chunksz);
1603 1603 freerbuf(oldbp);
1604 1604 rw_exit(&cmap->cmap_rwlock);
1605 1605 if (throttle_write)
1606 1606 sema_v(&cmap->cmap_throttle_sem);
1607 1607 return (error);
1608 1608 }
1609 1609
1610 1610 /*
1611 1611 * add the node to the translation table and save a reference
1612 1612 * to pass to the taskq for writing out to the backing file
1613 1613 */
1614 1614 cmn = transtbl_add(cmap, cowchunk, oldbp->b_un.b_addr);
1615 1615 freerbuf(oldbp);
1616 1616
1617 1617 /*
1618 1618 * Add a reference to the snapshot id so the lower level
1619 1619 * processing (ie. the taskq) can get back to the state
1620 1620 * information.
1621 1621 */
1622 1622 cmn->cmn_sid = sidp;
1623 1623 cmn->release_sem = throttle_write;
1624 1624 setbit(cmap->cmap_hastrans, cowchunk);
1625 1625
1626 1626 rw_exit(&cmap->cmap_rwlock);
1627 1627
1628 1628 /*
1629 1629 * schedule the asynchronous write to the backing file
1630 1630 */
1631 1631 if (cowp->cow_backfile_array != NULL)
1632 1632 (void) taskq_dispatch(cowp->cow_taskq,
1633 1633 fssnap_write_taskq, cmn, TQ_SLEEP);
1634 1634 }
1635 1635
1636 1636 /*
1637 1637 * Write new data in place of the old data. At this point all of the
1638 1638 * chunks touched by this write have been copied aside and so the new
1639 1639 * data can be written out all at once.
1640 1640 */
1641 1641 (void) bdev_strategy(wbp);
1642 1642
1643 1643 return (0);
1644 1644 }
1645 1645
1646 1646 /*
1647 1647 * fssnap_write_taskq() - write in-memory translations to the backing file
1648 1648 *
1649 1649 * writes in-memory translations to the backing file asynchronously. A
1650 1650 * task is dispatched each time a new translation is created. The task
1651 1651 * writes the data to the backing file and removes it from the memory
1652 1652 * list. The throttling semaphore is released only if the particular
1653 1653 * translation was throttled in fssnap_translate.
1654 1654 */
1655 1655 static void
1656 1656 fssnap_write_taskq(void *arg)
1657 1657 {
1658 1658 cow_map_node_t *cmn = (cow_map_node_t *)arg;
1659 1659 snapshot_id_t *sidp = cmn->cmn_sid;
1660 1660 cow_info_t *cowp = sidp->sid_cowinfo;
1661 1661 cow_map_t *cmap = &cowp->cow_map;
1662 1662 int error;
1663 1663 int bf_index;
1664 1664 int release_sem = cmn->release_sem;
1665 1665
1666 1666 /*
1667 1667 * The sid_rwlock does not need to be held here because the taskqs
1668 1668 * are destroyed explicitly by fssnap_delete (with the sid_rwlock
1669 1669 * held as a writer). taskq_destroy() will flush all of the tasks
1670 1670 * out before fssnap_delete frees up all of the structures.
1671 1671 */
1672 1672
↓ open down ↓ |
113 lines elided |
↑ open up ↑ |
1673 1673 /* if the snapshot was disabled from under us, drop the request. */
1674 1674 rw_enter(&sidp->sid_rwlock, RW_READER);
1675 1675 if (SID_INACTIVE(sidp)) {
1676 1676 rw_exit(&sidp->sid_rwlock);
1677 1677 if (release_sem)
1678 1678 sema_v(&cmap->cmap_throttle_sem);
1679 1679 return;
1680 1680 }
1681 1681 rw_exit(&sidp->sid_rwlock);
1682 1682
1683 - atomic_add_64((uint64_t *)&cmap->cmap_nchunks, 1);
1683 + atomic_inc_64((uint64_t *)&cmap->cmap_nchunks);
1684 1684
1685 1685 if ((cmap->cmap_maxsize != 0) &&
1686 1686 ((cmap->cmap_nchunks * cmap->cmap_chunksz) > cmap->cmap_maxsize)) {
1687 1687 cmn_err(CE_WARN, "fssnap_write_taskq: snapshot %d (%s) has "
1688 1688 "reached the maximum backing file size specified (%llu "
1689 1689 "bytes) and will be deleted.", sidp->sid_snapnumber,
1690 1690 (char *)cowp->cow_kstat_mntpt->ks_data,
1691 1691 cmap->cmap_maxsize);
1692 1692 if (release_sem)
1693 1693 sema_v(&cmap->cmap_throttle_sem);
1694 1694 atomic_or_uint(&sidp->sid_flags, SID_DELETE);
1695 1695 return;
1696 1696 }
1697 1697
1698 1698 /* perform the write */
1699 1699 bf_index = cmn->cmn_chunk / cmap->cmap_chunksperbf;
1700 1700
1701 1701 if (error = vn_rdwr(UIO_WRITE, (cowp->cow_backfile_array)[bf_index],
1702 1702 cmn->cmn_buf, cmap->cmap_chunksz,
1703 1703 (cmn->cmn_chunk % cmap->cmap_chunksperbf) * cmap->cmap_chunksz,
1704 1704 UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, (ssize_t *)NULL)) {
1705 1705 cmn_err(CE_WARN, "fssnap_write_taskq: error writing to "
1706 1706 "backing file. DELETING SNAPSHOT %d, backing file path "
1707 1707 "%s, offset %llu bytes, error %d.", sidp->sid_snapnumber,
1708 1708 (char *)cowp->cow_kstat_bfname->ks_data,
1709 1709 cmn->cmn_chunk * cmap->cmap_chunksz, error);
1710 1710 if (release_sem)
1711 1711 sema_v(&cmap->cmap_throttle_sem);
1712 1712 atomic_or_uint(&sidp->sid_flags, SID_DELETE);
1713 1713 return;
1714 1714 }
1715 1715
1716 1716 /*
1717 1717 * now remove the node and buffer from memory
1718 1718 */
1719 1719 rw_enter(&cmap->cmap_rwlock, RW_WRITER);
1720 1720 transtbl_delete(cmap, cmn);
1721 1721 rw_exit(&cmap->cmap_rwlock);
1722 1722
1723 1723 /* Allow more translations */
1724 1724 if (release_sem)
1725 1725 sema_v(&cmap->cmap_throttle_sem);
1726 1726
1727 1727 }
1728 1728
1729 1729 /*
1730 1730 * fssnap_create_impl() - called from the file system to create a new snapshot
1731 1731 *
1732 1732 * allocates and initializes the structures needed for a new snapshot.
1733 1733 * This is called by the file system when it receives an ioctl request to
1734 1734 * create a new snapshot. An unused snapshot identifier is either found
1735 1735 * or created, and eventually returned as the opaque handle the file
1736 1736 * system will use to identify this snapshot. The snapshot number
1737 1737 * associated with the snapshot identifier is the same as the minor
1738 1738 * number for the snapshot device that is used to access that snapshot.
1739 1739 *
1740 1740 * The snapshot can not be used until the candidate bitmap is populated
1741 1741 * by the file system (see fssnap_set_candidate_impl()), and the file
1742 1742 * system finishes the setup process by calling fssnap_create_done().
1743 1743 * Nearly all of the snapshot locks are held for the duration of the
1744 1744 * create, and are not released until fssnap_create_done is called().
1745 1745 */
1746 1746 static void *
1747 1747 fssnap_create_impl(chunknumber_t nchunks, uint_t chunksz, u_offset_t maxsize,
1748 1748 struct vnode *fsvp, int backfilecount, struct vnode **bfvpp, char *backpath,
1749 1749 u_offset_t max_backfile_size)
1750 1750 {
1751 1751 refstr_t *mountpoint;
1752 1752 char taskqname[50];
1753 1753 struct cow_info *cowp;
1754 1754 struct cow_map *cmap;
1755 1755 struct snapshot_id *sidp;
1756 1756 int lastsnap;
1757 1757
1758 1758 /*
1759 1759 * Sanity check the parameters we care about
1760 1760 * (we don't care about the informational parameters)
1761 1761 */
1762 1762 if ((nchunks == 0) ||
1763 1763 ((chunksz % DEV_BSIZE) != 0) ||
1764 1764 (bfvpp == NULL)) {
1765 1765 return (NULL);
1766 1766 }
1767 1767
1768 1768 /*
1769 1769 * Look for unused snapshot identifiers. Snapshot ids are never
1770 1770 * freed, but deleted snapshot ids will be recycled as needed.
1771 1771 */
1772 1772 mutex_enter(&snapshot_mutex);
1773 1773
1774 1774 findagain:
1775 1775 lastsnap = 0;
1776 1776 for (sidp = snapshot; sidp != NULL; sidp = sidp->sid_next) {
1777 1777 if (sidp->sid_snapnumber > lastsnap)
1778 1778 lastsnap = sidp->sid_snapnumber;
1779 1779
1780 1780 /*
1781 1781 * The sid_rwlock is taken as a reader initially so that
1782 1782 * activity on each snapshot is not stalled while searching
1783 1783 * for a free snapshot id.
1784 1784 */
1785 1785 rw_enter(&sidp->sid_rwlock, RW_READER);
1786 1786
1787 1787 /*
1788 1788 * If the snapshot has been deleted and nobody is using the
1789 1789 * snapshot device than we can reuse this snapshot_id. If
1790 1790 * the snapshot is marked to be deleted (SID_DELETE), then
1791 1791 * it hasn't been deleted yet so don't reuse it.
1792 1792 */
1793 1793 if (SID_AVAILABLE(sidp))
1794 1794 break; /* This spot is unused, so take it */
1795 1795 rw_exit(&sidp->sid_rwlock);
1796 1796 }
1797 1797
1798 1798 /*
1799 1799 * add a new snapshot identifier if there are no deleted
1800 1800 * entries. Since it doesn't matter what order the entries
1801 1801 * are in we can just add it to the beginning of the list.
1802 1802 */
1803 1803 if (sidp) {
1804 1804 if (rw_tryupgrade(&sidp->sid_rwlock) == 0) {
1805 1805 /* someone else grabbed it as a writer, try again */
1806 1806 rw_exit(&sidp->sid_rwlock);
1807 1807 goto findagain;
1808 1808 }
1809 1809 } else {
1810 1810 /* Create a new node if we didn't find an unused one */
1811 1811 sidp = kmem_alloc(sizeof (struct snapshot_id), KM_SLEEP);
1812 1812 rw_init(&sidp->sid_rwlock, NULL, RW_DEFAULT, NULL);
1813 1813 rw_enter(&sidp->sid_rwlock, RW_WRITER);
1814 1814 sidp->sid_snapnumber = (snapshot == NULL) ? 0 : lastsnap + 1;
1815 1815 sidp->sid_cowinfo = NULL;
1816 1816 sidp->sid_flags = 0;
1817 1817 sidp->sid_next = snapshot;
1818 1818 snapshot = sidp;
1819 1819 }
1820 1820
1821 1821 ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock));
1822 1822 ASSERT(sidp->sid_cowinfo == NULL);
1823 1823 ASSERT(sidp->sid_snapnumber <= (lastsnap + 1));
1824 1824
1825 1825 sidp->sid_flags |= SID_CREATING;
1826 1826 /* The root vnode is held until snap_delete_impl() is called */
1827 1827 VN_HOLD(fsvp);
1828 1828 sidp->sid_fvp = fsvp;
1829 1829 num_snapshots++;
1830 1830
1831 1831 /* allocate and initialize structures */
1832 1832
1833 1833 cowp = kmem_zalloc(sizeof (struct cow_info), KM_SLEEP);
1834 1834
1835 1835 cowp->cow_backfile_array = bfvpp;
1836 1836 cowp->cow_backcount = backfilecount;
1837 1837 cowp->cow_backfile_sz = max_backfile_size;
1838 1838
1839 1839 /*
1840 1840 * Initialize task queues for this snapshot. Only a small number
1841 1841 * of threads are required because they will be serialized on the
1842 1842 * backing file's reader/writer lock anyway.
1843 1843 */
1844 1844 (void) snprintf(taskqname, sizeof (taskqname), "%s_taskq_%d", snapname,
1845 1845 sidp->sid_snapnumber);
1846 1846 cowp->cow_taskq = taskq_create(taskqname, fssnap_taskq_nthreads,
1847 1847 minclsyspri, 1, fssnap_taskq_maxtasks, 0);
1848 1848
1849 1849 /* don't allow tasks to start until after everything is ready */
1850 1850 taskq_suspend(cowp->cow_taskq);
1851 1851
1852 1852 /* initialize translation table */
1853 1853 cmap = &cowp->cow_map;
1854 1854 rw_init(&cmap->cmap_rwlock, NULL, RW_DEFAULT, NULL);
1855 1855 rw_enter(&cmap->cmap_rwlock, RW_WRITER);
1856 1856
1857 1857 sema_init(&cmap->cmap_throttle_sem, fssnap_max_mem_chunks, NULL,
1858 1858 SEMA_DEFAULT, NULL);
1859 1859
1860 1860 cmap->cmap_chunksz = chunksz;
1861 1861 cmap->cmap_maxsize = maxsize;
1862 1862 cmap->cmap_chunksperbf = max_backfile_size / chunksz;
1863 1863
1864 1864 /*
1865 1865 * allocate one bit per chunk for the bitmaps, round up
1866 1866 */
1867 1867 cmap->cmap_bmsize = (nchunks + (NBBY - 1)) / NBBY;
1868 1868 cmap->cmap_hastrans = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP);
1869 1869 cmap->cmap_candidate = kmem_zalloc(cmap->cmap_bmsize, KM_SLEEP);
1870 1870
1871 1871 sidp->sid_cowinfo = cowp;
1872 1872
1873 1873 /* initialize kstats for this snapshot */
1874 1874 mountpoint = vfs_getmntpoint(fsvp->v_vfsp);
1875 1875 fssnap_create_kstats(sidp, sidp->sid_snapnumber,
1876 1876 refstr_value(mountpoint), backpath);
1877 1877 refstr_rele(mountpoint);
1878 1878
1879 1879 mutex_exit(&snapshot_mutex);
1880 1880
1881 1881 /*
1882 1882 * return with snapshot id rwlock held as a writer until
1883 1883 * fssnap_create_done is called
1884 1884 */
1885 1885 return (sidp);
1886 1886 }
1887 1887
1888 1888 /*
1889 1889 * fssnap_set_candidate_impl() - mark a chunk as a candidate for copy-on-write
1890 1890 *
1891 1891 * sets a bit in the candidate bitmap that indicates that a chunk is a
1892 1892 * candidate for copy-on-write. Typically, chunks that are allocated on
1893 1893 * the file system at the time the snapshot is taken are candidates,
1894 1894 * while chunks that have no allocated data do not need to be copied.
1895 1895 * Chunks containing metadata must be marked as candidates as well.
1896 1896 */
1897 1897 static void
1898 1898 fssnap_set_candidate_impl(void *snapshot_id, chunknumber_t chunknumber)
1899 1899 {
1900 1900 struct snapshot_id *sid = snapshot_id;
1901 1901 struct cow_info *cowp = sid->sid_cowinfo;
1902 1902 struct cow_map *cmap = &cowp->cow_map;
1903 1903
1904 1904 /* simple bitmap operation for now */
1905 1905 ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY));
1906 1906 setbit(cmap->cmap_candidate, chunknumber);
1907 1907 }
1908 1908
1909 1909 /*
1910 1910 * fssnap_is_candidate_impl() - check whether a chunk is a candidate
1911 1911 *
1912 1912 * returns 0 if the chunk is not a candidate and 1 if the chunk is a
1913 1913 * candidate. This can be used by the file system to change behavior for
1914 1914 * chunks that might induce a copy-on-write. The offset is specified in
1915 1915 * bytes since the chunk size may not be known by the file system.
1916 1916 */
1917 1917 static int
1918 1918 fssnap_is_candidate_impl(void *snapshot_id, u_offset_t off)
1919 1919 {
1920 1920 struct snapshot_id *sid = snapshot_id;
1921 1921 struct cow_info *cowp = sid->sid_cowinfo;
1922 1922 struct cow_map *cmap = &cowp->cow_map;
1923 1923 ulong_t chunknumber = off / cmap->cmap_chunksz;
1924 1924
1925 1925 /* simple bitmap operation for now */
1926 1926 ASSERT(chunknumber < (cmap->cmap_bmsize * NBBY));
1927 1927 return (isset(cmap->cmap_candidate, chunknumber));
1928 1928 }
1929 1929
1930 1930 /*
1931 1931 * fssnap_create_done_impl() - complete the snapshot setup process
1932 1932 *
1933 1933 * called when the file system is done populating the candidate bitmap
1934 1934 * and it is ready to start using the snapshot. This routine releases
1935 1935 * the snapshot locks, allows taskq tasks to start processing, and
1936 1936 * creates the device minor nodes associated with the snapshot.
1937 1937 */
1938 1938 static int
1939 1939 fssnap_create_done_impl(void *snapshot_id)
1940 1940 {
1941 1941 struct snapshot_id **sidpp, *sidp = snapshot_id;
1942 1942 struct cow_info *cowp;
1943 1943 struct cow_map *cmap;
1944 1944 int snapnumber = -1;
1945 1945 char name[20];
1946 1946
1947 1947 /* sid rwlock and cmap rwlock should be taken from fssnap_create */
1948 1948 ASSERT(sidp);
1949 1949 ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock));
1950 1950 ASSERT(sidp->sid_cowinfo);
1951 1951
1952 1952 cowp = sidp->sid_cowinfo;
1953 1953 cmap = &cowp->cow_map;
1954 1954
1955 1955 ASSERT(RW_WRITE_HELD(&cmap->cmap_rwlock));
1956 1956
1957 1957 sidp->sid_flags &= ~(SID_CREATING | SID_DISABLED);
1958 1958 snapnumber = sidp->sid_snapnumber;
1959 1959
1960 1960 /* allocate state structure and find new snapshot id */
1961 1961 if (ddi_soft_state_zalloc(statep, snapnumber) != DDI_SUCCESS) {
1962 1962 cmn_err(CE_WARN,
1963 1963 "snap_ioctl: create: could not allocate "
1964 1964 "state for snapshot %d.", snapnumber);
1965 1965 snapnumber = -1;
1966 1966 goto out;
1967 1967 }
1968 1968
1969 1969 sidpp = ddi_get_soft_state(statep, snapnumber);
1970 1970 *sidpp = sidp;
1971 1971
1972 1972 /* create minor node based on snapshot number */
1973 1973 ASSERT(fssnap_dip != NULL);
1974 1974 (void) snprintf(name, sizeof (name), "%d", snapnumber);
1975 1975 if (ddi_create_minor_node(fssnap_dip, name, S_IFBLK,
1976 1976 snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) {
1977 1977 cmn_err(CE_WARN, "snap_ioctl: could not create "
1978 1978 "block minor node for snapshot %d.", snapnumber);
1979 1979 snapnumber = -1;
1980 1980 goto out;
1981 1981 }
1982 1982
1983 1983 (void) snprintf(name, sizeof (name), "%d,raw", snapnumber);
1984 1984 if (ddi_create_minor_node(fssnap_dip, name, S_IFCHR,
1985 1985 snapnumber, DDI_PSEUDO, 0) != DDI_SUCCESS) {
1986 1986 cmn_err(CE_WARN, "snap_ioctl: could not create "
1987 1987 "character minor node for snapshot %d.", snapnumber);
1988 1988 snapnumber = -1;
1989 1989 }
1990 1990
1991 1991 out:
1992 1992 rw_exit(&sidp->sid_rwlock);
1993 1993 rw_exit(&cmap->cmap_rwlock);
1994 1994
1995 1995 /* let the taskq threads start processing */
1996 1996 taskq_resume(cowp->cow_taskq);
1997 1997
1998 1998 return (snapnumber);
1999 1999 }
2000 2000
2001 2001 /*
2002 2002 * fssnap_delete_impl() - delete a snapshot
2003 2003 *
2004 2004 * used when a snapshot is no longer needed. This is called by the file
2005 2005 * system when it receives an ioctl request to delete a snapshot. It is
2006 2006 * also called internally when error conditions such as disk full, errors
2007 2007 * writing to the backing file, or backing file maxsize exceeded occur.
2008 2008 * If the snapshot device is busy when the delete request is received,
2009 2009 * all state will be deleted except for the soft state and device files
2010 2010 * associated with the snapshot; they will be deleted when the snapshot
2011 2011 * device is closed.
2012 2012 *
2013 2013 * NOTE this function takes a POINTER TO A POINTER to the snapshot id,
2014 2014 * and expects to be able to set the handle held by the file system to
2015 2015 * NULL. This depends on the file system checking that variable for NULL
2016 2016 * before calling fssnap_strategy().
2017 2017 */
2018 2018 static int
2019 2019 fssnap_delete_impl(void *snapshot_id)
2020 2020 {
2021 2021 struct snapshot_id **sidpp = (struct snapshot_id **)snapshot_id;
2022 2022 struct snapshot_id *sidp;
2023 2023 struct snapshot_id **statesidpp;
2024 2024 struct cow_info *cowp;
2025 2025 struct cow_map *cmap;
2026 2026 char name[20];
2027 2027 int snapnumber = -1;
2028 2028 vnode_t **vpp;
2029 2029
2030 2030 /*
2031 2031 * sidp is guaranteed to be valid if sidpp is valid because
2032 2032 * the snapshot list is append-only.
2033 2033 */
2034 2034 if (sidpp == NULL) {
2035 2035 return (-1);
2036 2036 }
2037 2037
2038 2038 sidp = *sidpp;
2039 2039 rw_enter(&sidp->sid_rwlock, RW_WRITER);
2040 2040
2041 2041 ASSERT(RW_WRITE_HELD(&sidp->sid_rwlock));
2042 2042
2043 2043 /*
2044 2044 * double check that the snapshot is still valid for THIS file system
2045 2045 */
2046 2046 if (*sidpp == NULL) {
2047 2047 rw_exit(&sidp->sid_rwlock);
2048 2048 return (-1);
2049 2049 }
2050 2050
2051 2051 /*
2052 2052 * Now we know the snapshot is still valid and will not go away
2053 2053 * because we have the write lock. Once the state is transitioned
2054 2054 * to "disabling", the sid_rwlock can be released. Any pending I/O
2055 2055 * waiting for the lock as a reader will check for this state and
2056 2056 * abort without touching data that may be getting freed.
2057 2057 */
2058 2058 sidp->sid_flags |= SID_DISABLING;
2059 2059 if (sidp->sid_flags & SID_DELETE) {
2060 2060 cmn_err(CE_WARN, "Snapshot %d automatically deleted.",
2061 2061 sidp->sid_snapnumber);
2062 2062 sidp->sid_flags &= ~(SID_DELETE);
2063 2063 }
2064 2064
2065 2065
2066 2066 /*
2067 2067 * This is pointing into file system specific data! The assumption is
2068 2068 * that fssnap_strategy() gets called from the file system based on
2069 2069 * whether this reference to the snapshot_id is NULL or not. So
2070 2070 * setting this to NULL should disable snapshots for the file system.
2071 2071 */
2072 2072 *sidpp = NULL;
2073 2073
2074 2074 /* remove cowinfo */
2075 2075 cowp = sidp->sid_cowinfo;
2076 2076 if (cowp == NULL) {
2077 2077 rw_exit(&sidp->sid_rwlock);
2078 2078 return (-1);
2079 2079 }
2080 2080 rw_exit(&sidp->sid_rwlock);
2081 2081
2082 2082 /* destroy task queues first so they don't reference freed data. */
2083 2083 if (cowp->cow_taskq) {
2084 2084 taskq_destroy(cowp->cow_taskq);
2085 2085 cowp->cow_taskq = NULL;
2086 2086 }
2087 2087
2088 2088 if (cowp->cow_backfile_array != NULL) {
2089 2089 for (vpp = cowp->cow_backfile_array; *vpp; vpp++)
2090 2090 VN_RELE(*vpp);
2091 2091 kmem_free(cowp->cow_backfile_array,
2092 2092 (cowp->cow_backcount + 1) * sizeof (vnode_t *));
2093 2093 cowp->cow_backfile_array = NULL;
2094 2094 }
2095 2095
2096 2096 sidp->sid_cowinfo = NULL;
2097 2097
2098 2098 /* remove cmap */
2099 2099 cmap = &cowp->cow_map;
2100 2100 ASSERT(cmap);
2101 2101
2102 2102 if (cmap->cmap_candidate)
2103 2103 kmem_free(cmap->cmap_candidate, cmap->cmap_bmsize);
2104 2104
2105 2105 if (cmap->cmap_hastrans)
2106 2106 kmem_free(cmap->cmap_hastrans, cmap->cmap_bmsize);
2107 2107
2108 2108 if (cmap->cmap_table)
2109 2109 transtbl_free(&cowp->cow_map);
2110 2110
2111 2111 rw_destroy(&cmap->cmap_rwlock);
2112 2112
2113 2113 while (cmap->cmap_waiters) {
2114 2114 sema_p(&cmap->cmap_throttle_sem);
2115 2115 sema_v(&cmap->cmap_throttle_sem);
2116 2116 }
2117 2117 sema_destroy(&cmap->cmap_throttle_sem);
2118 2118
2119 2119 /* remove kstats */
2120 2120 fssnap_delete_kstats(cowp);
2121 2121
2122 2122 kmem_free(cowp, sizeof (struct cow_info));
2123 2123
2124 2124 statesidpp = ddi_get_soft_state(statep, sidp->sid_snapnumber);
2125 2125 if (statesidpp == NULL || *statesidpp == NULL) {
2126 2126 cmn_err(CE_WARN,
2127 2127 "fssnap_delete_impl: could not find state for snapshot %d.",
2128 2128 sidp->sid_snapnumber);
2129 2129 }
2130 2130 ASSERT(*statesidpp == sidp);
2131 2131
2132 2132 /*
2133 2133 * Leave the node in the list marked DISABLED so it can be reused
2134 2134 * and avoid many race conditions. Return the snapshot number
2135 2135 * that was deleted.
2136 2136 */
2137 2137 mutex_enter(&snapshot_mutex);
2138 2138 rw_enter(&sidp->sid_rwlock, RW_WRITER);
2139 2139 sidp->sid_flags &= ~(SID_DISABLING);
2140 2140 sidp->sid_flags |= SID_DISABLED;
2141 2141 VN_RELE(sidp->sid_fvp);
2142 2142 sidp->sid_fvp = NULL;
2143 2143 snapnumber = sidp->sid_snapnumber;
2144 2144
2145 2145 /*
2146 2146 * If the snapshot is not busy, free the device info now. Otherwise
2147 2147 * the device nodes are freed in snap_close() when the device is
2148 2148 * closed. The sid will not be reused until the device is not busy.
2149 2149 */
2150 2150 if (SID_AVAILABLE(sidp)) {
2151 2151 /* remove the device nodes */
2152 2152 ASSERT(fssnap_dip != NULL);
2153 2153 (void) snprintf(name, sizeof (name), "%d",
2154 2154 sidp->sid_snapnumber);
2155 2155 ddi_remove_minor_node(fssnap_dip, name);
2156 2156 (void) snprintf(name, sizeof (name), "%d,raw",
2157 2157 sidp->sid_snapnumber);
2158 2158 ddi_remove_minor_node(fssnap_dip, name);
2159 2159
2160 2160 /* delete the state structure */
2161 2161 ddi_soft_state_free(statep, sidp->sid_snapnumber);
2162 2162 num_snapshots--;
2163 2163 }
2164 2164
2165 2165 mutex_exit(&snapshot_mutex);
2166 2166 rw_exit(&sidp->sid_rwlock);
2167 2167
2168 2168 return (snapnumber);
2169 2169 }
2170 2170
2171 2171 /*
2172 2172 * fssnap_create_kstats() - allocate and initialize snapshot kstats
2173 2173 *
2174 2174 */
2175 2175 static void
2176 2176 fssnap_create_kstats(snapshot_id_t *sidp, int snapnum,
2177 2177 const char *mountpoint, const char *backfilename)
2178 2178 {
2179 2179 kstat_t *num, *mntpoint, *bfname;
2180 2180 kstat_named_t *hw;
2181 2181 struct cow_info *cowp = sidp->sid_cowinfo;
2182 2182 struct cow_kstat_num *stats;
2183 2183
2184 2184 /* update the high water mark */
2185 2185 if (fssnap_highwater_kstat == NULL) {
2186 2186 cmn_err(CE_WARN, "fssnap_create_kstats: failed to lookup "
2187 2187 "high water mark kstat.");
2188 2188 return;
2189 2189 }
2190 2190
2191 2191 hw = (kstat_named_t *)fssnap_highwater_kstat->ks_data;
2192 2192 if (hw->value.ui32 < snapnum)
2193 2193 hw->value.ui32 = snapnum;
2194 2194
2195 2195 /* initialize the mount point kstat */
2196 2196 kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_MNTPT);
2197 2197
2198 2198 if (mountpoint != NULL) {
2199 2199 mntpoint = kstat_create(snapname, snapnum, FSSNAP_KSTAT_MNTPT,
2200 2200 "misc", KSTAT_TYPE_RAW, strlen(mountpoint) + 1, 0);
2201 2201 if (mntpoint == NULL) {
2202 2202 cowp->cow_kstat_mntpt = NULL;
2203 2203 cmn_err(CE_WARN, "fssnap_create_kstats: failed to "
2204 2204 "create mount point kstat");
2205 2205 } else {
2206 2206 (void) strncpy(mntpoint->ks_data, mountpoint,
2207 2207 strlen(mountpoint));
2208 2208 cowp->cow_kstat_mntpt = mntpoint;
2209 2209 kstat_install(mntpoint);
2210 2210 }
2211 2211 } else {
2212 2212 cowp->cow_kstat_mntpt = NULL;
2213 2213 cmn_err(CE_WARN, "fssnap_create_kstats: mount point not "
2214 2214 "specified.");
2215 2215 }
2216 2216
2217 2217 /* initialize the backing file kstat */
2218 2218 kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_BFNAME);
2219 2219
2220 2220 if (backfilename == NULL) {
2221 2221 cowp->cow_kstat_bfname = NULL;
2222 2222 } else {
2223 2223 bfname = kstat_create(snapname, snapnum, FSSNAP_KSTAT_BFNAME,
2224 2224 "misc", KSTAT_TYPE_RAW, strlen(backfilename) + 1, 0);
2225 2225 if (bfname != NULL) {
2226 2226 (void) strncpy(bfname->ks_data, backfilename,
2227 2227 strlen(backfilename));
2228 2228 cowp->cow_kstat_bfname = bfname;
2229 2229 kstat_install(bfname);
2230 2230 } else {
2231 2231 cowp->cow_kstat_bfname = NULL;
2232 2232 cmn_err(CE_WARN, "fssnap_create_kstats: failed to "
2233 2233 "create backing file name kstat");
2234 2234 }
2235 2235 }
2236 2236
2237 2237 /* initialize numeric kstats */
2238 2238 kstat_delete_byname(snapname, snapnum, FSSNAP_KSTAT_NUM);
2239 2239
2240 2240 num = kstat_create(snapname, snapnum, FSSNAP_KSTAT_NUM,
2241 2241 "misc", KSTAT_TYPE_NAMED,
2242 2242 sizeof (struct cow_kstat_num) / sizeof (kstat_named_t),
2243 2243 0);
2244 2244 if (num == NULL) {
2245 2245 cmn_err(CE_WARN, "fssnap_create_kstats: failed to create "
2246 2246 "numeric kstats");
2247 2247 cowp->cow_kstat_num = NULL;
2248 2248 return;
2249 2249 }
2250 2250
2251 2251 cowp->cow_kstat_num = num;
2252 2252 stats = num->ks_data;
2253 2253 num->ks_update = fssnap_update_kstat_num;
2254 2254 num->ks_private = sidp;
2255 2255
2256 2256 kstat_named_init(&stats->ckn_state, FSSNAP_KSTAT_NUM_STATE,
2257 2257 KSTAT_DATA_INT32);
2258 2258 kstat_named_init(&stats->ckn_bfsize, FSSNAP_KSTAT_NUM_BFSIZE,
2259 2259 KSTAT_DATA_UINT64);
2260 2260 kstat_named_init(&stats->ckn_maxsize, FSSNAP_KSTAT_NUM_MAXSIZE,
2261 2261 KSTAT_DATA_UINT64);
2262 2262 kstat_named_init(&stats->ckn_createtime, FSSNAP_KSTAT_NUM_CREATETIME,
2263 2263 KSTAT_DATA_LONG);
2264 2264 kstat_named_init(&stats->ckn_chunksize, FSSNAP_KSTAT_NUM_CHUNKSIZE,
2265 2265 KSTAT_DATA_UINT32);
2266 2266
2267 2267 /* initialize the static kstats */
2268 2268 stats->ckn_chunksize.value.ui32 = cowp->cow_map.cmap_chunksz;
2269 2269 stats->ckn_maxsize.value.ui64 = cowp->cow_map.cmap_maxsize;
2270 2270 stats->ckn_createtime.value.l = gethrestime_sec();
2271 2271
2272 2272 kstat_install(num);
2273 2273 }
2274 2274
2275 2275 /*
2276 2276 * fssnap_update_kstat_num() - update a numerical snapshot kstat value
2277 2277 *
2278 2278 */
2279 2279 int
2280 2280 fssnap_update_kstat_num(kstat_t *ksp, int rw)
2281 2281 {
2282 2282 snapshot_id_t *sidp = (snapshot_id_t *)ksp->ks_private;
2283 2283 struct cow_info *cowp = sidp->sid_cowinfo;
2284 2284 struct cow_kstat_num *stats = ksp->ks_data;
2285 2285
2286 2286 if (rw == KSTAT_WRITE)
2287 2287 return (EACCES);
2288 2288
2289 2289 /* state */
2290 2290 if (sidp->sid_flags & SID_CREATING)
2291 2291 stats->ckn_state.value.i32 = COWSTATE_CREATING;
2292 2292 else if (SID_INACTIVE(sidp))
2293 2293 stats->ckn_state.value.i32 = COWSTATE_DISABLED;
2294 2294 else if (SID_BUSY(sidp))
2295 2295 stats->ckn_state.value.i32 = COWSTATE_ACTIVE;
2296 2296 else
2297 2297 stats->ckn_state.value.i32 = COWSTATE_IDLE;
2298 2298
2299 2299 /* bfsize */
2300 2300 stats->ckn_bfsize.value.ui64 = cowp->cow_map.cmap_nchunks *
2301 2301 cowp->cow_map.cmap_chunksz;
2302 2302
2303 2303 return (0);
2304 2304 }
2305 2305
2306 2306 /*
2307 2307 * fssnap_delete_kstats() - deallocate snapshot kstats
2308 2308 *
2309 2309 */
2310 2310 void
2311 2311 fssnap_delete_kstats(struct cow_info *cowp)
2312 2312 {
2313 2313 if (cowp->cow_kstat_num != NULL) {
2314 2314 kstat_delete(cowp->cow_kstat_num);
2315 2315 cowp->cow_kstat_num = NULL;
2316 2316 }
2317 2317 if (cowp->cow_kstat_mntpt != NULL) {
2318 2318 kstat_delete(cowp->cow_kstat_mntpt);
2319 2319 cowp->cow_kstat_mntpt = NULL;
2320 2320 }
2321 2321 if (cowp->cow_kstat_bfname != NULL) {
2322 2322 kstat_delete(cowp->cow_kstat_bfname);
2323 2323 cowp->cow_kstat_bfname = NULL;
2324 2324 }
2325 2325 }
↓ open down ↓ |
632 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX