Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/ufs/lufs.c
+++ new/usr/src/uts/common/fs/ufs/lufs.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/systm.h>
27 27 #include <sys/types.h>
28 28 #include <sys/vnode.h>
29 29 #include <sys/buf.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/fssnap_if.h>
32 32 #include <sys/fs/ufs_inode.h>
33 33 #include <sys/fs/ufs_filio.h>
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/modctl.h>
36 36 #include <sys/fs/ufs_log.h>
37 37 #include <sys/fs/ufs_bio.h>
38 38 #include <sys/fs/ufs_fsdir.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/atomic.h>
41 41 #include <sys/kmem.h>
42 42 #include <sys/inttypes.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/mntent.h>
45 45 #include <sys/conf.h>
46 46 #include <sys/param.h>
47 47 #include <sys/kstat.h>
48 48 #include <sys/cmn_err.h>
49 49 #include <sys/sdt.h>
50 50
51 51 #define LUFS_GENID_PRIME UINT64_C(4294967291)
52 52 #define LUFS_GENID_BASE UINT64_C(311)
53 53 #define LUFS_NEXT_ID(id) ((uint32_t)(((id) * LUFS_GENID_BASE) % \
54 54 LUFS_GENID_PRIME))
55 55
56 56 extern kmutex_t ufs_scan_lock;
57 57
58 58 static kmutex_t log_mutex; /* general purpose log layer lock */
  59  59 kmutex_t ml_scan;	/* Scan thread synchronization */
  60  60 kcondvar_t ml_scan_cv;	/* Scan thread synchronization */
61 61
62 62 struct kmem_cache *lufs_sv;
63 63 struct kmem_cache *lufs_bp;
64 64
65 65 /* Tunables */
66 66 uint_t ldl_maxlogsize = LDL_MAXLOGSIZE;
67 67 uint_t ldl_minlogsize = LDL_MINLOGSIZE;
68 68 uint_t ldl_softlogcap = LDL_SOFTLOGCAP;
69 69 uint32_t ldl_divisor = LDL_DIVISOR;
70 70 uint32_t ldl_mintransfer = LDL_MINTRANSFER;
71 71 uint32_t ldl_maxtransfer = LDL_MAXTRANSFER;
72 72 uint32_t ldl_minbufsize = LDL_MINBUFSIZE;
73 73 uint32_t ldl_cgsizereq = 0;
74 74
75 75 /* Generation of header ids */
76 76 static kmutex_t genid_mutex;
77 77 static uint32_t last_loghead_ident = UINT32_C(0);
78 78
79 79 /*
80 80 * Logging delta and roll statistics
81 81 */
82 82 struct delta_kstats {
83 83 kstat_named_t ds_superblock_deltas;
84 84 kstat_named_t ds_bitmap_deltas;
85 85 kstat_named_t ds_suminfo_deltas;
86 86 kstat_named_t ds_allocblk_deltas;
87 87 kstat_named_t ds_ab0_deltas;
88 88 kstat_named_t ds_dir_deltas;
89 89 kstat_named_t ds_inode_deltas;
90 90 kstat_named_t ds_fbiwrite_deltas;
91 91 kstat_named_t ds_quota_deltas;
92 92 kstat_named_t ds_shadow_deltas;
93 93
94 94 kstat_named_t ds_superblock_rolled;
95 95 kstat_named_t ds_bitmap_rolled;
96 96 kstat_named_t ds_suminfo_rolled;
97 97 kstat_named_t ds_allocblk_rolled;
98 98 kstat_named_t ds_ab0_rolled;
99 99 kstat_named_t ds_dir_rolled;
100 100 kstat_named_t ds_inode_rolled;
101 101 kstat_named_t ds_fbiwrite_rolled;
102 102 kstat_named_t ds_quota_rolled;
103 103 kstat_named_t ds_shadow_rolled;
104 104 } dkstats = {
105 105 { "superblock_deltas", KSTAT_DATA_UINT64 },
106 106 { "bitmap_deltas", KSTAT_DATA_UINT64 },
107 107 { "suminfo_deltas", KSTAT_DATA_UINT64 },
108 108 { "allocblk_deltas", KSTAT_DATA_UINT64 },
109 109 { "ab0_deltas", KSTAT_DATA_UINT64 },
110 110 { "dir_deltas", KSTAT_DATA_UINT64 },
111 111 { "inode_deltas", KSTAT_DATA_UINT64 },
112 112 { "fbiwrite_deltas", KSTAT_DATA_UINT64 },
113 113 { "quota_deltas", KSTAT_DATA_UINT64 },
114 114 { "shadow_deltas", KSTAT_DATA_UINT64 },
115 115
116 116 { "superblock_rolled", KSTAT_DATA_UINT64 },
117 117 { "bitmap_rolled", KSTAT_DATA_UINT64 },
118 118 { "suminfo_rolled", KSTAT_DATA_UINT64 },
119 119 { "allocblk_rolled", KSTAT_DATA_UINT64 },
120 120 { "ab0_rolled", KSTAT_DATA_UINT64 },
121 121 { "dir_rolled", KSTAT_DATA_UINT64 },
122 122 { "inode_rolled", KSTAT_DATA_UINT64 },
123 123 { "fbiwrite_rolled", KSTAT_DATA_UINT64 },
124 124 { "quota_rolled", KSTAT_DATA_UINT64 },
125 125 { "shadow_rolled", KSTAT_DATA_UINT64 }
126 126 };
127 127
128 128 uint64_t delta_stats[DT_MAX];
129 129 uint64_t roll_stats[DT_MAX];
130 130
131 131 /*
132 132 * General logging kstats
133 133 */
134 134 struct logstats logstats = {
135 135 { "master_reads", KSTAT_DATA_UINT64 },
136 136 { "master_writes", KSTAT_DATA_UINT64 },
137 137 { "log_reads_inmem", KSTAT_DATA_UINT64 },
138 138 { "log_reads", KSTAT_DATA_UINT64 },
139 139 { "log_writes", KSTAT_DATA_UINT64 },
140 140 { "log_master_reads", KSTAT_DATA_UINT64 },
141 141 { "log_roll_reads", KSTAT_DATA_UINT64 },
142 142 { "log_roll_writes", KSTAT_DATA_UINT64 }
143 143 };
144 144
145 145 int
146 146 trans_not_done(struct buf *cb)
147 147 {
148 148 sema_v(&cb->b_io);
149 149 return (0);
150 150 }
151 151
152 152 static void
153 153 trans_wait_panic(struct buf *cb)
154 154 {
155 155 while ((cb->b_flags & B_DONE) == 0)
156 156 drv_usecwait(10);
157 157 }
158 158
159 159 int
160 160 trans_not_wait(struct buf *cb)
161 161 {
162 162 /*
163 163 * In case of panic, busy wait for completion
164 164 */
165 165 if (panicstr)
166 166 trans_wait_panic(cb);
167 167 else
168 168 sema_p(&cb->b_io);
169 169
170 170 return (geterror(cb));
171 171 }
172 172
173 173 int
174 174 trans_wait(struct buf *cb)
175 175 {
176 176 /*
177 177 * In case of panic, busy wait for completion and run md daemon queues
178 178 */
179 179 if (panicstr)
180 180 trans_wait_panic(cb);
181 181 return (biowait(cb));
182 182 }
183 183
184 184 static void
185 185 setsum(int32_t *sp, int32_t *lp, int nb)
186 186 {
187 187 int32_t csum = 0;
188 188
189 189 *sp = 0;
190 190 nb /= sizeof (int32_t);
191 191 while (nb--)
192 192 csum += *lp++;
193 193 *sp = csum;
194 194 }
195 195
196 196 static int
197 197 checksum(int32_t *sp, int32_t *lp, int nb)
198 198 {
199 199 int32_t ssum = *sp;
200 200
201 201 setsum(sp, lp, nb);
202 202 if (ssum != *sp) {
203 203 *sp = ssum;
204 204 return (0);
205 205 }
206 206 return (1);
207 207 }
208 208
209 209 void
210 210 lufs_unsnarf(ufsvfs_t *ufsvfsp)
211 211 {
212 212 ml_unit_t *ul;
213 213 mt_map_t *mtm;
214 214
215 215 ul = ufsvfsp->vfs_log;
216 216 if (ul == NULL)
217 217 return;
218 218
219 219 mtm = ul->un_logmap;
220 220
221 221 /*
222 222 * Wait for a pending top_issue_sync which is
 223 223 	 * dispatched (via taskq_dispatch()) but hasn't completed yet.
224 224 */
225 225
226 226 mutex_enter(&mtm->mtm_lock);
227 227
228 228 while (mtm->mtm_taskq_sync_count != 0) {
229 229 cv_wait(&mtm->mtm_cv, &mtm->mtm_lock);
230 230 }
231 231
232 232 mutex_exit(&mtm->mtm_lock);
233 233
234 234 /* Roll committed transactions */
235 235 logmap_roll_dev(ul);
236 236
237 237 /* Kill the roll thread */
238 238 logmap_kill_roll(ul);
239 239
 240 240 	/* release saved allocation info */
241 241 if (ul->un_ebp)
242 242 kmem_free(ul->un_ebp, ul->un_nbeb);
243 243
244 244 /* release circular bufs */
245 245 free_cirbuf(&ul->un_rdbuf);
246 246 free_cirbuf(&ul->un_wrbuf);
247 247
248 248 /* release maps */
249 249 if (ul->un_logmap)
250 250 ul->un_logmap = map_put(ul->un_logmap);
251 251 if (ul->un_deltamap)
252 252 ul->un_deltamap = map_put(ul->un_deltamap);
253 253 if (ul->un_matamap)
254 254 ul->un_matamap = map_put(ul->un_matamap);
255 255
256 256 mutex_destroy(&ul->un_log_mutex);
257 257 mutex_destroy(&ul->un_state_mutex);
258 258
259 259 /* release state buffer MUST BE LAST!! (contains our ondisk data) */
260 260 if (ul->un_bp)
261 261 brelse(ul->un_bp);
262 262 kmem_free(ul, sizeof (*ul));
263 263
264 264 ufsvfsp->vfs_log = NULL;
265 265 }
266 266
267 267 int
268 268 lufs_snarf(ufsvfs_t *ufsvfsp, struct fs *fs, int ronly)
269 269 {
270 270 buf_t *bp, *tbp;
271 271 ml_unit_t *ul;
272 272 extent_block_t *ebp;
273 273 ic_extent_block_t *nebp;
274 274 size_t nb;
275 275 daddr_t bno; /* in disk blocks */
276 276 int i;
277 277
278 278 /* LINTED: warning: logical expression always true: op "||" */
279 279 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE);
280 280
281 281 /*
282 282 * Get the allocation table
283 283 * During a remount the superblock pointed to by the ufsvfsp
284 284 * is out of date. Hence the need for the ``new'' superblock
285 285 * pointer, fs, passed in as a parameter.
286 286 */
287 287 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, fs->fs_logbno),
288 288 fs->fs_bsize);
289 289 if (bp->b_flags & B_ERROR) {
290 290 brelse(bp);
291 291 return (EIO);
292 292 }
293 293 ebp = (void *)bp->b_un.b_addr;
294 294 if (!checksum(&ebp->chksum, (int32_t *)bp->b_un.b_addr,
295 295 fs->fs_bsize)) {
296 296 brelse(bp);
297 297 return (ENODEV);
298 298 }
299 299
300 300 /*
301 301 * It is possible to get log blocks with all zeros.
302 302 * We should also check for nextents to be zero in such case.
303 303 */
304 304 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) {
305 305 brelse(bp);
306 306 return (EDOM);
307 307 }
308 308 /*
309 309 * Put allocation into memory. This requires conversion between
310 310 * on the ondisk format of the extent (type extent_t) and the
311 311 * in-core format of the extent (type ic_extent_t). The
312 312 * difference is the in-core form of the extent block stores
313 313 * the physical offset of the extent in disk blocks, which
314 314 * can require more than a 32-bit field.
315 315 */
316 316 nb = (size_t)(sizeof (ic_extent_block_t) +
317 317 ((ebp->nextents - 1) * sizeof (ic_extent_t)));
318 318 nebp = kmem_alloc(nb, KM_SLEEP);
319 319 nebp->ic_nextents = ebp->nextents;
320 320 nebp->ic_nbytes = ebp->nbytes;
321 321 nebp->ic_nextbno = ebp->nextbno;
322 322 for (i = 0; i < ebp->nextents; i++) {
323 323 nebp->ic_extents[i].ic_lbno = ebp->extents[i].lbno;
324 324 nebp->ic_extents[i].ic_nbno = ebp->extents[i].nbno;
325 325 nebp->ic_extents[i].ic_pbno =
326 326 logbtodb(fs, ebp->extents[i].pbno);
327 327 }
328 328 brelse(bp);
329 329
330 330 /*
331 331 * Get the log state
332 332 */
333 333 bno = nebp->ic_extents[0].ic_pbno;
334 334 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, DEV_BSIZE);
335 335 if (bp->b_flags & B_ERROR) {
336 336 brelse(bp);
337 337 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno + 1, DEV_BSIZE);
338 338 if (bp->b_flags & B_ERROR) {
339 339 brelse(bp);
340 340 kmem_free(nebp, nb);
341 341 return (EIO);
342 342 }
343 343 }
344 344
345 345 /*
346 346 * Put ondisk struct into an anonymous buffer
347 347 * This buffer will contain the memory for the ml_odunit struct
348 348 */
349 349 tbp = ngeteblk(dbtob(LS_SECTORS));
350 350 tbp->b_edev = bp->b_edev;
351 351 tbp->b_dev = bp->b_dev;
352 352 tbp->b_blkno = bno;
353 353 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE);
354 354 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE);
355 355 bp->b_flags |= (B_STALE | B_AGE);
356 356 brelse(bp);
357 357 bp = tbp;
358 358
359 359 /*
360 360 * Verify the log state
361 361 *
362 362 * read/only mounts w/bad logs are allowed. umount will
363 363 * eventually roll the bad log until the first IO error.
364 364 * fsck will then repair the file system.
365 365 *
366 366 * read/write mounts with bad logs are not allowed.
367 367 *
368 368 */
369 369 ul = (ml_unit_t *)kmem_zalloc(sizeof (*ul), KM_SLEEP);
370 370 bcopy(bp->b_un.b_addr, &ul->un_ondisk, sizeof (ml_odunit_t));
371 371 if ((ul->un_chksum != ul->un_head_ident + ul->un_tail_ident) ||
372 372 (ul->un_version != LUFS_VERSION_LATEST) ||
373 373 (!ronly && ul->un_badlog)) {
374 374 kmem_free(ul, sizeof (*ul));
375 375 brelse(bp);
376 376 kmem_free(nebp, nb);
377 377 return (EIO);
378 378 }
379 379 /*
380 380 * Initialize the incore-only fields
381 381 */
382 382 if (ronly)
383 383 ul->un_flags |= LDL_NOROLL;
384 384 ul->un_bp = bp;
385 385 ul->un_ufsvfs = ufsvfsp;
386 386 ul->un_dev = ufsvfsp->vfs_dev;
387 387 ul->un_ebp = nebp;
388 388 ul->un_nbeb = nb;
389 389 ul->un_maxresv = btodb(ul->un_logsize) * LDL_USABLE_BSIZE;
390 390 ul->un_deltamap = map_get(ul, deltamaptype, DELTAMAP_NHASH);
391 391 ul->un_logmap = map_get(ul, logmaptype, LOGMAP_NHASH);
392 392 if (ul->un_debug & MT_MATAMAP)
393 393 ul->un_matamap = map_get(ul, matamaptype, DELTAMAP_NHASH);
394 394 mutex_init(&ul->un_log_mutex, NULL, MUTEX_DEFAULT, NULL);
395 395 mutex_init(&ul->un_state_mutex, NULL, MUTEX_DEFAULT, NULL);
396 396
397 397 /*
 398 398 	 * Acquire the ufs_scan_lock before linking the mtm data
399 399 * structure so that we keep ufs_sync() and ufs_update() away
400 400 * when they execute the ufs_scan_inodes() run while we're in
401 401 * progress of enabling/disabling logging.
402 402 */
403 403 mutex_enter(&ufs_scan_lock);
404 404 ufsvfsp->vfs_log = ul;
405 405
406 406 /* remember the state of the log before the log scan */
407 407 logmap_logscan(ul);
408 408 mutex_exit(&ufs_scan_lock);
409 409
410 410 /*
411 411 * Error during scan
412 412 *
413 413 * If this is a read/only mount; ignore the error.
414 414 * At a later time umount/fsck will repair the fs.
415 415 *
416 416 */
417 417 if (ul->un_flags & LDL_ERROR) {
418 418 if (!ronly) {
419 419 /*
 420 420 			 * Acquire the ufs_scan_lock before de-linking
421 421 * the mtm data structure so that we keep ufs_sync()
422 422 * and ufs_update() away when they execute the
423 423 * ufs_scan_inodes() run while we're in progress of
424 424 * enabling/disabling logging.
425 425 */
426 426 mutex_enter(&ufs_scan_lock);
427 427 lufs_unsnarf(ufsvfsp);
428 428 mutex_exit(&ufs_scan_lock);
429 429 return (EIO);
430 430 }
431 431 ul->un_flags &= ~LDL_ERROR;
432 432 }
433 433 if (!ronly)
434 434 logmap_start_roll(ul);
435 435 return (0);
436 436 }
437 437
438 438 uint32_t
439 439 lufs_hd_genid(const ml_unit_t *up)
440 440 {
441 441 uint32_t id;
442 442
443 443 mutex_enter(&genid_mutex);
444 444
445 445 /*
446 446 * The formula below implements an exponential, modular sequence.
447 447 *
448 448 * ID(N) = (SEED * (BASE^N)) % PRIME
449 449 *
450 450 * The numbers will be pseudo random. They depend on SEED, BASE, PRIME,
451 451 * but will sweep through almost all of the range 1....PRIME-1.
452 452 * Most importantly they will not repeat for PRIME-2 (4294967289)
453 453 * repetitions. If they would repeat that could possibly cause hangs,
454 454 * panics at mount/umount and failed mount operations.
455 455 */
456 456 id = LUFS_NEXT_ID(last_loghead_ident);
457 457
458 458 /* Checking if new identity used already */
459 459 if (up != NULL && up->un_head_ident == id) {
460 460 DTRACE_PROBE1(head_ident_collision, uint32_t, id);
461 461
462 462 /*
463 463 * The following preserves the algorithm for the fix for
464 464 * "panic: free: freeing free frag, dev:0x2000000018, blk:34605,
465 465 * cg:26, ino:148071,".
466 466 * If the header identities un_head_ident are equal to the
467 467 * present element in the sequence, the next element of the
468 468 * sequence is returned instead.
469 469 */
470 470 id = LUFS_NEXT_ID(id);
471 471 }
472 472
473 473 last_loghead_ident = id;
474 474
475 475 mutex_exit(&genid_mutex);
476 476
477 477 return (id);
478 478 }
479 479
480 480 static void
481 481 lufs_genid_init(void)
482 482 {
483 483 uint64_t seed;
484 484
485 485 /* Initialization */
486 486 mutex_init(&genid_mutex, NULL, MUTEX_DEFAULT, NULL);
487 487
488 488 /* Seed the algorithm */
489 489 do {
490 490 timestruc_t tv;
491 491
492 492 gethrestime(&tv);
493 493
494 494 seed = (tv.tv_nsec << 3);
495 495 seed ^= tv.tv_sec;
496 496
497 497 last_loghead_ident = (uint32_t)(seed % LUFS_GENID_PRIME);
498 498 } while (last_loghead_ident == UINT32_C(0));
499 499 }
500 500
501 501 static int
502 502 lufs_initialize(
503 503 ufsvfs_t *ufsvfsp,
504 504 daddr_t bno,
505 505 size_t nb,
506 506 struct fiolog *flp)
507 507 {
508 508 ml_odunit_t *ud, *ud2;
509 509 buf_t *bp;
510 510
511 511 /* LINTED: warning: logical expression always true: op "||" */
512 512 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE);
513 513 ASSERT(nb >= ldl_minlogsize);
514 514
515 515 bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, dbtob(LS_SECTORS));
516 516 bzero(bp->b_un.b_addr, bp->b_bcount);
517 517
518 518 ud = (void *)bp->b_un.b_addr;
519 519 ud->od_version = LUFS_VERSION_LATEST;
520 520 ud->od_maxtransfer = MIN(ufsvfsp->vfs_iotransz, ldl_maxtransfer);
521 521 if (ud->od_maxtransfer < ldl_mintransfer)
522 522 ud->od_maxtransfer = ldl_mintransfer;
523 523 ud->od_devbsize = DEV_BSIZE;
524 524
525 525 ud->od_requestsize = flp->nbytes_actual;
526 526 ud->od_statesize = dbtob(LS_SECTORS);
527 527 ud->od_logsize = nb - ud->od_statesize;
528 528
529 529 ud->od_statebno = INT32_C(0);
530 530
531 531 ud->od_head_ident = lufs_hd_genid(NULL);
532 532 ud->od_tail_ident = ud->od_head_ident;
533 533 ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
534 534
535 535 ud->od_bol_lof = dbtob(ud->od_statebno) + ud->od_statesize;
536 536 ud->od_eol_lof = ud->od_bol_lof + ud->od_logsize;
537 537 ud->od_head_lof = ud->od_bol_lof;
538 538 ud->od_tail_lof = ud->od_bol_lof;
539 539
540 540 ASSERT(lufs_initialize_debug(ud));
541 541
542 542 ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);
543 543 bcopy(ud, ud2, sizeof (*ud));
544 544
545 545 UFS_BWRITE2(ufsvfsp, bp);
546 546 if (bp->b_flags & B_ERROR) {
547 547 brelse(bp);
548 548 return (EIO);
549 549 }
550 550 brelse(bp);
551 551
552 552 return (0);
553 553 }
554 554
555 555 /*
556 556 * Free log space
557 557 * Assumes the file system is write locked and is not logging
558 558 */
559 559 static int
560 560 lufs_free(struct ufsvfs *ufsvfsp)
561 561 {
562 562 int error = 0, i, j;
563 563 buf_t *bp = NULL;
564 564 extent_t *ep;
565 565 extent_block_t *ebp;
566 566 struct fs *fs = ufsvfsp->vfs_fs;
567 567 daddr_t fno;
568 568 int32_t logbno;
569 569 long nfno;
570 570 inode_t *ip = NULL;
571 571 char clean;
572 572
573 573 /*
574 574 * Nothing to free
575 575 */
576 576 if (fs->fs_logbno == 0)
577 577 return (0);
578 578
579 579 /*
580 580 * Mark the file system as FSACTIVE and no log but honor the
581 581 * current value of fs_reclaim. The reclaim thread could have
582 582 * been active when lufs_disable() was called and if fs_reclaim
583 583 * is reset to zero here it could lead to lost inodes.
584 584 */
585 585 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
586 586 mutex_enter(&ufsvfsp->vfs_lock);
587 587 clean = fs->fs_clean;
588 588 logbno = fs->fs_logbno;
589 589 fs->fs_clean = FSACTIVE;
590 590 fs->fs_logbno = INT32_C(0);
591 591 ufs_sbwrite(ufsvfsp);
592 592 mutex_exit(&ufsvfsp->vfs_lock);
593 593 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
594 594 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) {
595 595 error = EIO;
596 596 fs->fs_clean = clean;
597 597 fs->fs_logbno = logbno;
598 598 goto errout;
599 599 }
600 600
601 601 /*
602 602 * fetch the allocation block
603 603 * superblock -> one block of extents -> log data
604 604 */
605 605 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, logbno),
606 606 fs->fs_bsize);
607 607 if (bp->b_flags & B_ERROR) {
608 608 error = EIO;
609 609 goto errout;
610 610 }
611 611
612 612 /*
613 613 * Free up the allocated space (dummy inode needed for free())
614 614 */
615 615 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO);
616 616 ebp = (void *)bp->b_un.b_addr;
617 617 for (i = 0, ep = &ebp->extents[0]; i < ebp->nextents; ++i, ++ep) {
618 618 fno = logbtofrag(fs, ep->pbno);
619 619 nfno = dbtofsb(fs, ep->nbno);
620 620 for (j = 0; j < nfno; j += fs->fs_frag, fno += fs->fs_frag)
621 621 free(ip, fno, fs->fs_bsize, 0);
622 622 }
623 623 free(ip, logbtofrag(fs, logbno), fs->fs_bsize, 0);
624 624 brelse(bp);
625 625 bp = NULL;
626 626
627 627 /*
628 628 * Push the metadata dirtied during the allocations
629 629 */
630 630 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
631 631 sbupdate(ufsvfsp->vfs_vfs);
632 632 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
633 633 bflush(ufsvfsp->vfs_dev);
634 634 error = bfinval(ufsvfsp->vfs_dev, 0);
635 635 if (error)
636 636 goto errout;
637 637
638 638 /*
639 639 * Free the dummy inode
640 640 */
641 641 ufs_free_inode(ip);
642 642
643 643 return (0);
644 644
645 645 errout:
646 646 /*
647 647 * Free up all resources
648 648 */
649 649 if (bp)
650 650 brelse(bp);
651 651 if (ip)
652 652 ufs_free_inode(ip);
653 653 return (error);
654 654 }
655 655
656 656 /*
657 657 * Allocate log space
658 658 * Assumes the file system is write locked and is not logging
659 659 */
660 660 static int
661 661 lufs_alloc(struct ufsvfs *ufsvfsp, struct fiolog *flp, size_t minb, cred_t *cr)
662 662 {
663 663 int error = 0;
664 664 buf_t *bp = NULL;
665 665 extent_t *ep, *nep;
666 666 extent_block_t *ebp;
667 667 struct fs *fs = ufsvfsp->vfs_fs;
668 668 daddr_t fno; /* in frags */
669 669 daddr_t bno; /* in disk blocks */
670 670 int32_t logbno = INT32_C(0); /* will be fs_logbno */
671 671 struct inode *ip = NULL;
672 672 size_t nb = flp->nbytes_actual;
673 673 size_t tb = 0;
674 674
675 675 /*
676 676 * Mark the file system as FSACTIVE
677 677 */
678 678 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
679 679 mutex_enter(&ufsvfsp->vfs_lock);
680 680 fs->fs_clean = FSACTIVE;
681 681 ufs_sbwrite(ufsvfsp);
682 682 mutex_exit(&ufsvfsp->vfs_lock);
683 683 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
684 684
685 685 /*
686 686 * Allocate the allocation block (need dummy shadow inode;
687 687 * we use a shadow inode so the quota sub-system ignores
688 688 * the block allocations.)
689 689 * superblock -> one block of extents -> log data
690 690 */
691 691 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO);
692 692 ip->i_mode = IFSHAD; /* make the dummy a shadow inode */
693 693 rw_enter(&ip->i_contents, RW_WRITER);
694 694 fno = contigpref(ufsvfsp, nb + fs->fs_bsize, minb);
695 695 error = alloc(ip, fno, fs->fs_bsize, &fno, cr);
696 696 if (error)
697 697 goto errout;
698 698 bno = fsbtodb(fs, fno);
699 699
700 700 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, fs->fs_bsize);
701 701 if (bp->b_flags & B_ERROR) {
702 702 error = EIO;
703 703 goto errout;
704 704 }
705 705
706 706 ebp = (void *)bp->b_un.b_addr;
707 707 ebp->type = LUFS_EXTENTS;
708 708 ebp->nextbno = UINT32_C(0);
709 709 ebp->nextents = UINT32_C(0);
710 710 ebp->chksum = INT32_C(0);
711 711 if (fs->fs_magic == FS_MAGIC)
712 712 logbno = bno;
713 713 else
714 714 logbno = dbtofsb(fs, bno);
715 715
716 716 /*
717 717 * Initialize the first extent
718 718 */
719 719 ep = &ebp->extents[0];
720 720 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr);
721 721 if (error)
722 722 goto errout;
723 723 bno = fsbtodb(fs, fno);
724 724
725 725 ep->lbno = UINT32_C(0);
726 726 if (fs->fs_magic == FS_MAGIC)
727 727 ep->pbno = (uint32_t)bno;
728 728 else
729 729 ep->pbno = (uint32_t)fno;
730 730 ep->nbno = (uint32_t)fsbtodb(fs, fs->fs_frag);
731 731 ebp->nextents = UINT32_C(1);
732 732 tb = fs->fs_bsize;
733 733 nb -= fs->fs_bsize;
734 734
735 735 while (nb) {
736 736 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr);
737 737 if (error) {
738 738 if (tb < minb)
739 739 goto errout;
740 740 error = 0;
741 741 break;
742 742 }
743 743 bno = fsbtodb(fs, fno);
744 744 if ((daddr_t)((logbtodb(fs, ep->pbno) + ep->nbno) == bno))
745 745 ep->nbno += (uint32_t)(fsbtodb(fs, fs->fs_frag));
746 746 else {
747 747 nep = ep + 1;
748 748 if ((caddr_t)(nep + 1) >
749 749 (bp->b_un.b_addr + fs->fs_bsize)) {
750 750 free(ip, fno, fs->fs_bsize, 0);
751 751 break;
752 752 }
753 753 nep->lbno = ep->lbno + ep->nbno;
754 754 if (fs->fs_magic == FS_MAGIC)
755 755 nep->pbno = (uint32_t)bno;
756 756 else
757 757 nep->pbno = (uint32_t)fno;
758 758 nep->nbno = (uint32_t)(fsbtodb(fs, fs->fs_frag));
759 759 ebp->nextents++;
760 760 ep = nep;
761 761 }
762 762 tb += fs->fs_bsize;
763 763 nb -= fs->fs_bsize;
764 764 }
765 765
766 766 if (tb < minb) { /* Failed to reach minimum log size */
767 767 error = ENOSPC;
768 768 goto errout;
769 769 }
770 770
771 771 ebp->nbytes = (uint32_t)tb;
772 772 setsum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, fs->fs_bsize);
773 773 UFS_BWRITE2(ufsvfsp, bp);
774 774 if (bp->b_flags & B_ERROR) {
775 775 error = EIO;
776 776 goto errout;
777 777 }
778 778 /*
779 779 * Initialize the first two sectors of the log
780 780 */
781 781 error = lufs_initialize(ufsvfsp, logbtodb(fs, ebp->extents[0].pbno),
782 782 tb, flp);
783 783 if (error)
784 784 goto errout;
785 785
786 786 /*
787 787 * We are done initializing the allocation block and the log
788 788 */
789 789 brelse(bp);
790 790 bp = NULL;
791 791
792 792 /*
793 793 * Update the superblock and push the dirty metadata
794 794 */
795 795 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
796 796 sbupdate(ufsvfsp->vfs_vfs);
797 797 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
798 798 bflush(ufsvfsp->vfs_dev);
799 799 error = bfinval(ufsvfsp->vfs_dev, 1);
800 800 if (error)
801 801 goto errout;
802 802 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) {
803 803 error = EIO;
804 804 goto errout;
805 805 }
806 806
807 807 /*
808 808 * Everything is safely on disk; update log space pointer in sb
809 809 */
810 810 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
811 811 mutex_enter(&ufsvfsp->vfs_lock);
812 812 fs->fs_logbno = (uint32_t)logbno;
813 813 ufs_sbwrite(ufsvfsp);
814 814 mutex_exit(&ufsvfsp->vfs_lock);
815 815 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
816 816
817 817 /*
818 818 * Free the dummy inode
819 819 */
820 820 rw_exit(&ip->i_contents);
821 821 ufs_free_inode(ip);
822 822
823 823 /* inform user of real log size */
824 824 flp->nbytes_actual = tb;
825 825 return (0);
826 826
827 827 errout:
828 828 /*
829 829 * Free all resources
830 830 */
831 831 if (bp)
832 832 brelse(bp);
833 833 if (logbno) {
834 834 fs->fs_logbno = logbno;
835 835 (void) lufs_free(ufsvfsp);
836 836 }
837 837 if (ip) {
838 838 rw_exit(&ip->i_contents);
839 839 ufs_free_inode(ip);
840 840 }
841 841 return (error);
842 842 }
843 843
844 844 /*
845 845 * Disable logging
846 846 */
847 847 int
848 848 lufs_disable(vnode_t *vp, struct fiolog *flp)
849 849 {
850 850 int error = 0;
851 851 inode_t *ip = VTOI(vp);
852 852 ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
853 853 struct fs *fs = ufsvfsp->vfs_fs;
854 854 struct lockfs lf;
855 855 struct ulockfs *ulp;
856 856
857 857 flp->error = FIOLOG_ENONE;
858 858
859 859 /*
860 860 * Logging is already disabled; done
861 861 */
862 862 if (fs->fs_logbno == 0 || ufsvfsp->vfs_log == NULL)
863 863 return (0);
864 864
865 865 /*
866 866 * Readonly file system
867 867 */
868 868 if (fs->fs_ronly) {
869 869 flp->error = FIOLOG_EROFS;
870 870 return (0);
871 871 }
872 872
873 873 /*
874 874 * File system must be write locked to disable logging
875 875 */
876 876 error = ufs_fiolfss(vp, &lf);
877 877 if (error) {
878 878 return (error);
879 879 }
880 880 if (!LOCKFS_IS_ULOCK(&lf)) {
881 881 flp->error = FIOLOG_EULOCK;
882 882 return (0);
883 883 }
884 884 lf.lf_lock = LOCKFS_WLOCK;
885 885 lf.lf_flags = 0;
886 886 lf.lf_comment = NULL;
887 887 error = ufs_fiolfs(vp, &lf, 1);
888 888 if (error) {
889 889 flp->error = FIOLOG_EWLOCK;
890 890 return (0);
891 891 }
892 892
893 893 if (ufsvfsp->vfs_log == NULL || fs->fs_logbno == 0)
894 894 goto errout;
895 895
896 896 /*
897 897 * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT
898 898 */
899 899
900 900 /*
901 901 * Disable logging:
902 902 * Suspend the reclaim thread and force the delete thread to exit.
903 903 * When a nologging mount has completed there may still be
904 904 * work for reclaim to do so just suspend this thread until
905 905 * it's [deadlock-] safe for it to continue. The delete
906 906 * thread won't be needed as ufs_iinactive() calls
907 907 * ufs_delete() when logging is disabled.
908 908 * Freeze and drain reader ops.
909 909 * Commit any outstanding reader transactions (ufs_flush).
910 910 * Set the ``unmounted'' bit in the ufstrans struct.
911 911 * If debug, remove metadata from matamap.
912 912 * Disable matamap processing.
↓ open down ↓ |
912 lines elided |
↑ open up ↑ |
913 913 * NULL the trans ops table.
914 914 * Free all of the incore structs related to logging.
915 915 * Allow reader ops.
916 916 */
917 917 ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
918 918 ufs_thread_exit(&ufsvfsp->vfs_delete);
919 919
920 920 vfs_lock_wait(ufsvfsp->vfs_vfs);
921 921 ulp = &ufsvfsp->vfs_ulockfs;
922 922 mutex_enter(&ulp->ul_lock);
923 - atomic_add_long(&ufs_quiesce_pend, 1);
923 + atomic_inc_ulong(&ufs_quiesce_pend);
924 924 (void) ufs_quiesce(ulp);
925 925
926 926 (void) ufs_flush(ufsvfsp->vfs_vfs);
927 927
928 928 TRANS_MATA_UMOUNT(ufsvfsp);
929 929 ufsvfsp->vfs_domatamap = 0;
930 930
931 931 /*
932 932 * Free all of the incore structs
 933 933 	 * Acquire the ufs_scan_lock before de-linking the mtm data
934 934 * structure so that we keep ufs_sync() and ufs_update() away
935 935 * when they execute the ufs_scan_inodes() run while we're in
936 936 * progress of enabling/disabling logging.
937 937 */
938 938 mutex_enter(&ufs_scan_lock);
939 939 (void) lufs_unsnarf(ufsvfsp);
940 940 mutex_exit(&ufs_scan_lock);
941 941
942 - atomic_add_long(&ufs_quiesce_pend, -1);
942 + atomic_dec_ulong(&ufs_quiesce_pend);
943 943 mutex_exit(&ulp->ul_lock);
944 944 vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
945 945 vfs_unlock(ufsvfsp->vfs_vfs);
946 946
947 947 fs->fs_rolled = FS_ALL_ROLLED;
948 948 ufsvfsp->vfs_nolog_si = 0;
949 949
950 950 /*
951 951 * Free the log space and mark the superblock as FSACTIVE
952 952 */
953 953 (void) lufs_free(ufsvfsp);
954 954
955 955 /*
956 956 * Allow the reclaim thread to continue.
957 957 */
958 958 ufs_thread_continue(&ufsvfsp->vfs_reclaim);
959 959
960 960 /*
961 961 * Unlock the file system
962 962 */
963 963 lf.lf_lock = LOCKFS_ULOCK;
964 964 lf.lf_flags = 0;
965 965 error = ufs_fiolfs(vp, &lf, 1);
966 966 if (error)
967 967 flp->error = FIOLOG_ENOULOCK;
968 968
969 969 return (0);
970 970
971 971 errout:
972 972 lf.lf_lock = LOCKFS_ULOCK;
973 973 lf.lf_flags = 0;
974 974 (void) ufs_fiolfs(vp, &lf, 1);
975 975 return (error);
976 976 }
977 977
/*
 * Enable logging
 *
 * Enable UFS on-disk logging for the file system containing vp.  The log
 * size is computed (or taken from the caller's request), the file system
 * is write-locked, the on-disk log is allocated, and the in-core logging
 * structures are built.  "Soft" failures are reported through flp->error
 * (a FIOLOG_* code) with a 0 return value; hard failures return an errno.
 *
 *	vp	- a vnode within the file system to enable logging on
 *	flp	- in: requested log size; out: actual size and FIOLOG_* error
 *	cr	- credentials, passed through to lufs_alloc()
 */
int
lufs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr)
{
	int		error;
	int		reclaim;
	inode_t		*ip = VTOI(vp);
	ufsvfs_t	*ufsvfsp = ip->i_ufsvfs;
	struct fs	*fs;
	ml_unit_t	*ul;
	struct lockfs	lf;
	struct ulockfs	*ulp;
	vfs_t		*vfsp = ufsvfsp->vfs_vfs;
	uint64_t	tmp_nbytes_actual;
	uint64_t	cg_minlogsize;
	uint32_t	cgsize;
	/* warn at most once per boot about out-of-range log size tunables */
	static int	minlogsizewarn = 0;
	static int	maxlogsizewarn = 0;

	/*
	 * Check if logging is already enabled
	 */
	if (ufsvfsp->vfs_log) {
		flp->error = FIOLOG_ETRANS;
		/* for root ensure logging option is set */
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		return (0);
	}
	fs = ufsvfsp->vfs_fs;

	/*
	 * Come back here to recheck if we had to disable the log.
	 */
recheck:
	error = 0;
	reclaim = 0;
	flp->error = FIOLOG_ENONE;

	/*
	 * The size of the ufs log is determined using the following rules:
	 *
	 * 1) If no size is requested the log size is calculated as a
	 *    ratio of the total file system size. By default this is
	 *    1MB of log per 1GB of file system. This calculation is then
	 *    capped at the log size specified by ldl_softlogcap.
	 * 2) The log size requested may then be increased based on the
	 *    number of cylinder groups contained in the file system.
	 *    To prevent a hang the log has to be large enough to contain a
	 *    single transaction that alters every cylinder group in the file
	 *    system. This is calculated as cg_minlogsize.
	 * 3) Finally a check is made that the log size requested is within
	 *    the limits of ldl_minlogsize and ldl_maxlogsize.
	 */

	/*
	 * Adjust requested log size
	 */
	flp->nbytes_actual = flp->nbytes_requested;
	if (flp->nbytes_actual == 0) {
		/* no explicit request: rule 1, size scales with fs size */
		tmp_nbytes_actual =
		    (((uint64_t)fs->fs_size) / ldl_divisor) << fs->fs_fshift;
		flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX);
		/*
		 * The 1MB per 1GB log size allocation only applies up to
		 * ldl_softlogcap size of log.
		 */
		flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_softlogcap);
	}

	/* per-cylinder-group log space requirement; tunable overrides */
	cgsize = ldl_cgsizereq ? ldl_cgsizereq : LDL_CGSIZEREQ(fs);

	/*
	 * Determine the log size required based on the number of cylinder
	 * groups in the file system. The log has to be at least this size
	 * to prevent possible hangs due to log space exhaustion.
	 */
	cg_minlogsize = cgsize * fs->fs_ncg;

	/*
	 * Ensure that the minimum log size isn't so small that it could lead
	 * to a full log hang.
	 */
	if (ldl_minlogsize < LDL_MINLOGSIZE) {
		ldl_minlogsize = LDL_MINLOGSIZE;
		if (!minlogsizewarn) {
			cmn_err(CE_WARN, "ldl_minlogsize too small, increasing "
			    "to 0x%x", LDL_MINLOGSIZE);
			minlogsizewarn = 1;
		}
	}

	/*
	 * Ensure that the maximum log size isn't greater than INT_MAX as the
	 * logical log offset fields would overflow.
	 */
	if (ldl_maxlogsize > INT_MAX) {
		ldl_maxlogsize = INT_MAX;
		if (!maxlogsizewarn) {
			cmn_err(CE_WARN, "ldl_maxlogsize too large, reducing "
			    "to 0x%x", INT_MAX);
			maxlogsizewarn = 1;
		}
	}

	if (cg_minlogsize > ldl_maxlogsize) {
		cmn_err(CE_WARN,
		    "%s: reducing calculated log size from 0x%x to "
		    "ldl_maxlogsize (0x%x).", fs->fs_fsmnt, (int)cg_minlogsize,
		    ldl_maxlogsize);
	}

	/* clamp the cg-based minimum into [ldl_minlogsize, ldl_maxlogsize] */
	cg_minlogsize = MAX(cg_minlogsize, ldl_minlogsize);
	cg_minlogsize = MIN(cg_minlogsize, ldl_maxlogsize);

	/* apply rules 2 and 3, then round up to a file system block */
	flp->nbytes_actual = MAX(flp->nbytes_actual, cg_minlogsize);
	flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize);
	flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize);
	flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual);

	/*
	 * logging is enabled and the log is the right size; done
	 */
	ul = ufsvfsp->vfs_log;
	if (ul && fs->fs_logbno && (flp->nbytes_actual == ul->un_requestsize))
		return (0);

	/*
	 * Readonly file system
	 */
	if (fs->fs_ronly) {
		flp->error = FIOLOG_EROFS;
		return (0);
	}

	/*
	 * File system must be write locked to enable logging
	 */
	error = ufs_fiolfss(vp, &lf);
	if (error) {
		return (error);
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		return (0);
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = ufs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		return (0);
	}

	/*
	 * Grab appropriate locks to synchronize with the rest
	 * of the system
	 */
	vfs_lock_wait(vfsp);
	ulp = &ufsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);

	/*
	 * File system must be fairly consistent to enable logging
	 */
	if (fs->fs_clean != FSLOG &&
	    fs->fs_clean != FSACTIVE &&
	    fs->fs_clean != FSSTABLE &&
	    fs->fs_clean != FSCLEAN) {
		flp->error = FIOLOG_ECLEAN;
		goto unlockout;
	}

	/*
	 * A write-locked file system is only active if there are
	 * open deleted files; so remember to set FS_RECLAIM later.
	 */
	if (fs->fs_clean == FSACTIVE)
		reclaim = FS_RECLAIM;

	/*
	 * Logging is already enabled; must be changing the log's size
	 */
	if (fs->fs_logbno && ufsvfsp->vfs_log) {
		/*
		 * Before we can disable logging, we must give up our
		 * lock. As a consequence of unlocking and disabling the
		 * log, the fs structure may change. Because of this, when
		 * disabling is complete, we will go back to recheck to
		 * repeat all of the checks that we performed to get to
		 * this point. Disabling sets fs->fs_logbno to 0, so this
		 * will not put us into an infinite loop.
		 */
		mutex_exit(&ulp->ul_lock);
		vfs_unlock(vfsp);

		lf.lf_lock = LOCKFS_ULOCK;
		lf.lf_flags = 0;
		error = ufs_fiolfs(vp, &lf, 1);
		if (error) {
			flp->error = FIOLOG_ENOULOCK;
			return (0);
		}
		error = lufs_disable(vp, flp);
		if (error || (flp->error != FIOLOG_ENONE))
			return (0);
		goto recheck;
	}

	/* allocate the on-disk log blocks */
	error = lufs_alloc(ufsvfsp, flp, cg_minlogsize, cr);
	if (error)
		goto errout;

	/*
	 * Create all of the incore structs
	 */
	error = lufs_snarf(ufsvfsp, fs, 0);
	if (error)
		goto errout;

	/*
	 * DON'T ``GOTO ERROUT'' PAST THIS POINT
	 */

	/*
	 * Pretend we were just mounted with logging enabled
	 * Get the ops vector
	 * If debug, record metadata locations with log subsystem
	 * Start the delete thread
	 * Start the reclaim thread, if necessary
	 */
	vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);

	TRANS_DOMATAMAP(ufsvfsp);
	TRANS_MATA_MOUNT(ufsvfsp);
	TRANS_MATA_SI(ufsvfsp, fs);
	ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
	if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
		fs->fs_reclaim &= ~FS_RECLAIM;
		fs->fs_reclaim |= FS_RECLAIMING;
		ufs_thread_start(&ufsvfsp->vfs_reclaim,
		    ufs_thread_reclaim, vfsp);
	} else
		fs->fs_reclaim |= reclaim;

	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = ufs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
		return (0);
	}

	/*
	 * There's nothing in the log yet (we've just allocated it)
	 * so directly write out the super block.
	 * Note, we have to force this sb out to disk
	 * (not just to the log) so that if we crash we know we are logging
	 */
	mutex_enter(&ufsvfsp->vfs_lock);
	fs->fs_clean = FSLOG;
	fs->fs_rolled = FS_NEED_ROLL; /* Mark the fs as unrolled */
	UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
	mutex_exit(&ufsvfsp->vfs_lock);

	return (0);

errout:
	/*
	 * Aquire the ufs_scan_lock before de-linking the mtm data
	 * structure so that we keep ufs_sync() and ufs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&ufs_scan_lock);
	(void) lufs_unsnarf(ufsvfsp);
	mutex_exit(&ufs_scan_lock);

	(void) lufs_free(ufsvfsp);
unlockout:
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) ufs_fiolfs(vp, &lf, 1);
	return (error);
}
1274 1274
/*
 * Logging read strategy.
 *
 * A read may overlap deltas that are committed in the log but not yet
 * rolled to their home location on the master device.  Such reads must be
 * satisfied (at least partially) from the log so the caller sees the
 * committed data.  Reads with no overlapping deltas go straight to the
 * master device asynchronously.
 */
void
lufs_read_strategy(ml_unit_t *ul, buf_t *bp)
{
	mt_map_t	*logmap = ul->un_logmap;
	offset_t	mof = ldbtob(bp->b_blkno);
	off_t		nb = bp->b_bcount;
	mapentry_t	*age;
	char		*va;
	int		(*saviodone)();
	int		entire_range;

	/*
	 * get a linked list of overlapping deltas
	 * returns with &mtm->mtm_rwlock held
	 */
	entire_range = logmap_list_get(logmap, mof, nb, &age);

	/*
	 * no overlapping deltas were found; read master
	 */
	if (age == NULL) {
		rw_exit(&logmap->mtm_rwlock);
		if (ul->un_flags & LDL_ERROR) {
			/* log device has errored; fail the buffer */
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
			biodone(bp);
		} else {
			ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt();
			logstats.ls_lreads.value.ui64++;
			(void) bdev_strategy(bp);
			lwp_stat_update(LWP_STAT_INBLK, 1);
		}
		return;
	}

	va = bp_mapin_common(bp, VM_SLEEP);
	/*
	 * if necessary, sync read the data from master
	 * errors are returned in bp
	 *
	 * The deltas do not cover the whole request, so fill the buffer
	 * from the master device first.  Temporarily swap in
	 * trans_not_done as the iodone routine so we can synchronously
	 * wait for the I/O here with trans_not_wait().
	 */
	if (!entire_range) {
		saviodone = bp->b_iodone;
		bp->b_iodone = trans_not_done;
		logstats.ls_mreads.value.ui64++;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
		if (trans_not_wait(bp))
			ldl_seterror(ul, "Error reading master");
		bp->b_iodone = saviodone;
	}

	/*
	 * sync read the data from the log
	 * errors are returned inline
	 */
	if (ldl_read(ul, va, mof, nb, age)) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}

	/*
	 * unlist the deltas
	 */
	logmap_list_put(logmap, age);

	/*
	 * all done; report any log error through the buffer as well
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	biodone(bp);
}
1349 1349
/*
 * Logging write strategy.
 *
 * If the buffer overlaps deltas registered in the delta map, the data is
 * copied into the log map and committed via the log; the write to the
 * master device is deferred until roll time.  Otherwise the buffer is
 * written directly to the master device (through the snapshot driver if
 * a snapshot is active).
 */
void
lufs_write_strategy(ml_unit_t *ul, buf_t *bp)
{
	offset_t	mof = ldbtob(bp->b_blkno);
	off_t		nb = bp->b_bcount;
	char		*va;
	mapentry_t	*me;

	/* writes must be whole device blocks */
	ASSERT((nb & DEV_BMASK) == 0);
	ul->un_logmap->mtm_ref = 1;

	/*
	 * if there are deltas, move into log
	 */
	me = deltamap_remove(ul->un_deltamap, mof, nb);
	if (me) {

		va = bp_mapin_common(bp, VM_SLEEP);

		ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
		    (ul->un_matamap == NULL)||
		    matamap_within(ul->un_matamap, mof, nb));

		/*
		 * move to logmap
		 *
		 * ufs_crb_enable selects the copy-buffer variant
		 * (logmap_add_buf) over plain logmap_add.
		 */
		if (ufs_crb_enable) {
			logmap_add_buf(ul, va, mof, me,
			    bp->b_un.b_addr, nb);
		} else {
			logmap_add(ul, va, mof, me);
		}

		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
		}
		/* data captured by the log; complete the buffer now */
		biodone(bp);
		return;
	}
	if (ul->un_flags & LDL_ERROR) {
		/* log device has errored; fail the buffer */
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	/*
	 * Check that we are not updating metadata, or if so then via B_PHYS.
	 */
	ASSERT((ul->un_matamap == NULL) ||
	    !(matamap_overlap(ul->un_matamap, mof, nb) &&
	    ((bp->b_flags & B_PHYS) == 0)));

	ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt();
	logstats.ls_lwrites.value.ui64++;

	/* If snapshots are enabled, write through the snapshot driver */
	if (ul->un_ufsvfs->vfs_snapshot)
		fssnap_strategy(&ul->un_ufsvfs->vfs_snapshot, bp);
	else
		(void) bdev_strategy(bp);

	lwp_stat_update(LWP_STAT_OUBLK, 1);
}
1415 1415
1416 1416 void
1417 1417 lufs_strategy(ml_unit_t *ul, buf_t *bp)
1418 1418 {
1419 1419 if (bp->b_flags & B_READ)
1420 1420 lufs_read_strategy(ul, bp);
1421 1421 else
1422 1422 lufs_write_strategy(ul, bp);
1423 1423 }
1424 1424
1425 1425 /* ARGSUSED */
1426 1426 static int
1427 1427 delta_stats_update(kstat_t *ksp, int rw)
1428 1428 {
1429 1429 if (rw == KSTAT_WRITE) {
1430 1430 delta_stats[DT_SB] = dkstats.ds_superblock_deltas.value.ui64;
1431 1431 delta_stats[DT_CG] = dkstats.ds_bitmap_deltas.value.ui64;
1432 1432 delta_stats[DT_SI] = dkstats.ds_suminfo_deltas.value.ui64;
1433 1433 delta_stats[DT_AB] = dkstats.ds_allocblk_deltas.value.ui64;
1434 1434 delta_stats[DT_ABZERO] = dkstats.ds_ab0_deltas.value.ui64;
1435 1435 delta_stats[DT_DIR] = dkstats.ds_dir_deltas.value.ui64;
1436 1436 delta_stats[DT_INODE] = dkstats.ds_inode_deltas.value.ui64;
1437 1437 delta_stats[DT_FBI] = dkstats.ds_fbiwrite_deltas.value.ui64;
1438 1438 delta_stats[DT_QR] = dkstats.ds_quota_deltas.value.ui64;
1439 1439 delta_stats[DT_SHAD] = dkstats.ds_shadow_deltas.value.ui64;
1440 1440
1441 1441 roll_stats[DT_SB] = dkstats.ds_superblock_rolled.value.ui64;
1442 1442 roll_stats[DT_CG] = dkstats.ds_bitmap_rolled.value.ui64;
1443 1443 roll_stats[DT_SI] = dkstats.ds_suminfo_rolled.value.ui64;
1444 1444 roll_stats[DT_AB] = dkstats.ds_allocblk_rolled.value.ui64;
1445 1445 roll_stats[DT_ABZERO] = dkstats.ds_ab0_rolled.value.ui64;
1446 1446 roll_stats[DT_DIR] = dkstats.ds_dir_rolled.value.ui64;
1447 1447 roll_stats[DT_INODE] = dkstats.ds_inode_rolled.value.ui64;
1448 1448 roll_stats[DT_FBI] = dkstats.ds_fbiwrite_rolled.value.ui64;
1449 1449 roll_stats[DT_QR] = dkstats.ds_quota_rolled.value.ui64;
1450 1450 roll_stats[DT_SHAD] = dkstats.ds_shadow_rolled.value.ui64;
1451 1451 } else {
1452 1452 dkstats.ds_superblock_deltas.value.ui64 = delta_stats[DT_SB];
1453 1453 dkstats.ds_bitmap_deltas.value.ui64 = delta_stats[DT_CG];
1454 1454 dkstats.ds_suminfo_deltas.value.ui64 = delta_stats[DT_SI];
1455 1455 dkstats.ds_allocblk_deltas.value.ui64 = delta_stats[DT_AB];
1456 1456 dkstats.ds_ab0_deltas.value.ui64 = delta_stats[DT_ABZERO];
1457 1457 dkstats.ds_dir_deltas.value.ui64 = delta_stats[DT_DIR];
1458 1458 dkstats.ds_inode_deltas.value.ui64 = delta_stats[DT_INODE];
1459 1459 dkstats.ds_fbiwrite_deltas.value.ui64 = delta_stats[DT_FBI];
1460 1460 dkstats.ds_quota_deltas.value.ui64 = delta_stats[DT_QR];
1461 1461 dkstats.ds_shadow_deltas.value.ui64 = delta_stats[DT_SHAD];
1462 1462
1463 1463 dkstats.ds_superblock_rolled.value.ui64 = roll_stats[DT_SB];
1464 1464 dkstats.ds_bitmap_rolled.value.ui64 = roll_stats[DT_CG];
1465 1465 dkstats.ds_suminfo_rolled.value.ui64 = roll_stats[DT_SI];
1466 1466 dkstats.ds_allocblk_rolled.value.ui64 = roll_stats[DT_AB];
1467 1467 dkstats.ds_ab0_rolled.value.ui64 = roll_stats[DT_ABZERO];
1468 1468 dkstats.ds_dir_rolled.value.ui64 = roll_stats[DT_DIR];
1469 1469 dkstats.ds_inode_rolled.value.ui64 = roll_stats[DT_INODE];
1470 1470 dkstats.ds_fbiwrite_rolled.value.ui64 = roll_stats[DT_FBI];
1471 1471 dkstats.ds_quota_rolled.value.ui64 = roll_stats[DT_QR];
1472 1472 dkstats.ds_shadow_rolled.value.ui64 = roll_stats[DT_SHAD];
1473 1473 }
1474 1474 return (0);
1475 1475 }
1476 1476
1477 1477 extern size_t ufs_crb_limit;
1478 1478 extern int ufs_max_crb_divisor;
1479 1479
1480 1480 void
1481 1481 lufs_init(void)
1482 1482 {
1483 1483 kstat_t *ksp;
1484 1484
1485 1485 /* Create kmem caches */
1486 1486 lufs_sv = kmem_cache_create("lufs_save", sizeof (lufs_save_t), 0,
1487 1487 NULL, NULL, NULL, NULL, NULL, 0);
1488 1488 lufs_bp = kmem_cache_create("lufs_bufs", sizeof (lufs_buf_t), 0,
1489 1489 NULL, NULL, NULL, NULL, NULL, 0);
1490 1490
1491 1491 mutex_init(&log_mutex, NULL, MUTEX_DEFAULT, NULL);
1492 1492
1493 1493 _init_top();
1494 1494
1495 1495 if (bio_lufs_strategy == NULL)
1496 1496 bio_lufs_strategy = (void (*) (void *, buf_t *)) lufs_strategy;
1497 1497
1498 1498 /*
1499 1499 * Initialise general logging and delta kstats
1500 1500 */
1501 1501 ksp = kstat_create("ufs_log", 0, "logstats", "ufs", KSTAT_TYPE_NAMED,
1502 1502 sizeof (logstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
1503 1503 if (ksp) {
1504 1504 ksp->ks_data = (void *) &logstats;
1505 1505 kstat_install(ksp);
1506 1506 }
1507 1507
1508 1508 ksp = kstat_create("ufs_log", 0, "deltastats", "ufs", KSTAT_TYPE_NAMED,
1509 1509 sizeof (dkstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
1510 1510 if (ksp) {
1511 1511 ksp->ks_data = (void *) &dkstats;
1512 1512 ksp->ks_update = delta_stats_update;
1513 1513 kstat_install(ksp);
1514 1514 }
1515 1515
1516 1516 /* Initialize generation of logging ids */
1517 1517 lufs_genid_init();
1518 1518
1519 1519 /*
1520 1520 * Set up the maximum amount of kmem that the crbs (system wide)
1521 1521 * can use.
1522 1522 */
1523 1523 ufs_crb_limit = kmem_maxavail() / ufs_max_crb_divisor;
1524 1524 }
↓ open down ↓ |
572 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX