Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/ufs/lufs.c
+++ new/usr/src/uts/common/fs/ufs/lufs.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/systm.h>
27 27 #include <sys/types.h>
28 28 #include <sys/vnode.h>
29 29 #include <sys/buf.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/fssnap_if.h>
32 32 #include <sys/fs/ufs_inode.h>
33 33 #include <sys/fs/ufs_filio.h>
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/modctl.h>
36 36 #include <sys/fs/ufs_log.h>
37 37 #include <sys/fs/ufs_bio.h>
38 38 #include <sys/fs/ufs_fsdir.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/atomic.h>
41 41 #include <sys/kmem.h>
42 42 #include <sys/inttypes.h>
43 43 #include <sys/vfs.h>
44 44 #include <sys/mntent.h>
45 45 #include <sys/conf.h>
46 46 #include <sys/param.h>
47 47 #include <sys/kstat.h>
48 48 #include <sys/cmn_err.h>
49 49 #include <sys/sdt.h>
50 50
51 51 #define LUFS_GENID_PRIME UINT64_C(4294967291)
52 52 #define LUFS_GENID_BASE UINT64_C(311)
53 53 #define LUFS_NEXT_ID(id) ((uint32_t)(((id) * LUFS_GENID_BASE) % \
54 54 LUFS_GENID_PRIME))
55 55
56 56 extern kmutex_t ufs_scan_lock;
57 57
58 58 static kmutex_t log_mutex; /* general purpose log layer lock */
  59  59 kmutex_t ml_scan;	/* Scan thread synchronization */
  60  60 kcondvar_t ml_scan_cv;	/* Scan thread synchronization */
61 61
62 62 struct kmem_cache *lufs_sv;
63 63 struct kmem_cache *lufs_bp;
64 64
65 65 /* Tunables */
66 66 uint_t ldl_maxlogsize = LDL_MAXLOGSIZE;
67 67 uint_t ldl_minlogsize = LDL_MINLOGSIZE;
68 68 uint_t ldl_softlogcap = LDL_SOFTLOGCAP;
69 69 uint32_t ldl_divisor = LDL_DIVISOR;
70 70 uint32_t ldl_mintransfer = LDL_MINTRANSFER;
71 71 uint32_t ldl_maxtransfer = LDL_MAXTRANSFER;
72 72 uint32_t ldl_minbufsize = LDL_MINBUFSIZE;
73 73 uint32_t ldl_cgsizereq = 0;
74 74
75 75 /* Generation of header ids */
76 76 static kmutex_t genid_mutex;
77 77 static uint32_t last_loghead_ident = UINT32_C(0);
78 78
79 79 /*
80 80 * Logging delta and roll statistics
81 81 */
82 82 struct delta_kstats {
83 83 kstat_named_t ds_superblock_deltas;
84 84 kstat_named_t ds_bitmap_deltas;
85 85 kstat_named_t ds_suminfo_deltas;
86 86 kstat_named_t ds_allocblk_deltas;
87 87 kstat_named_t ds_ab0_deltas;
88 88 kstat_named_t ds_dir_deltas;
89 89 kstat_named_t ds_inode_deltas;
90 90 kstat_named_t ds_fbiwrite_deltas;
91 91 kstat_named_t ds_quota_deltas;
92 92 kstat_named_t ds_shadow_deltas;
93 93
94 94 kstat_named_t ds_superblock_rolled;
95 95 kstat_named_t ds_bitmap_rolled;
96 96 kstat_named_t ds_suminfo_rolled;
97 97 kstat_named_t ds_allocblk_rolled;
98 98 kstat_named_t ds_ab0_rolled;
99 99 kstat_named_t ds_dir_rolled;
100 100 kstat_named_t ds_inode_rolled;
101 101 kstat_named_t ds_fbiwrite_rolled;
102 102 kstat_named_t ds_quota_rolled;
103 103 kstat_named_t ds_shadow_rolled;
104 104 } dkstats = {
105 105 { "superblock_deltas", KSTAT_DATA_UINT64 },
106 106 { "bitmap_deltas", KSTAT_DATA_UINT64 },
107 107 { "suminfo_deltas", KSTAT_DATA_UINT64 },
108 108 { "allocblk_deltas", KSTAT_DATA_UINT64 },
109 109 { "ab0_deltas", KSTAT_DATA_UINT64 },
110 110 { "dir_deltas", KSTAT_DATA_UINT64 },
111 111 { "inode_deltas", KSTAT_DATA_UINT64 },
112 112 { "fbiwrite_deltas", KSTAT_DATA_UINT64 },
113 113 { "quota_deltas", KSTAT_DATA_UINT64 },
114 114 { "shadow_deltas", KSTAT_DATA_UINT64 },
115 115
116 116 { "superblock_rolled", KSTAT_DATA_UINT64 },
117 117 { "bitmap_rolled", KSTAT_DATA_UINT64 },
118 118 { "suminfo_rolled", KSTAT_DATA_UINT64 },
119 119 { "allocblk_rolled", KSTAT_DATA_UINT64 },
120 120 { "ab0_rolled", KSTAT_DATA_UINT64 },
121 121 { "dir_rolled", KSTAT_DATA_UINT64 },
122 122 { "inode_rolled", KSTAT_DATA_UINT64 },
123 123 { "fbiwrite_rolled", KSTAT_DATA_UINT64 },
124 124 { "quota_rolled", KSTAT_DATA_UINT64 },
125 125 { "shadow_rolled", KSTAT_DATA_UINT64 }
126 126 };
127 127
128 128 uint64_t delta_stats[DT_MAX];
129 129 uint64_t roll_stats[DT_MAX];
130 130
131 131 /*
132 132 * General logging kstats
133 133 */
134 134 struct logstats logstats = {
135 135 { "master_reads", KSTAT_DATA_UINT64 },
136 136 { "master_writes", KSTAT_DATA_UINT64 },
137 137 { "log_reads_inmem", KSTAT_DATA_UINT64 },
138 138 { "log_reads", KSTAT_DATA_UINT64 },
139 139 { "log_writes", KSTAT_DATA_UINT64 },
140 140 { "log_master_reads", KSTAT_DATA_UINT64 },
141 141 { "log_roll_reads", KSTAT_DATA_UINT64 },
142 142 { "log_roll_writes", KSTAT_DATA_UINT64 }
143 143 };
144 144
145 145 int
146 146 trans_not_done(struct buf *cb)
147 147 {
148 148 sema_v(&cb->b_io);
149 149 return (0);
150 150 }
151 151
152 152 static void
153 153 trans_wait_panic(struct buf *cb)
154 154 {
155 155 while ((cb->b_flags & B_DONE) == 0)
156 156 drv_usecwait(10);
157 157 }
158 158
159 159 int
160 160 trans_not_wait(struct buf *cb)
161 161 {
162 162 /*
163 163 * In case of panic, busy wait for completion
164 164 */
165 165 if (panicstr)
166 166 trans_wait_panic(cb);
167 167 else
168 168 sema_p(&cb->b_io);
169 169
170 170 return (geterror(cb));
171 171 }
172 172
173 173 int
174 174 trans_wait(struct buf *cb)
175 175 {
176 176 /*
177 177 * In case of panic, busy wait for completion and run md daemon queues
178 178 */
179 179 if (panicstr)
180 180 trans_wait_panic(cb);
181 181 return (biowait(cb));
182 182 }
183 183
184 184 static void
185 185 setsum(int32_t *sp, int32_t *lp, int nb)
186 186 {
187 187 int32_t csum = 0;
188 188
189 189 *sp = 0;
190 190 nb /= sizeof (int32_t);
191 191 while (nb--)
192 192 csum += *lp++;
193 193 *sp = csum;
194 194 }
195 195
196 196 static int
197 197 checksum(int32_t *sp, int32_t *lp, int nb)
198 198 {
199 199 int32_t ssum = *sp;
200 200
201 201 setsum(sp, lp, nb);
202 202 if (ssum != *sp) {
203 203 *sp = ssum;
204 204 return (0);
205 205 }
206 206 return (1);
207 207 }
208 208
209 209 void
210 210 lufs_unsnarf(ufsvfs_t *ufsvfsp)
211 211 {
212 212 ml_unit_t *ul;
213 213 mt_map_t *mtm;
214 214
215 215 ul = ufsvfsp->vfs_log;
216 216 if (ul == NULL)
217 217 return;
218 218
219 219 mtm = ul->un_logmap;
220 220
221 221 /*
222 222 * Wait for a pending top_issue_sync which is
 223 223 	 * dispatched (via taskq_dispatch()) but hasn't completed yet.
224 224 */
225 225
226 226 mutex_enter(&mtm->mtm_lock);
227 227
228 228 while (mtm->mtm_taskq_sync_count != 0) {
229 229 cv_wait(&mtm->mtm_cv, &mtm->mtm_lock);
230 230 }
231 231
232 232 mutex_exit(&mtm->mtm_lock);
233 233
234 234 /* Roll committed transactions */
235 235 logmap_roll_dev(ul);
236 236
237 237 /* Kill the roll thread */
238 238 logmap_kill_roll(ul);
239 239
 240 240 	/* release saved allocation info */
241 241 if (ul->un_ebp)
242 242 kmem_free(ul->un_ebp, ul->un_nbeb);
243 243
244 244 /* release circular bufs */
245 245 free_cirbuf(&ul->un_rdbuf);
246 246 free_cirbuf(&ul->un_wrbuf);
247 247
248 248 /* release maps */
249 249 if (ul->un_logmap)
250 250 ul->un_logmap = map_put(ul->un_logmap);
251 251 if (ul->un_deltamap)
252 252 ul->un_deltamap = map_put(ul->un_deltamap);
253 253 if (ul->un_matamap)
254 254 ul->un_matamap = map_put(ul->un_matamap);
255 255
256 256 mutex_destroy(&ul->un_log_mutex);
257 257 mutex_destroy(&ul->un_state_mutex);
258 258
259 259 /* release state buffer MUST BE LAST!! (contains our ondisk data) */
260 260 if (ul->un_bp)
261 261 brelse(ul->un_bp);
262 262 kmem_free(ul, sizeof (*ul));
263 263
264 264 ufsvfsp->vfs_log = NULL;
265 265 }
266 266
267 267 int
268 268 lufs_snarf(ufsvfs_t *ufsvfsp, struct fs *fs, int ronly)
269 269 {
270 270 buf_t *bp, *tbp;
271 271 ml_unit_t *ul;
272 272 extent_block_t *ebp;
273 273 ic_extent_block_t *nebp;
274 274 size_t nb;
275 275 daddr_t bno; /* in disk blocks */
276 276 int i;
277 277
278 278 /* LINTED: warning: logical expression always true: op "||" */
279 279 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE);
280 280
281 281 /*
282 282 * Get the allocation table
283 283 * During a remount the superblock pointed to by the ufsvfsp
284 284 * is out of date. Hence the need for the ``new'' superblock
285 285 * pointer, fs, passed in as a parameter.
286 286 */
287 287 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, fs->fs_logbno),
288 288 fs->fs_bsize);
289 289 if (bp->b_flags & B_ERROR) {
290 290 brelse(bp);
291 291 return (EIO);
292 292 }
293 293 ebp = (void *)bp->b_un.b_addr;
294 294 if (!checksum(&ebp->chksum, (int32_t *)bp->b_un.b_addr,
295 295 fs->fs_bsize)) {
296 296 brelse(bp);
297 297 return (ENODEV);
298 298 }
299 299
300 300 /*
301 301 * It is possible to get log blocks with all zeros.
302 302 * We should also check for nextents to be zero in such case.
303 303 */
304 304 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) {
305 305 brelse(bp);
306 306 return (EDOM);
307 307 }
308 308 /*
309 309 * Put allocation into memory. This requires conversion between
310 310 * on the ondisk format of the extent (type extent_t) and the
311 311 * in-core format of the extent (type ic_extent_t). The
312 312 * difference is the in-core form of the extent block stores
313 313 * the physical offset of the extent in disk blocks, which
314 314 * can require more than a 32-bit field.
315 315 */
316 316 nb = (size_t)(sizeof (ic_extent_block_t) +
317 317 ((ebp->nextents - 1) * sizeof (ic_extent_t)));
318 318 nebp = kmem_alloc(nb, KM_SLEEP);
319 319 nebp->ic_nextents = ebp->nextents;
320 320 nebp->ic_nbytes = ebp->nbytes;
321 321 nebp->ic_nextbno = ebp->nextbno;
322 322 for (i = 0; i < ebp->nextents; i++) {
323 323 nebp->ic_extents[i].ic_lbno = ebp->extents[i].lbno;
324 324 nebp->ic_extents[i].ic_nbno = ebp->extents[i].nbno;
325 325 nebp->ic_extents[i].ic_pbno =
326 326 logbtodb(fs, ebp->extents[i].pbno);
327 327 }
328 328 brelse(bp);
329 329
330 330 /*
331 331 * Get the log state
332 332 */
333 333 bno = nebp->ic_extents[0].ic_pbno;
334 334 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, DEV_BSIZE);
335 335 if (bp->b_flags & B_ERROR) {
336 336 brelse(bp);
337 337 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno + 1, DEV_BSIZE);
338 338 if (bp->b_flags & B_ERROR) {
339 339 brelse(bp);
340 340 kmem_free(nebp, nb);
341 341 return (EIO);
342 342 }
343 343 }
344 344
345 345 /*
346 346 * Put ondisk struct into an anonymous buffer
347 347 * This buffer will contain the memory for the ml_odunit struct
348 348 */
349 349 tbp = ngeteblk(dbtob(LS_SECTORS));
350 350 tbp->b_edev = bp->b_edev;
351 351 tbp->b_dev = bp->b_dev;
352 352 tbp->b_blkno = bno;
353 353 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE);
354 354 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE);
355 355 bp->b_flags |= (B_STALE | B_AGE);
356 356 brelse(bp);
357 357 bp = tbp;
358 358
359 359 /*
360 360 * Verify the log state
361 361 *
362 362 * read/only mounts w/bad logs are allowed. umount will
363 363 * eventually roll the bad log until the first IO error.
364 364 * fsck will then repair the file system.
365 365 *
366 366 * read/write mounts with bad logs are not allowed.
367 367 *
368 368 */
369 369 ul = (ml_unit_t *)kmem_zalloc(sizeof (*ul), KM_SLEEP);
370 370 bcopy(bp->b_un.b_addr, &ul->un_ondisk, sizeof (ml_odunit_t));
371 371 if ((ul->un_chksum != ul->un_head_ident + ul->un_tail_ident) ||
372 372 (ul->un_version != LUFS_VERSION_LATEST) ||
373 373 (!ronly && ul->un_badlog)) {
374 374 kmem_free(ul, sizeof (*ul));
375 375 brelse(bp);
376 376 kmem_free(nebp, nb);
377 377 return (EIO);
378 378 }
379 379 /*
380 380 * Initialize the incore-only fields
381 381 */
382 382 if (ronly)
383 383 ul->un_flags |= LDL_NOROLL;
384 384 ul->un_bp = bp;
385 385 ul->un_ufsvfs = ufsvfsp;
386 386 ul->un_dev = ufsvfsp->vfs_dev;
387 387 ul->un_ebp = nebp;
388 388 ul->un_nbeb = nb;
389 389 ul->un_maxresv = btodb(ul->un_logsize) * LDL_USABLE_BSIZE;
390 390 ul->un_deltamap = map_get(ul, deltamaptype, DELTAMAP_NHASH);
391 391 ul->un_logmap = map_get(ul, logmaptype, LOGMAP_NHASH);
392 392 if (ul->un_debug & MT_MATAMAP)
393 393 ul->un_matamap = map_get(ul, matamaptype, DELTAMAP_NHASH);
394 394 mutex_init(&ul->un_log_mutex, NULL, MUTEX_DEFAULT, NULL);
395 395 mutex_init(&ul->un_state_mutex, NULL, MUTEX_DEFAULT, NULL);
396 396
397 397 /*
 398 398 	 * Acquire the ufs_scan_lock before linking the mtm data
399 399 * structure so that we keep ufs_sync() and ufs_update() away
400 400 * when they execute the ufs_scan_inodes() run while we're in
401 401 * progress of enabling/disabling logging.
402 402 */
403 403 mutex_enter(&ufs_scan_lock);
404 404 ufsvfsp->vfs_log = ul;
405 405
406 406 /* remember the state of the log before the log scan */
407 407 logmap_logscan(ul);
408 408 mutex_exit(&ufs_scan_lock);
409 409
410 410 /*
411 411 * Error during scan
412 412 *
413 413 * If this is a read/only mount; ignore the error.
414 414 * At a later time umount/fsck will repair the fs.
415 415 *
416 416 */
417 417 if (ul->un_flags & LDL_ERROR) {
418 418 if (!ronly) {
419 419 /*
 420 420 			 * Acquire the ufs_scan_lock before de-linking
421 421 * the mtm data structure so that we keep ufs_sync()
422 422 * and ufs_update() away when they execute the
423 423 * ufs_scan_inodes() run while we're in progress of
424 424 * enabling/disabling logging.
425 425 */
426 426 mutex_enter(&ufs_scan_lock);
427 427 lufs_unsnarf(ufsvfsp);
428 428 mutex_exit(&ufs_scan_lock);
429 429 return (EIO);
430 430 }
431 431 ul->un_flags &= ~LDL_ERROR;
432 432 }
433 433 if (!ronly)
434 434 logmap_start_roll(ul);
435 435 return (0);
436 436 }
437 437
438 438 uint32_t
439 439 lufs_hd_genid(const ml_unit_t *up)
440 440 {
441 441 uint32_t id;
442 442
443 443 mutex_enter(&genid_mutex);
444 444
445 445 /*
446 446 * The formula below implements an exponential, modular sequence.
447 447 *
448 448 * ID(N) = (SEED * (BASE^N)) % PRIME
449 449 *
450 450 * The numbers will be pseudo random. They depend on SEED, BASE, PRIME,
451 451 * but will sweep through almost all of the range 1....PRIME-1.
452 452 * Most importantly they will not repeat for PRIME-2 (4294967289)
453 453 * repetitions. If they would repeat that could possibly cause hangs,
454 454 * panics at mount/umount and failed mount operations.
455 455 */
456 456 id = LUFS_NEXT_ID(last_loghead_ident);
457 457
458 458 /* Checking if new identity used already */
459 459 if (up != NULL && up->un_head_ident == id) {
460 460 DTRACE_PROBE1(head_ident_collision, uint32_t, id);
461 461
462 462 /*
463 463 * The following preserves the algorithm for the fix for
464 464 * "panic: free: freeing free frag, dev:0x2000000018, blk:34605,
465 465 * cg:26, ino:148071,".
466 466 * If the header identities un_head_ident are equal to the
467 467 * present element in the sequence, the next element of the
468 468 * sequence is returned instead.
469 469 */
470 470 id = LUFS_NEXT_ID(id);
471 471 }
472 472
473 473 last_loghead_ident = id;
474 474
475 475 mutex_exit(&genid_mutex);
476 476
477 477 return (id);
478 478 }
479 479
480 480 static void
481 481 lufs_genid_init(void)
482 482 {
483 483 uint64_t seed;
484 484
485 485 /* Initialization */
486 486 mutex_init(&genid_mutex, NULL, MUTEX_DEFAULT, NULL);
487 487
488 488 /* Seed the algorithm */
489 489 do {
490 490 timestruc_t tv;
491 491
492 492 gethrestime(&tv);
493 493
494 494 seed = (tv.tv_nsec << 3);
495 495 seed ^= tv.tv_sec;
496 496
497 497 last_loghead_ident = (uint32_t)(seed % LUFS_GENID_PRIME);
498 498 } while (last_loghead_ident == UINT32_C(0));
499 499 }
500 500
501 501 static int
502 502 lufs_initialize(
503 503 ufsvfs_t *ufsvfsp,
504 504 daddr_t bno,
505 505 size_t nb,
506 506 struct fiolog *flp)
507 507 {
508 508 ml_odunit_t *ud, *ud2;
509 509 buf_t *bp;
510 510
511 511 /* LINTED: warning: logical expression always true: op "||" */
512 512 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE);
513 513 ASSERT(nb >= ldl_minlogsize);
514 514
515 515 bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, dbtob(LS_SECTORS));
516 516 bzero(bp->b_un.b_addr, bp->b_bcount);
517 517
518 518 ud = (void *)bp->b_un.b_addr;
519 519 ud->od_version = LUFS_VERSION_LATEST;
520 520 ud->od_maxtransfer = MIN(ufsvfsp->vfs_iotransz, ldl_maxtransfer);
521 521 if (ud->od_maxtransfer < ldl_mintransfer)
522 522 ud->od_maxtransfer = ldl_mintransfer;
523 523 ud->od_devbsize = DEV_BSIZE;
524 524
525 525 ud->od_requestsize = flp->nbytes_actual;
526 526 ud->od_statesize = dbtob(LS_SECTORS);
527 527 ud->od_logsize = nb - ud->od_statesize;
528 528
529 529 ud->od_statebno = INT32_C(0);
530 530
531 531 ud->od_head_ident = lufs_hd_genid(NULL);
532 532 ud->od_tail_ident = ud->od_head_ident;
533 533 ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
534 534
535 535 ud->od_bol_lof = dbtob(ud->od_statebno) + ud->od_statesize;
536 536 ud->od_eol_lof = ud->od_bol_lof + ud->od_logsize;
537 537 ud->od_head_lof = ud->od_bol_lof;
538 538 ud->od_tail_lof = ud->od_bol_lof;
539 539
540 540 ASSERT(lufs_initialize_debug(ud));
541 541
542 542 ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);
543 543 bcopy(ud, ud2, sizeof (*ud));
544 544
545 545 UFS_BWRITE2(ufsvfsp, bp);
546 546 if (bp->b_flags & B_ERROR) {
547 547 brelse(bp);
548 548 return (EIO);
549 549 }
550 550 brelse(bp);
551 551
552 552 return (0);
553 553 }
554 554
555 555 /*
556 556 * Free log space
557 557 * Assumes the file system is write locked and is not logging
558 558 */
559 559 static int
560 560 lufs_free(struct ufsvfs *ufsvfsp)
561 561 {
562 562 int error = 0, i, j;
563 563 buf_t *bp = NULL;
564 564 extent_t *ep;
565 565 extent_block_t *ebp;
566 566 struct fs *fs = ufsvfsp->vfs_fs;
567 567 daddr_t fno;
568 568 int32_t logbno;
569 569 long nfno;
570 570 inode_t *ip = NULL;
571 571 char clean;
572 572
573 573 /*
574 574 * Nothing to free
575 575 */
576 576 if (fs->fs_logbno == 0)
577 577 return (0);
578 578
579 579 /*
580 580 * Mark the file system as FSACTIVE and no log but honor the
581 581 * current value of fs_reclaim. The reclaim thread could have
582 582 * been active when lufs_disable() was called and if fs_reclaim
583 583 * is reset to zero here it could lead to lost inodes.
584 584 */
585 585 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
586 586 mutex_enter(&ufsvfsp->vfs_lock);
587 587 clean = fs->fs_clean;
588 588 logbno = fs->fs_logbno;
589 589 fs->fs_clean = FSACTIVE;
590 590 fs->fs_logbno = INT32_C(0);
591 591 ufs_sbwrite(ufsvfsp);
592 592 mutex_exit(&ufsvfsp->vfs_lock);
593 593 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
594 594 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) {
595 595 error = EIO;
596 596 fs->fs_clean = clean;
597 597 fs->fs_logbno = logbno;
598 598 goto errout;
599 599 }
600 600
601 601 /*
602 602 * fetch the allocation block
603 603 * superblock -> one block of extents -> log data
604 604 */
605 605 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, logbno),
606 606 fs->fs_bsize);
607 607 if (bp->b_flags & B_ERROR) {
608 608 error = EIO;
609 609 goto errout;
610 610 }
611 611
612 612 /*
613 613 * Free up the allocated space (dummy inode needed for free())
614 614 */
615 615 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO);
616 616 ebp = (void *)bp->b_un.b_addr;
617 617 for (i = 0, ep = &ebp->extents[0]; i < ebp->nextents; ++i, ++ep) {
618 618 fno = logbtofrag(fs, ep->pbno);
619 619 nfno = dbtofsb(fs, ep->nbno);
620 620 for (j = 0; j < nfno; j += fs->fs_frag, fno += fs->fs_frag)
621 621 free(ip, fno, fs->fs_bsize, 0);
622 622 }
623 623 free(ip, logbtofrag(fs, logbno), fs->fs_bsize, 0);
624 624 brelse(bp);
625 625 bp = NULL;
626 626
627 627 /*
628 628 * Push the metadata dirtied during the allocations
629 629 */
630 630 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
631 631 sbupdate(ufsvfsp->vfs_vfs);
632 632 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
633 633 bflush(ufsvfsp->vfs_dev);
634 634 error = bfinval(ufsvfsp->vfs_dev, 0);
635 635 if (error)
636 636 goto errout;
637 637
638 638 /*
639 639 * Free the dummy inode
640 640 */
641 641 ufs_free_inode(ip);
642 642
643 643 return (0);
644 644
645 645 errout:
646 646 /*
647 647 * Free up all resources
648 648 */
649 649 if (bp)
650 650 brelse(bp);
651 651 if (ip)
652 652 ufs_free_inode(ip);
653 653 return (error);
654 654 }
655 655
656 656 /*
657 657 * Allocate log space
658 658 * Assumes the file system is write locked and is not logging
659 659 */
660 660 static int
661 661 lufs_alloc(struct ufsvfs *ufsvfsp, struct fiolog *flp, size_t minb, cred_t *cr)
662 662 {
663 663 int error = 0;
664 664 buf_t *bp = NULL;
665 665 extent_t *ep, *nep;
666 666 extent_block_t *ebp;
667 667 struct fs *fs = ufsvfsp->vfs_fs;
668 668 daddr_t fno; /* in frags */
669 669 daddr_t bno; /* in disk blocks */
670 670 int32_t logbno = INT32_C(0); /* will be fs_logbno */
671 671 struct inode *ip = NULL;
672 672 size_t nb = flp->nbytes_actual;
673 673 size_t tb = 0;
674 674
675 675 /*
676 676 * Mark the file system as FSACTIVE
677 677 */
678 678 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
679 679 mutex_enter(&ufsvfsp->vfs_lock);
680 680 fs->fs_clean = FSACTIVE;
681 681 ufs_sbwrite(ufsvfsp);
682 682 mutex_exit(&ufsvfsp->vfs_lock);
683 683 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
684 684
685 685 /*
686 686 * Allocate the allocation block (need dummy shadow inode;
687 687 * we use a shadow inode so the quota sub-system ignores
688 688 * the block allocations.)
689 689 * superblock -> one block of extents -> log data
690 690 */
691 691 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO);
692 692 ip->i_mode = IFSHAD; /* make the dummy a shadow inode */
693 693 rw_enter(&ip->i_contents, RW_WRITER);
694 694 fno = contigpref(ufsvfsp, nb + fs->fs_bsize, minb);
695 695 error = alloc(ip, fno, fs->fs_bsize, &fno, cr);
696 696 if (error)
697 697 goto errout;
698 698 bno = fsbtodb(fs, fno);
699 699
700 700 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, fs->fs_bsize);
701 701 if (bp->b_flags & B_ERROR) {
702 702 error = EIO;
703 703 goto errout;
704 704 }
705 705
706 706 ebp = (void *)bp->b_un.b_addr;
707 707 ebp->type = LUFS_EXTENTS;
708 708 ebp->nextbno = UINT32_C(0);
709 709 ebp->nextents = UINT32_C(0);
710 710 ebp->chksum = INT32_C(0);
711 711 if (fs->fs_magic == FS_MAGIC)
712 712 logbno = bno;
713 713 else
714 714 logbno = dbtofsb(fs, bno);
715 715
716 716 /*
717 717 * Initialize the first extent
718 718 */
719 719 ep = &ebp->extents[0];
720 720 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr);
721 721 if (error)
722 722 goto errout;
723 723 bno = fsbtodb(fs, fno);
724 724
725 725 ep->lbno = UINT32_C(0);
726 726 if (fs->fs_magic == FS_MAGIC)
727 727 ep->pbno = (uint32_t)bno;
728 728 else
729 729 ep->pbno = (uint32_t)fno;
730 730 ep->nbno = (uint32_t)fsbtodb(fs, fs->fs_frag);
731 731 ebp->nextents = UINT32_C(1);
732 732 tb = fs->fs_bsize;
733 733 nb -= fs->fs_bsize;
734 734
735 735 while (nb) {
736 736 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr);
737 737 if (error) {
738 738 if (tb < minb)
739 739 goto errout;
740 740 error = 0;
741 741 break;
742 742 }
743 743 bno = fsbtodb(fs, fno);
744 744 if ((daddr_t)((logbtodb(fs, ep->pbno) + ep->nbno) == bno))
745 745 ep->nbno += (uint32_t)(fsbtodb(fs, fs->fs_frag));
746 746 else {
747 747 nep = ep + 1;
748 748 if ((caddr_t)(nep + 1) >
749 749 (bp->b_un.b_addr + fs->fs_bsize)) {
750 750 free(ip, fno, fs->fs_bsize, 0);
751 751 break;
752 752 }
753 753 nep->lbno = ep->lbno + ep->nbno;
754 754 if (fs->fs_magic == FS_MAGIC)
755 755 nep->pbno = (uint32_t)bno;
756 756 else
757 757 nep->pbno = (uint32_t)fno;
758 758 nep->nbno = (uint32_t)(fsbtodb(fs, fs->fs_frag));
759 759 ebp->nextents++;
760 760 ep = nep;
761 761 }
762 762 tb += fs->fs_bsize;
763 763 nb -= fs->fs_bsize;
764 764 }
765 765
766 766 if (tb < minb) { /* Failed to reach minimum log size */
767 767 error = ENOSPC;
768 768 goto errout;
769 769 }
770 770
771 771 ebp->nbytes = (uint32_t)tb;
772 772 setsum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, fs->fs_bsize);
773 773 UFS_BWRITE2(ufsvfsp, bp);
774 774 if (bp->b_flags & B_ERROR) {
775 775 error = EIO;
776 776 goto errout;
777 777 }
778 778 /*
779 779 * Initialize the first two sectors of the log
780 780 */
781 781 error = lufs_initialize(ufsvfsp, logbtodb(fs, ebp->extents[0].pbno),
782 782 tb, flp);
783 783 if (error)
784 784 goto errout;
785 785
786 786 /*
787 787 * We are done initializing the allocation block and the log
788 788 */
789 789 brelse(bp);
790 790 bp = NULL;
791 791
792 792 /*
793 793 * Update the superblock and push the dirty metadata
794 794 */
795 795 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
796 796 sbupdate(ufsvfsp->vfs_vfs);
797 797 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
798 798 bflush(ufsvfsp->vfs_dev);
799 799 error = bfinval(ufsvfsp->vfs_dev, 1);
800 800 if (error)
801 801 goto errout;
802 802 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) {
803 803 error = EIO;
804 804 goto errout;
805 805 }
806 806
807 807 /*
808 808 * Everything is safely on disk; update log space pointer in sb
809 809 */
810 810 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread;
811 811 mutex_enter(&ufsvfsp->vfs_lock);
812 812 fs->fs_logbno = (uint32_t)logbno;
813 813 ufs_sbwrite(ufsvfsp);
814 814 mutex_exit(&ufsvfsp->vfs_lock);
815 815 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1;
816 816
817 817 /*
818 818 * Free the dummy inode
819 819 */
820 820 rw_exit(&ip->i_contents);
821 821 ufs_free_inode(ip);
822 822
823 823 /* inform user of real log size */
824 824 flp->nbytes_actual = tb;
825 825 return (0);
826 826
827 827 errout:
828 828 /*
829 829 * Free all resources
830 830 */
831 831 if (bp)
832 832 brelse(bp);
833 833 if (logbno) {
834 834 fs->fs_logbno = logbno;
835 835 (void) lufs_free(ufsvfsp);
836 836 }
837 837 if (ip) {
838 838 rw_exit(&ip->i_contents);
839 839 ufs_free_inode(ip);
840 840 }
841 841 return (error);
842 842 }
843 843
844 844 /*
845 845 * Disable logging
846 846 */
847 847 int
848 848 lufs_disable(vnode_t *vp, struct fiolog *flp)
849 849 {
850 850 int error = 0;
851 851 inode_t *ip = VTOI(vp);
852 852 ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
853 853 struct fs *fs = ufsvfsp->vfs_fs;
854 854 struct lockfs lf;
855 855 struct ulockfs *ulp;
856 856
857 857 flp->error = FIOLOG_ENONE;
858 858
859 859 /*
860 860 * Logging is already disabled; done
861 861 */
862 862 if (fs->fs_logbno == 0 || ufsvfsp->vfs_log == NULL)
863 863 return (0);
864 864
865 865 /*
866 866 * Readonly file system
867 867 */
868 868 if (fs->fs_ronly) {
869 869 flp->error = FIOLOG_EROFS;
870 870 return (0);
871 871 }
872 872
873 873 /*
874 874 * File system must be write locked to disable logging
875 875 */
876 876 error = ufs_fiolfss(vp, &lf);
877 877 if (error) {
878 878 return (error);
879 879 }
880 880 if (!LOCKFS_IS_ULOCK(&lf)) {
881 881 flp->error = FIOLOG_EULOCK;
882 882 return (0);
883 883 }
884 884 lf.lf_lock = LOCKFS_WLOCK;
885 885 lf.lf_flags = 0;
886 886 lf.lf_comment = NULL;
887 887 error = ufs_fiolfs(vp, &lf, 1);
888 888 if (error) {
889 889 flp->error = FIOLOG_EWLOCK;
890 890 return (0);
891 891 }
892 892
893 893 if (ufsvfsp->vfs_log == NULL || fs->fs_logbno == 0)
894 894 goto errout;
895 895
896 896 /*
897 897 * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT
898 898 */
899 899
900 900 /*
901 901 * Disable logging:
902 902 * Suspend the reclaim thread and force the delete thread to exit.
903 903 * When a nologging mount has completed there may still be
904 904 * work for reclaim to do so just suspend this thread until
905 905 * it's [deadlock-] safe for it to continue. The delete
906 906 * thread won't be needed as ufs_iinactive() calls
907 907 * ufs_delete() when logging is disabled.
908 908 * Freeze and drain reader ops.
909 909 * Commit any outstanding reader transactions (ufs_flush).
910 910 * Set the ``unmounted'' bit in the ufstrans struct.
911 911 * If debug, remove metadata from matamap.
912 912 * Disable matamap processing.
↓ open down ↓ |
912 lines elided |
↑ open up ↑ |
913 913 * NULL the trans ops table.
914 914 * Free all of the incore structs related to logging.
915 915 * Allow reader ops.
916 916 */
917 917 ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
918 918 ufs_thread_exit(&ufsvfsp->vfs_delete);
919 919
920 920 vfs_lock_wait(ufsvfsp->vfs_vfs);
921 921 ulp = &ufsvfsp->vfs_ulockfs;
922 922 mutex_enter(&ulp->ul_lock);
923 - atomic_add_long(&ufs_quiesce_pend, 1);
923 + atomic_inc_ulong(&ufs_quiesce_pend);
924 924 (void) ufs_quiesce(ulp);
925 925
926 926 (void) ufs_flush(ufsvfsp->vfs_vfs);
927 927
928 928 TRANS_MATA_UMOUNT(ufsvfsp);
929 929 ufsvfsp->vfs_domatamap = 0;
930 930
931 931 /*
932 932 * Free all of the incore structs
 933 933 	 * Acquire the ufs_scan_lock before de-linking the mtm data
934 934 * structure so that we keep ufs_sync() and ufs_update() away
935 935 * when they execute the ufs_scan_inodes() run while we're in
936 936 * progress of enabling/disabling logging.
937 937 */
938 938 mutex_enter(&ufs_scan_lock);
939 939 (void) lufs_unsnarf(ufsvfsp);
940 940 mutex_exit(&ufs_scan_lock);
941 941
942 - atomic_add_long(&ufs_quiesce_pend, -1);
942 + atomic_dec_ulong(&ufs_quiesce_pend);
943 943 mutex_exit(&ulp->ul_lock);
944 944 vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
945 945 vfs_unlock(ufsvfsp->vfs_vfs);
946 946
947 947 fs->fs_rolled = FS_ALL_ROLLED;
948 948 ufsvfsp->vfs_nolog_si = 0;
949 949
950 950 /*
951 951 * Free the log space and mark the superblock as FSACTIVE
952 952 */
953 953 (void) lufs_free(ufsvfsp);
954 954
955 955 /*
956 956 * Allow the reclaim thread to continue.
957 957 */
958 958 ufs_thread_continue(&ufsvfsp->vfs_reclaim);
959 959
960 960 /*
961 961 * Unlock the file system
962 962 */
963 963 lf.lf_lock = LOCKFS_ULOCK;
964 964 lf.lf_flags = 0;
965 965 error = ufs_fiolfs(vp, &lf, 1);
966 966 if (error)
967 967 flp->error = FIOLOG_ENOULOCK;
968 968
969 969 return (0);
970 970
971 971 errout:
972 972 lf.lf_lock = LOCKFS_ULOCK;
973 973 lf.lf_flags = 0;
974 974 (void) ufs_fiolfs(vp, &lf, 1);
975 975 return (error);
976 976 }
977 977
/*
 * Enable logging
 *
 * Enable UFS on-disk logging for the file system containing vp.  The log
 * size is computed (or taken from the caller's request), the file system
 * is write-locked, the on-disk log is allocated, and the in-core logging
 * structures are built.  "Soft" failures are reported through flp->error
 * (a FIOLOG_* code) with a 0 return value; hard failures return an errno.
 *
 *	vp	- a vnode within the file system to enable logging on
 *	flp	- in: requested log size; out: actual size and FIOLOG_* error
 *	cr	- credentials, passed through to lufs_alloc()
 */
int
lufs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr)
{
	int		error;
	int		reclaim;
	inode_t		*ip = VTOI(vp);
	ufsvfs_t	*ufsvfsp = ip->i_ufsvfs;
	struct fs	*fs;
	ml_unit_t	*ul;
	struct lockfs	lf;
	struct ulockfs	*ulp;
	vfs_t		*vfsp = ufsvfsp->vfs_vfs;
	uint64_t	tmp_nbytes_actual;
	uint64_t	cg_minlogsize;
	uint32_t	cgsize;
	/* warn at most once per boot about out-of-range log size tunables */
	static int	minlogsizewarn = 0;
	static int	maxlogsizewarn = 0;

	/*
	 * Check if logging is already enabled
	 */
	if (ufsvfsp->vfs_log) {
		flp->error = FIOLOG_ETRANS;
		/* for root ensure logging option is set */
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		return (0);
	}
	fs = ufsvfsp->vfs_fs;

	/*
	 * Come back here to recheck if we had to disable the log.
	 */
recheck:
	error = 0;
	reclaim = 0;
	flp->error = FIOLOG_ENONE;

	/*
	 * The size of the ufs log is determined using the following rules:
	 *
	 * 1) If no size is requested the log size is calculated as a
	 *    ratio of the total file system size. By default this is
	 *    1MB of log per 1GB of file system. This calculation is then
	 *    capped at the log size specified by ldl_softlogcap.
	 * 2) The log size requested may then be increased based on the
	 *    number of cylinder groups contained in the file system.
	 *    To prevent a hang the log has to be large enough to contain a
	 *    single transaction that alters every cylinder group in the file
	 *    system. This is calculated as cg_minlogsize.
	 * 3) Finally a check is made that the log size requested is within
	 *    the limits of ldl_minlogsize and ldl_maxlogsize.
	 */

	/*
	 * Adjust requested log size
	 */
	flp->nbytes_actual = flp->nbytes_requested;
	if (flp->nbytes_actual == 0) {
		/* no explicit request: rule 1, size scales with fs size */
		tmp_nbytes_actual =
		    (((uint64_t)fs->fs_size) / ldl_divisor) << fs->fs_fshift;
		flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX);
		/*
		 * The 1MB per 1GB log size allocation only applies up to
		 * ldl_softlogcap size of log.
		 */
		flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_softlogcap);
	}

	/* per-cylinder-group log space requirement; tunable overrides */
	cgsize = ldl_cgsizereq ? ldl_cgsizereq : LDL_CGSIZEREQ(fs);

	/*
	 * Determine the log size required based on the number of cylinder
	 * groups in the file system. The log has to be at least this size
	 * to prevent possible hangs due to log space exhaustion.
	 */
	cg_minlogsize = cgsize * fs->fs_ncg;

	/*
	 * Ensure that the minimum log size isn't so small that it could lead
	 * to a full log hang.
	 */
	if (ldl_minlogsize < LDL_MINLOGSIZE) {
		ldl_minlogsize = LDL_MINLOGSIZE;
		if (!minlogsizewarn) {
			cmn_err(CE_WARN, "ldl_minlogsize too small, increasing "
			    "to 0x%x", LDL_MINLOGSIZE);
			minlogsizewarn = 1;
		}
	}

	/*
	 * Ensure that the maximum log size isn't greater than INT_MAX as the
	 * logical log offset fields would overflow.
	 */
	if (ldl_maxlogsize > INT_MAX) {
		ldl_maxlogsize = INT_MAX;
		if (!maxlogsizewarn) {
			cmn_err(CE_WARN, "ldl_maxlogsize too large, reducing "
			    "to 0x%x", INT_MAX);
			maxlogsizewarn = 1;
		}
	}

	if (cg_minlogsize > ldl_maxlogsize) {
		cmn_err(CE_WARN,
		    "%s: reducing calculated log size from 0x%x to "
		    "ldl_maxlogsize (0x%x).", fs->fs_fsmnt, (int)cg_minlogsize,
		    ldl_maxlogsize);
	}

	/* clamp the cg-based minimum into [ldl_minlogsize, ldl_maxlogsize] */
	cg_minlogsize = MAX(cg_minlogsize, ldl_minlogsize);
	cg_minlogsize = MIN(cg_minlogsize, ldl_maxlogsize);

	/* apply rules 2 and 3, then round up to a file system block */
	flp->nbytes_actual = MAX(flp->nbytes_actual, cg_minlogsize);
	flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize);
	flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize);
	flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual);

	/*
	 * logging is enabled and the log is the right size; done
	 */
	ul = ufsvfsp->vfs_log;
	if (ul && fs->fs_logbno && (flp->nbytes_actual == ul->un_requestsize))
		return (0);

	/*
	 * Readonly file system
	 */
	if (fs->fs_ronly) {
		flp->error = FIOLOG_EROFS;
		return (0);
	}

	/*
	 * File system must be write locked to enable logging
	 */
	error = ufs_fiolfss(vp, &lf);
	if (error) {
		return (error);
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		return (0);
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = ufs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		return (0);
	}

	/*
	 * Grab appropriate locks to synchronize with the rest
	 * of the system
	 */
	vfs_lock_wait(vfsp);
	ulp = &ufsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);

	/*
	 * File system must be fairly consistent to enable logging
	 */
	if (fs->fs_clean != FSLOG &&
	    fs->fs_clean != FSACTIVE &&
	    fs->fs_clean != FSSTABLE &&
	    fs->fs_clean != FSCLEAN) {
		flp->error = FIOLOG_ECLEAN;
		goto unlockout;
	}

	/*
	 * A write-locked file system is only active if there are
	 * open deleted files; so remember to set FS_RECLAIM later.
	 */
	if (fs->fs_clean == FSACTIVE)
		reclaim = FS_RECLAIM;

	/*
	 * Logging is already enabled; must be changing the log's size
	 */
	if (fs->fs_logbno && ufsvfsp->vfs_log) {
		/*
		 * Before we can disable logging, we must give up our
		 * lock. As a consequence of unlocking and disabling the
		 * log, the fs structure may change. Because of this, when
		 * disabling is complete, we will go back to recheck to
		 * repeat all of the checks that we performed to get to
		 * this point. Disabling sets fs->fs_logbno to 0, so this
		 * will not put us into an infinite loop.
		 */
		mutex_exit(&ulp->ul_lock);
		vfs_unlock(vfsp);

		lf.lf_lock = LOCKFS_ULOCK;
		lf.lf_flags = 0;
		error = ufs_fiolfs(vp, &lf, 1);
		if (error) {
			flp->error = FIOLOG_ENOULOCK;
			return (0);
		}
		error = lufs_disable(vp, flp);
		if (error || (flp->error != FIOLOG_ENONE))
			return (0);
		goto recheck;
	}

	/* allocate the on-disk log blocks */
	error = lufs_alloc(ufsvfsp, flp, cg_minlogsize, cr);
	if (error)
		goto errout;

	/*
	 * Create all of the incore structs
	 */
	error = lufs_snarf(ufsvfsp, fs, 0);
	if (error)
		goto errout;

	/*
	 * DON'T ``GOTO ERROUT'' PAST THIS POINT
	 */

	/*
	 * Pretend we were just mounted with logging enabled
	 * Get the ops vector
	 * If debug, record metadata locations with log subsystem
	 * Start the delete thread
	 * Start the reclaim thread, if necessary
	 */
	vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);

	TRANS_DOMATAMAP(ufsvfsp);
	TRANS_MATA_MOUNT(ufsvfsp);
	TRANS_MATA_SI(ufsvfsp, fs);
	ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
	if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
		fs->fs_reclaim &= ~FS_RECLAIM;
		fs->fs_reclaim |= FS_RECLAIMING;
		ufs_thread_start(&ufsvfsp->vfs_reclaim,
		    ufs_thread_reclaim, vfsp);
	} else
		fs->fs_reclaim |= reclaim;

	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = ufs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
		return (0);
	}

	/*
	 * There's nothing in the log yet (we've just allocated it)
	 * so directly write out the super block.
	 * Note, we have to force this sb out to disk
	 * (not just to the log) so that if we crash we know we are logging
	 */
	mutex_enter(&ufsvfsp->vfs_lock);
	fs->fs_clean = FSLOG;
	fs->fs_rolled = FS_NEED_ROLL; /* Mark the fs as unrolled */
	UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
	mutex_exit(&ufsvfsp->vfs_lock);

	return (0);

errout:
	/*
	 * Aquire the ufs_scan_lock before de-linking the mtm data
	 * structure so that we keep ufs_sync() and ufs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&ufs_scan_lock);
	(void) lufs_unsnarf(ufsvfsp);
	mutex_exit(&ufs_scan_lock);

	(void) lufs_free(ufsvfsp);
unlockout:
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) ufs_fiolfs(vp, &lf, 1);
	return (error);
}
1274 1274
/*
 * Logging read strategy.
 *
 * A read may overlap deltas that are committed in the log but not yet
 * rolled to their home location on the master device.  Such reads must be
 * satisfied (at least partially) from the log so the caller sees the
 * committed data.  Reads with no overlapping deltas go straight to the
 * master device asynchronously.
 */
void
lufs_read_strategy(ml_unit_t *ul, buf_t *bp)
{
	mt_map_t	*logmap = ul->un_logmap;
	offset_t	mof = ldbtob(bp->b_blkno);
	off_t		nb = bp->b_bcount;
	mapentry_t	*age;
	char		*va;
	int		(*saviodone)();
	int		entire_range;

	/*
	 * get a linked list of overlapping deltas
	 * returns with &mtm->mtm_rwlock held
	 */
	entire_range = logmap_list_get(logmap, mof, nb, &age);

	/*
	 * no overlapping deltas were found; read master
	 */
	if (age == NULL) {
		rw_exit(&logmap->mtm_rwlock);
		if (ul->un_flags & LDL_ERROR) {
			/* log device has errored; fail the buffer */
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
			biodone(bp);
		} else {
			ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt();
			logstats.ls_lreads.value.ui64++;
			(void) bdev_strategy(bp);
			lwp_stat_update(LWP_STAT_INBLK, 1);
		}
		return;
	}

	va = bp_mapin_common(bp, VM_SLEEP);
	/*
	 * if necessary, sync read the data from master
	 * errors are returned in bp
	 *
	 * The deltas do not cover the whole request, so fill the buffer
	 * from the master device first.  Temporarily swap in
	 * trans_not_done as the iodone routine so we can synchronously
	 * wait for the I/O here with trans_not_wait().
	 */
	if (!entire_range) {
		saviodone = bp->b_iodone;
		bp->b_iodone = trans_not_done;
		logstats.ls_mreads.value.ui64++;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
		if (trans_not_wait(bp))
			ldl_seterror(ul, "Error reading master");
		bp->b_iodone = saviodone;
	}

	/*
	 * sync read the data from the log
	 * errors are returned inline
	 */
	if (ldl_read(ul, va, mof, nb, age)) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}

	/*
	 * unlist the deltas
	 */
	logmap_list_put(logmap, age);

	/*
	 * all done; report any log error through the buffer as well
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	biodone(bp);
}
1349 1349
/*
 * Logging write strategy.
 *
 * If the buffer overlaps deltas registered in the delta map, the data is
 * copied into the log map and committed via the log; the write to the
 * master device is deferred until roll time.  Otherwise the buffer is
 * written directly to the master device (through the snapshot driver if
 * a snapshot is active).
 */
void
lufs_write_strategy(ml_unit_t *ul, buf_t *bp)
{
	offset_t	mof = ldbtob(bp->b_blkno);
	off_t		nb = bp->b_bcount;
	char		*va;
	mapentry_t	*me;

	/* writes must be whole device blocks */
	ASSERT((nb & DEV_BMASK) == 0);
	ul->un_logmap->mtm_ref = 1;

	/*
	 * if there are deltas, move into log
	 */
	me = deltamap_remove(ul->un_deltamap, mof, nb);
	if (me) {

		va = bp_mapin_common(bp, VM_SLEEP);

		ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
		    (ul->un_matamap == NULL)||
		    matamap_within(ul->un_matamap, mof, nb));

		/*
		 * move to logmap
		 *
		 * ufs_crb_enable selects the copy-buffer variant
		 * (logmap_add_buf) over plain logmap_add.
		 */
		if (ufs_crb_enable) {
			logmap_add_buf(ul, va, mof, me,
			    bp->b_un.b_addr, nb);
		} else {
			logmap_add(ul, va, mof, me);
		}

		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
		}
		/* data captured by the log; complete the buffer now */
		biodone(bp);
		return;
	}
	if (ul->un_flags & LDL_ERROR) {
		/* log device has errored; fail the buffer */
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	/*
	 * Check that we are not updating metadata, or if so then via B_PHYS.
	 */
	ASSERT((ul->un_matamap == NULL) ||
	    !(matamap_overlap(ul->un_matamap, mof, nb) &&
	    ((bp->b_flags & B_PHYS) == 0)));

	ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt();
	logstats.ls_lwrites.value.ui64++;

	/* If snapshots are enabled, write through the snapshot driver */
	if (ul->un_ufsvfs->vfs_snapshot)
		fssnap_strategy(&ul->un_ufsvfs->vfs_snapshot, bp);
	else
		(void) bdev_strategy(bp);

	lwp_stat_update(LWP_STAT_OUBLK, 1);
}
1415 1415
1416 1416 void
1417 1417 lufs_strategy(ml_unit_t *ul, buf_t *bp)
1418 1418 {
1419 1419 if (bp->b_flags & B_READ)
1420 1420 lufs_read_strategy(ul, bp);
1421 1421 else
1422 1422 lufs_write_strategy(ul, bp);
1423 1423 }
1424 1424
1425 1425 /* ARGSUSED */
1426 1426 static int
1427 1427 delta_stats_update(kstat_t *ksp, int rw)
1428 1428 {
1429 1429 if (rw == KSTAT_WRITE) {
1430 1430 delta_stats[DT_SB] = dkstats.ds_superblock_deltas.value.ui64;
1431 1431 delta_stats[DT_CG] = dkstats.ds_bitmap_deltas.value.ui64;
1432 1432 delta_stats[DT_SI] = dkstats.ds_suminfo_deltas.value.ui64;
1433 1433 delta_stats[DT_AB] = dkstats.ds_allocblk_deltas.value.ui64;
1434 1434 delta_stats[DT_ABZERO] = dkstats.ds_ab0_deltas.value.ui64;
1435 1435 delta_stats[DT_DIR] = dkstats.ds_dir_deltas.value.ui64;
1436 1436 delta_stats[DT_INODE] = dkstats.ds_inode_deltas.value.ui64;
1437 1437 delta_stats[DT_FBI] = dkstats.ds_fbiwrite_deltas.value.ui64;
1438 1438 delta_stats[DT_QR] = dkstats.ds_quota_deltas.value.ui64;
1439 1439 delta_stats[DT_SHAD] = dkstats.ds_shadow_deltas.value.ui64;
1440 1440
1441 1441 roll_stats[DT_SB] = dkstats.ds_superblock_rolled.value.ui64;
1442 1442 roll_stats[DT_CG] = dkstats.ds_bitmap_rolled.value.ui64;
1443 1443 roll_stats[DT_SI] = dkstats.ds_suminfo_rolled.value.ui64;
1444 1444 roll_stats[DT_AB] = dkstats.ds_allocblk_rolled.value.ui64;
1445 1445 roll_stats[DT_ABZERO] = dkstats.ds_ab0_rolled.value.ui64;
1446 1446 roll_stats[DT_DIR] = dkstats.ds_dir_rolled.value.ui64;
1447 1447 roll_stats[DT_INODE] = dkstats.ds_inode_rolled.value.ui64;
1448 1448 roll_stats[DT_FBI] = dkstats.ds_fbiwrite_rolled.value.ui64;
1449 1449 roll_stats[DT_QR] = dkstats.ds_quota_rolled.value.ui64;
1450 1450 roll_stats[DT_SHAD] = dkstats.ds_shadow_rolled.value.ui64;
1451 1451 } else {
1452 1452 dkstats.ds_superblock_deltas.value.ui64 = delta_stats[DT_SB];
1453 1453 dkstats.ds_bitmap_deltas.value.ui64 = delta_stats[DT_CG];
1454 1454 dkstats.ds_suminfo_deltas.value.ui64 = delta_stats[DT_SI];
1455 1455 dkstats.ds_allocblk_deltas.value.ui64 = delta_stats[DT_AB];
1456 1456 dkstats.ds_ab0_deltas.value.ui64 = delta_stats[DT_ABZERO];
1457 1457 dkstats.ds_dir_deltas.value.ui64 = delta_stats[DT_DIR];
1458 1458 dkstats.ds_inode_deltas.value.ui64 = delta_stats[DT_INODE];
1459 1459 dkstats.ds_fbiwrite_deltas.value.ui64 = delta_stats[DT_FBI];
1460 1460 dkstats.ds_quota_deltas.value.ui64 = delta_stats[DT_QR];
1461 1461 dkstats.ds_shadow_deltas.value.ui64 = delta_stats[DT_SHAD];
1462 1462
1463 1463 dkstats.ds_superblock_rolled.value.ui64 = roll_stats[DT_SB];
1464 1464 dkstats.ds_bitmap_rolled.value.ui64 = roll_stats[DT_CG];
1465 1465 dkstats.ds_suminfo_rolled.value.ui64 = roll_stats[DT_SI];
1466 1466 dkstats.ds_allocblk_rolled.value.ui64 = roll_stats[DT_AB];
1467 1467 dkstats.ds_ab0_rolled.value.ui64 = roll_stats[DT_ABZERO];
1468 1468 dkstats.ds_dir_rolled.value.ui64 = roll_stats[DT_DIR];
1469 1469 dkstats.ds_inode_rolled.value.ui64 = roll_stats[DT_INODE];
1470 1470 dkstats.ds_fbiwrite_rolled.value.ui64 = roll_stats[DT_FBI];
1471 1471 dkstats.ds_quota_rolled.value.ui64 = roll_stats[DT_QR];
1472 1472 dkstats.ds_shadow_rolled.value.ui64 = roll_stats[DT_SHAD];
1473 1473 }
1474 1474 return (0);
1475 1475 }
1476 1476
1477 1477 extern size_t ufs_crb_limit;
1478 1478 extern int ufs_max_crb_divisor;
1479 1479
1480 1480 void
1481 1481 lufs_init(void)
1482 1482 {
1483 1483 kstat_t *ksp;
1484 1484
1485 1485 /* Create kmem caches */
1486 1486 lufs_sv = kmem_cache_create("lufs_save", sizeof (lufs_save_t), 0,
1487 1487 NULL, NULL, NULL, NULL, NULL, 0);
1488 1488 lufs_bp = kmem_cache_create("lufs_bufs", sizeof (lufs_buf_t), 0,
1489 1489 NULL, NULL, NULL, NULL, NULL, 0);
1490 1490
1491 1491 mutex_init(&log_mutex, NULL, MUTEX_DEFAULT, NULL);
1492 1492
1493 1493 _init_top();
1494 1494
1495 1495 if (bio_lufs_strategy == NULL)
1496 1496 bio_lufs_strategy = (void (*) (void *, buf_t *)) lufs_strategy;
1497 1497
1498 1498 /*
1499 1499 * Initialise general logging and delta kstats
1500 1500 */
1501 1501 ksp = kstat_create("ufs_log", 0, "logstats", "ufs", KSTAT_TYPE_NAMED,
1502 1502 sizeof (logstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
1503 1503 if (ksp) {
1504 1504 ksp->ks_data = (void *) &logstats;
1505 1505 kstat_install(ksp);
1506 1506 }
1507 1507
1508 1508 ksp = kstat_create("ufs_log", 0, "deltastats", "ufs", KSTAT_TYPE_NAMED,
1509 1509 sizeof (dkstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
1510 1510 if (ksp) {
1511 1511 ksp->ks_data = (void *) &dkstats;
1512 1512 ksp->ks_update = delta_stats_update;
1513 1513 kstat_install(ksp);
1514 1514 }
1515 1515
1516 1516 /* Initialize generation of logging ids */
1517 1517 lufs_genid_init();
1518 1518
1519 1519 /*
1520 1520 * Set up the maximum amount of kmem that the crbs (system wide)
1521 1521 * can use.
1522 1522 */
1523 1523 ufs_crb_limit = kmem_maxavail() / ufs_max_crb_divisor;
1524 1524 }
↓ open down ↓ |
572 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX