Print this page
6583 remove whole-process swapping
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 28 * All rights reserved.
29 29 */
30 30
31 31 #include <sys/param.h>
32 32 #include <sys/types.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/cred.h>
35 35 #include <sys/buf.h>
36 36 #include <sys/vfs.h>
37 37 #include <sys/vnode.h>
38 38 #include <sys/uio.h>
39 39 #include <sys/stat.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/sysmacros.h>
42 42 #include <sys/statvfs.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/kstat.h>
45 45 #include <sys/dirent.h>
46 46 #include <sys/cmn_err.h>
47 47 #include <sys/debug.h>
48 48 #include <sys/vtrace.h>
49 49 #include <sys/mode.h>
50 50 #include <sys/acl.h>
51 51 #include <sys/nbmlock.h>
52 52 #include <sys/policy.h>
53 53 #include <sys/sdt.h>
54 54
55 55 #include <rpc/types.h>
56 56 #include <rpc/auth.h>
57 57 #include <rpc/svc.h>
58 58
59 59 #include <nfs/nfs.h>
60 60 #include <nfs/export.h>
61 61 #include <nfs/nfs_cmd.h>
62 62
63 63 #include <vm/hat.h>
64 64 #include <vm/as.h>
65 65 #include <vm/seg.h>
66 66 #include <vm/seg_map.h>
67 67 #include <vm/seg_kmem.h>
68 68
69 69 #include <sys/strsubr.h>
70 70
71 71 /*
72 72 * These are the interface routines for the server side of the
73 73 * Network File System. See the NFS version 2 protocol specification
74 74 * for a description of this interface.
75 75 */
76 76
77 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
78 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
79 79 cred_t *);
80 80
81 81 /*
82 82 * Some "over the wire" UNIX file types. These are encoded
83 83 * into the mode. This needs to be fixed in the next rev.
84 84 */
85 85 #define IFMT 0170000 /* type of file */
86 86 #define IFCHR 0020000 /* character special */
87 87 #define IFBLK 0060000 /* block special */
88 88 #define IFSOCK 0140000 /* socket */
89 89
90 90 u_longlong_t nfs2_srv_caller_id;
91 91
/*
 * Get file attributes (NFSv2 GETATTR).
 * Returns the current attributes of the file with the given fhandle
 * in ns->ns_attr; ns->ns_status carries the NFS status code.
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the filehandle; failure means the handle is stale. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
132 132 void *
133 133 rfs_getattr_getfh(fhandle_t *fhp)
134 134 {
135 135 return (fhp);
136 136 }
137 137
/*
 * Set file attributes (NFSv2 SETATTR).
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes in ns->ns_attr; ns->ns_status carries the NFS
 * status code.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;			/* flags passed to VOP_SETATTR */
	int in_crit = 0;		/* non-zero iff inside nbmand crit region */
	vnode_t *vp;
	struct vattr va;		/* attributes requested by the client */
	struct vattr bva;		/* attributes before any size change */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* No attribute changes on a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Convert the over-the-wire sattr into a vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region affected by the truncate/extend is
			 * the span between the old and the new EOF.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner gets the size change via VOP_SPACE (bypasses the
		 * access check); non-owners fall through to VOP_SETATTR
		 * with AT_SIZE still set.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
325 325 void *
326 326 rfs_setattr_getfh(struct nfssaargs *args)
327 327 {
328 328 return (&args->saa_fh);
329 329 }
330 330
331 331 /*
332 332 * Directory lookup.
333 333 * Returns an fhandle and file attributes for file name in a directory.
334 334 */
335 335 /* ARGSUSED */
336 336 void
337 337 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
338 338 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
339 339 {
340 340 int error;
341 341 vnode_t *dvp;
342 342 vnode_t *vp;
343 343 struct vattr va;
344 344 fhandle_t *fhp = da->da_fhandle;
345 345 struct sec_ol sec = {0, 0};
346 346 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
347 347 char *name;
348 348 struct sockaddr *ca;
349 349
350 350 /*
351 351 * Trusted Extension doesn't support NFSv2. MOUNT
352 352 * will reject v2 clients. Need to prevent v2 client
353 353 * access via WebNFS here.
354 354 */
355 355 if (is_system_labeled() && req->rq_vers == 2) {
356 356 dr->dr_status = NFSERR_ACCES;
357 357 return;
358 358 }
359 359
360 360 /*
361 361 * Disallow NULL paths
362 362 */
363 363 if (da->da_name == NULL || *da->da_name == '\0') {
364 364 dr->dr_status = NFSERR_ACCES;
365 365 return;
366 366 }
367 367
368 368 /*
369 369 * Allow lookups from the root - the default
370 370 * location of the public filehandle.
371 371 */
372 372 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
373 373 dvp = rootdir;
374 374 VN_HOLD(dvp);
375 375 } else {
376 376 dvp = nfs_fhtovp(fhp, exi);
377 377 if (dvp == NULL) {
378 378 dr->dr_status = NFSERR_STALE;
379 379 return;
380 380 }
381 381 }
382 382
383 383 /*
384 384 * Not allow lookup beyond root.
385 385 * If the filehandle matches a filehandle of the exi,
386 386 * then the ".." refers beyond the root of an exported filesystem.
387 387 */
388 388 if (strcmp(da->da_name, "..") == 0 &&
389 389 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
390 390 VN_RELE(dvp);
391 391 dr->dr_status = NFSERR_NOENT;
392 392 return;
393 393 }
394 394
395 395 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
396 396 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
397 397 MAXPATHLEN);
398 398
399 399 if (name == NULL) {
400 400 dr->dr_status = NFSERR_ACCES;
401 401 return;
402 402 }
403 403
404 404 /*
405 405 * If the public filehandle is used then allow
406 406 * a multi-component lookup, i.e. evaluate
407 407 * a pathname and follow symbolic links if
408 408 * necessary.
409 409 *
410 410 * This may result in a vnode in another filesystem
411 411 * which is OK as long as the filesystem is exported.
412 412 */
413 413 if (PUBLIC_FH2(fhp)) {
414 414 publicfh_flag = TRUE;
415 415 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
416 416 &sec);
417 417 } else {
418 418 /*
419 419 * Do a normal single component lookup.
420 420 */
421 421 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
422 422 NULL, NULL, NULL);
423 423 }
424 424
425 425 if (name != da->da_name)
426 426 kmem_free(name, MAXPATHLEN);
427 427
428 428
429 429 if (!error) {
430 430 va.va_mask = AT_ALL; /* we want everything */
431 431
432 432 error = rfs4_delegated_getattr(vp, &va, 0, cr);
433 433
434 434 /* check for overflows */
435 435 if (!error) {
436 436 acl_perm(vp, exi, &va, cr);
437 437 error = vattr_to_nattr(&va, &dr->dr_attr);
438 438 if (!error) {
439 439 if (sec.sec_flags & SEC_QUERY)
440 440 error = makefh_ol(&dr->dr_fhandle, exi,
441 441 sec.sec_index);
442 442 else {
443 443 error = makefh(&dr->dr_fhandle, vp,
444 444 exi);
445 445 if (!error && publicfh_flag &&
446 446 !chk_clnt_sec(exi, req))
447 447 auth_weak = TRUE;
448 448 }
449 449 }
450 450 }
451 451 VN_RELE(vp);
452 452 }
453 453
454 454 VN_RELE(dvp);
455 455
456 456 /*
457 457 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
458 458 * and have obtained a new exportinfo in exi which needs to be
459 459 * released. Note the the original exportinfo pointed to by exi
460 460 * will be released by the caller, comon_dispatch.
461 461 */
462 462 if (publicfh_flag && exi != NULL)
463 463 exi_rele(exi);
464 464
465 465 /*
466 466 * If it's public fh, no 0x81, and client's flavor is
467 467 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
468 468 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
469 469 */
470 470 if (auth_weak)
471 471 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
472 472 else
473 473 dr->dr_status = puterrno(error);
474 474 }
475 475 void *
476 476 rfs_lookup_getfh(struct nfsdiropargs *da)
477 477 {
478 478 return (da->da_fhandle);
479 479 }
480 480
/*
 * Read symbolic link (NFSv2 READLINK).
 * Returns the string in the symbolic link at the given fhandle in
 * rl->rl_data (allocated here, freed later by rfs_rlfree).
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;	/* object is an NFS referral, not a real link */

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects under mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link text fills the entire buffer
		 * (uio_resid == 0) this NUL write lands one byte past the
		 * NFS_MAXPATHLEN allocation -- assumes VOP_READLINK never
		 * consumes the whole buffer; confirm.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/*
	 * Convert the link text for the client's character set if the
	 * export requires it.
	 *
	 * NOTE(review): the converted buffer is allocated with size
	 * MAXPATHLEN but replaces rl_data, which rfs_rlfree() frees with
	 * NFS_MAXPATHLEN -- assumes the two constants are equal; confirm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
611 611 void *
612 612 rfs_readlink_getfh(fhandle_t *fhp)
613 613 {
614 614 return (fhp);
615 615 }
616 616 /*
617 617 * Free data allocated by rfs_readlink
618 618 */
619 619 void
620 620 rfs_rlfree(struct nfsrdlnres *rl)
621 621 {
622 622 if (rl->rl_data != NULL)
623 623 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
624 624 }
625 625
626 626 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
627 627
628 628 /*
629 629 * Read data.
630 630 * Returns some data read from the file at the given fhandle.
631 631 */
632 632 /* ARGSUSED */
633 633 void
634 634 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
635 635 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
636 636 {
637 637 vnode_t *vp;
638 638 int error;
639 639 struct vattr va;
640 640 struct iovec iov;
641 641 struct uio uio;
642 642 mblk_t *mp;
643 643 int alloc_err = 0;
644 644 int in_crit = 0;
645 645 caller_context_t ct;
646 646
647 647 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
648 648 if (vp == NULL) {
649 649 rr->rr_data = NULL;
650 650 rr->rr_status = NFSERR_STALE;
651 651 return;
652 652 }
653 653
654 654 if (vp->v_type != VREG) {
655 655 VN_RELE(vp);
656 656 rr->rr_data = NULL;
657 657 rr->rr_status = NFSERR_ISDIR;
658 658 return;
659 659 }
660 660
661 661 ct.cc_sysid = 0;
662 662 ct.cc_pid = 0;
663 663 ct.cc_caller_id = nfs2_srv_caller_id;
664 664 ct.cc_flags = CC_DONTBLOCK;
665 665
666 666 /*
667 667 * Enter the critical region before calling VOP_RWLOCK
668 668 * to avoid a deadlock with write requests.
669 669 */
670 670 if (nbl_need_check(vp)) {
671 671 nbl_start_crit(vp, RW_READER);
672 672 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
673 673 0, NULL)) {
674 674 nbl_end_crit(vp);
675 675 VN_RELE(vp);
676 676 rr->rr_data = NULL;
677 677 rr->rr_status = NFSERR_ACCES;
678 678 return;
679 679 }
680 680 in_crit = 1;
681 681 }
682 682
683 683 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
684 684
685 685 /* check if a monitor detected a delegation conflict */
686 686 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
687 687 VN_RELE(vp);
688 688 /* mark as wouldblock so response is dropped */
689 689 curthread->t_flag |= T_WOULDBLOCK;
690 690
691 691 rr->rr_data = NULL;
692 692 return;
693 693 }
694 694
695 695 va.va_mask = AT_ALL;
696 696
697 697 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
698 698
699 699 if (error) {
700 700 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
701 701 if (in_crit)
702 702 nbl_end_crit(vp);
703 703
704 704 VN_RELE(vp);
705 705 rr->rr_data = NULL;
706 706 rr->rr_status = puterrno(error);
707 707
708 708 return;
709 709 }
710 710
711 711 /*
712 712 * This is a kludge to allow reading of files created
713 713 * with no read permission. The owner of the file
714 714 * is always allowed to read it.
715 715 */
716 716 if (crgetuid(cr) != va.va_uid) {
717 717 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
718 718
719 719 if (error) {
720 720 /*
721 721 * Exec is the same as read over the net because
722 722 * of demand loading.
723 723 */
724 724 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
725 725 }
726 726 if (error) {
727 727 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
728 728 if (in_crit)
729 729 nbl_end_crit(vp);
730 730 VN_RELE(vp);
731 731 rr->rr_data = NULL;
732 732 rr->rr_status = puterrno(error);
733 733
734 734 return;
735 735 }
736 736 }
737 737
738 738 if (MANDLOCK(vp, va.va_mode)) {
739 739 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
740 740 if (in_crit)
741 741 nbl_end_crit(vp);
742 742
743 743 VN_RELE(vp);
744 744 rr->rr_data = NULL;
745 745 rr->rr_status = NFSERR_ACCES;
746 746
747 747 return;
748 748 }
749 749
750 750 rr->rr_ok.rrok_wlist_len = 0;
751 751 rr->rr_ok.rrok_wlist = NULL;
752 752
753 753 if ((u_offset_t)ra->ra_offset >= va.va_size) {
754 754 rr->rr_count = 0;
755 755 rr->rr_data = NULL;
756 756 /*
757 757 * In this case, status is NFS_OK, but there is no data
758 758 * to encode. So set rr_mp to NULL.
759 759 */
760 760 rr->rr_mp = NULL;
761 761 rr->rr_ok.rrok_wlist = ra->ra_wlist;
762 762 if (rr->rr_ok.rrok_wlist)
763 763 clist_zero_len(rr->rr_ok.rrok_wlist);
764 764 goto done;
765 765 }
766 766
767 767 if (ra->ra_wlist) {
768 768 mp = NULL;
769 769 rr->rr_mp = NULL;
770 770 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
771 771 if (ra->ra_count > iov.iov_len) {
772 772 rr->rr_data = NULL;
773 773 rr->rr_status = NFSERR_INVAL;
774 774 goto done;
775 775 }
776 776 } else {
777 777 /*
778 778 * mp will contain the data to be sent out in the read reply.
779 779 * This will be freed after the reply has been sent out (by the
780 780 * driver).
781 781 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
782 782 * that the call to xdrmblk_putmblk() never fails.
783 783 */
784 784 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
785 785 &alloc_err);
786 786 ASSERT(mp != NULL);
787 787 ASSERT(alloc_err == 0);
788 788
789 789 rr->rr_mp = mp;
790 790
791 791 /*
792 792 * Set up io vector
793 793 */
794 794 iov.iov_base = (caddr_t)mp->b_datap->db_base;
795 795 iov.iov_len = ra->ra_count;
796 796 }
797 797
798 798 uio.uio_iov = &iov;
799 799 uio.uio_iovcnt = 1;
800 800 uio.uio_segflg = UIO_SYSSPACE;
801 801 uio.uio_extflg = UIO_COPY_CACHED;
802 802 uio.uio_loffset = (offset_t)ra->ra_offset;
803 803 uio.uio_resid = ra->ra_count;
804 804
805 805 error = VOP_READ(vp, &uio, 0, cr, &ct);
806 806
807 807 if (error) {
808 808 if (mp)
809 809 freeb(mp);
810 810
811 811 /*
812 812 * check if a monitor detected a delegation conflict and
813 813 * mark as wouldblock so response is dropped
814 814 */
815 815 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
816 816 curthread->t_flag |= T_WOULDBLOCK;
817 817 else
818 818 rr->rr_status = puterrno(error);
819 819
820 820 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
821 821 if (in_crit)
822 822 nbl_end_crit(vp);
823 823
824 824 VN_RELE(vp);
825 825 rr->rr_data = NULL;
826 826
827 827 return;
828 828 }
829 829
830 830 /*
831 831 * Get attributes again so we can send the latest access
832 832 * time to the client side for his cache.
833 833 */
834 834 va.va_mask = AT_ALL;
835 835
836 836 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
837 837
838 838 if (error) {
839 839 if (mp)
840 840 freeb(mp);
841 841
842 842 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
843 843 if (in_crit)
844 844 nbl_end_crit(vp);
845 845
846 846 VN_RELE(vp);
847 847 rr->rr_data = NULL;
848 848 rr->rr_status = puterrno(error);
849 849
850 850 return;
851 851 }
852 852
853 853 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
854 854
855 855 if (mp) {
856 856 rr->rr_data = (char *)mp->b_datap->db_base;
857 857 } else {
858 858 if (ra->ra_wlist) {
859 859 rr->rr_data = (caddr_t)iov.iov_base;
860 860 if (!rdma_setup_read_data2(ra, rr)) {
861 861 rr->rr_data = NULL;
862 862 rr->rr_status = puterrno(NFSERR_INVAL);
863 863 }
864 864 }
865 865 }
866 866 done:
867 867 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
868 868 if (in_crit)
869 869 nbl_end_crit(vp);
870 870
871 871 acl_perm(vp, exi, &va, cr);
872 872
873 873 /* check for overflows */
874 874 error = vattr_to_nattr(&va, &rr->rr_attr);
875 875
876 876 VN_RELE(vp);
877 877
878 878 rr->rr_status = puterrno(error);
879 879 }
880 880
881 881 /*
882 882 * Free data allocated by rfs_read
883 883 */
884 884 void
885 885 rfs_rdfree(struct nfsrdresult *rr)
886 886 {
887 887 mblk_t *mp;
888 888
889 889 if (rr->rr_status == NFS_OK) {
890 890 mp = rr->rr_mp;
891 891 if (mp != NULL)
892 892 freeb(mp);
893 893 }
894 894 }
895 895
896 896 void *
897 897 rfs_read_getfh(struct nfsreadargs *ra)
898 898 {
899 899 return (&ra->ra_fhandle);
900 900 }
901 901
/* Number of iovec entries kept on the stack for a single sync write. */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Writes whose mblk chain fit in the stack iovec array vs. those that didn't */
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif
908 908
909 909 /*
910 910 * Write data to file.
911 911 * Returns attributes of a file after writing some data to it.
912 912 *
913 913 * Any changes made here, especially in error handling might have
914 914 * to also be done in rfs_write (which clusters write requests).
915 915 */
916 916 /* ARGSUSED */
917 917 void
918 918 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
919 919 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
920 920 {
921 921 int error;
922 922 vnode_t *vp;
923 923 rlim64_t rlimit;
924 924 struct vattr va;
925 925 struct uio uio;
926 926 struct iovec iov[MAX_IOVECS];
927 927 mblk_t *m;
928 928 struct iovec *iovp;
929 929 int iovcnt;
930 930 cred_t *savecred;
931 931 int in_crit = 0;
932 932 caller_context_t ct;
933 933
934 934 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
935 935 if (vp == NULL) {
936 936 ns->ns_status = NFSERR_STALE;
937 937 return;
938 938 }
939 939
940 940 if (rdonly(ro, vp)) {
941 941 VN_RELE(vp);
942 942 ns->ns_status = NFSERR_ROFS;
943 943 return;
944 944 }
945 945
946 946 if (vp->v_type != VREG) {
947 947 VN_RELE(vp);
948 948 ns->ns_status = NFSERR_ISDIR;
949 949 return;
950 950 }
951 951
952 952 ct.cc_sysid = 0;
953 953 ct.cc_pid = 0;
954 954 ct.cc_caller_id = nfs2_srv_caller_id;
955 955 ct.cc_flags = CC_DONTBLOCK;
956 956
957 957 va.va_mask = AT_UID|AT_MODE;
958 958
959 959 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
960 960
961 961 if (error) {
962 962 VN_RELE(vp);
963 963 ns->ns_status = puterrno(error);
964 964
965 965 return;
966 966 }
967 967
968 968 if (crgetuid(cr) != va.va_uid) {
969 969 /*
970 970 * This is a kludge to allow writes of files created
971 971 * with read only permission. The owner of the file
972 972 * is always allowed to write it.
973 973 */
974 974 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
975 975
976 976 if (error) {
977 977 VN_RELE(vp);
978 978 ns->ns_status = puterrno(error);
979 979 return;
980 980 }
981 981 }
982 982
983 983 /*
984 984 * Can't access a mandatory lock file. This might cause
985 985 * the NFS service thread to block forever waiting for a
986 986 * lock to be released that will never be released.
987 987 */
988 988 if (MANDLOCK(vp, va.va_mode)) {
989 989 VN_RELE(vp);
990 990 ns->ns_status = NFSERR_ACCES;
991 991 return;
992 992 }
993 993
994 994 /*
995 995 * We have to enter the critical region before calling VOP_RWLOCK
996 996 * to avoid a deadlock with ufs.
997 997 */
998 998 if (nbl_need_check(vp)) {
999 999 nbl_start_crit(vp, RW_READER);
1000 1000 in_crit = 1;
1001 1001 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1002 1002 wa->wa_count, 0, NULL)) {
1003 1003 error = EACCES;
1004 1004 goto out;
1005 1005 }
1006 1006 }
1007 1007
1008 1008 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1009 1009
1010 1010 /* check if a monitor detected a delegation conflict */
1011 1011 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1012 1012 VN_RELE(vp);
1013 1013 /* mark as wouldblock so response is dropped */
1014 1014 curthread->t_flag |= T_WOULDBLOCK;
1015 1015 return;
1016 1016 }
1017 1017
1018 1018 if (wa->wa_data || wa->wa_rlist) {
1019 1019 /* Do the RDMA thing if necessary */
1020 1020 if (wa->wa_rlist) {
1021 1021 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1022 1022 iov[0].iov_len = wa->wa_count;
1023 1023 } else {
1024 1024 iov[0].iov_base = wa->wa_data;
1025 1025 iov[0].iov_len = wa->wa_count;
1026 1026 }
1027 1027 uio.uio_iov = iov;
1028 1028 uio.uio_iovcnt = 1;
1029 1029 uio.uio_segflg = UIO_SYSSPACE;
1030 1030 uio.uio_extflg = UIO_COPY_DEFAULT;
1031 1031 uio.uio_loffset = (offset_t)wa->wa_offset;
1032 1032 uio.uio_resid = wa->wa_count;
1033 1033 /*
1034 1034 * The limit is checked on the client. We
1035 1035 * should allow any size writes here.
1036 1036 */
1037 1037 uio.uio_llimit = curproc->p_fsz_ctl;
1038 1038 rlimit = uio.uio_llimit - wa->wa_offset;
1039 1039 if (rlimit < (rlim64_t)uio.uio_resid)
1040 1040 uio.uio_resid = (uint_t)rlimit;
1041 1041
1042 1042 /*
1043 1043 * for now we assume no append mode
1044 1044 */
1045 1045 /*
1046 1046 * We're changing creds because VM may fault and we need
1047 1047 * the cred of the current thread to be used if quota
1048 1048 * checking is enabled.
1049 1049 */
1050 1050 savecred = curthread->t_cred;
1051 1051 curthread->t_cred = cr;
1052 1052 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1053 1053 curthread->t_cred = savecred;
1054 1054 } else {
1055 1055 iovcnt = 0;
1056 1056 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1057 1057 iovcnt++;
1058 1058 if (iovcnt <= MAX_IOVECS) {
1059 1059 #ifdef DEBUG
1060 1060 rfs_write_sync_hits++;
1061 1061 #endif
1062 1062 iovp = iov;
1063 1063 } else {
1064 1064 #ifdef DEBUG
1065 1065 rfs_write_sync_misses++;
1066 1066 #endif
1067 1067 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1068 1068 }
1069 1069 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1070 1070 uio.uio_iov = iovp;
1071 1071 uio.uio_iovcnt = iovcnt;
1072 1072 uio.uio_segflg = UIO_SYSSPACE;
1073 1073 uio.uio_extflg = UIO_COPY_DEFAULT;
1074 1074 uio.uio_loffset = (offset_t)wa->wa_offset;
1075 1075 uio.uio_resid = wa->wa_count;
1076 1076 /*
1077 1077 * The limit is checked on the client. We
1078 1078 * should allow any size writes here.
1079 1079 */
1080 1080 uio.uio_llimit = curproc->p_fsz_ctl;
1081 1081 rlimit = uio.uio_llimit - wa->wa_offset;
1082 1082 if (rlimit < (rlim64_t)uio.uio_resid)
1083 1083 uio.uio_resid = (uint_t)rlimit;
1084 1084
1085 1085 /*
1086 1086 * For now we assume no append mode.
1087 1087 */
1088 1088 /*
1089 1089 * We're changing creds because VM may fault and we need
1090 1090 * the cred of the current thread to be used if quota
1091 1091 * checking is enabled.
1092 1092 */
1093 1093 savecred = curthread->t_cred;
1094 1094 curthread->t_cred = cr;
1095 1095 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1096 1096 curthread->t_cred = savecred;
1097 1097
1098 1098 if (iovp != iov)
1099 1099 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1100 1100 }
1101 1101
1102 1102 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1103 1103
1104 1104 if (!error) {
1105 1105 /*
1106 1106 * Get attributes again so we send the latest mod
1107 1107 * time to the client side for his cache.
1108 1108 */
1109 1109 va.va_mask = AT_ALL; /* now we want everything */
1110 1110
1111 1111 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1112 1112
1113 1113 /* check for overflows */
1114 1114 if (!error) {
1115 1115 acl_perm(vp, exi, &va, cr);
1116 1116 error = vattr_to_nattr(&va, &ns->ns_attr);
1117 1117 }
1118 1118 }
1119 1119
1120 1120 out:
1121 1121 if (in_crit)
1122 1122 nbl_end_crit(vp);
1123 1123 VN_RELE(vp);
1124 1124
1125 1125 /* check if a monitor detected a delegation conflict */
1126 1126 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1127 1127 /* mark as wouldblock so response is dropped */
1128 1128 curthread->t_flag |= T_WOULDBLOCK;
1129 1129 else
1130 1130 ns->ns_status = puterrno(error);
1131 1131
1132 1132 }
1133 1133
/*
 * One queued NFSv2 WRITE request.  Requests against the same file
 * handle are chained together (via `list', kept in ascending offset
 * order) so that a single service thread can push the whole cluster
 * through one VOP_WRITE pass while the other threads sleep.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response to fill in for caller */
	struct svc_req *req;		/* originating RPC request */
	cred_t *cr;			/* credentials of this request */
	bool_t ro;			/* exported read-only to this client */
	kthread_t *thread;		/* thread parked on this request */
	struct rfs_async_write *list;	/* next request in the cluster */
};

/*
 * Per-file cluster header.  All waiters cv_wait() on `cv' (under
 * rfs_async_write_lock) until the servicing thread fills in their
 * ns_status and broadcasts.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by cluster */
	kcondvar_t cv;			/* signalled when cluster serviced */
	struct rfs_async_write *list;	/* requests, sorted by wa_offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};

/* Global list of in-progress write clusters, protected by the lock below. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs kept on the stack; larger gathers fall back to kmem_alloc. */
#define	MAXCLIOVECS	42
/* Sentinel: request not yet serviced (0 would read as NFS_OK). */
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* gathers that fit on the stack */
static int rfs_write_misses = 0;	/* gathers that needed kmem_alloc */
#endif
1162 1162
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Clustered write path: the first thread to arrive for a given file
 * handle creates a cluster, takes the write rwlock (the delay while a
 * previous cluster drains is what lets this one grow), and services
 * every request queued behind it; later arrivals just enqueue
 * themselves and cv_wait() until their ns_status is filled in.
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	/* Clustering disabled: fall back to the simple synchronous path. */
	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr, ro);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/* Our request record lives on this thread's stack. */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->ro = ro;
	nrp->thread = curthread;

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insert in ascending wa_offset order. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/* Sleep until the servicing thread fills in our status. */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(rp->ro, vp)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop gathering at the first request that is
			 * errored out or not byte-contiguous with its
			 * predecessor; lrp ends up one past the run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Clamp the mblk chain to wa_count bytes;
				 * the chain may carry trailing padding.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Catch any requests still marked un-serviced (e.g. only the
	 * flush above failed) and wake all waiters in the cluster.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}
1661 1659
/*
 * Return the file handle embedded in the WRITE arguments so the
 * dispatcher can locate the export before calling rfs_write().
 */
void *
rfs_write_getfh(struct nfswriteargs *wa)
{
	return (&wa->wa_fhandle);
}
1667 1665
1668 1666 /*
1669 1667 * Create a file.
1670 1668 * Creates a file with given attributes and returns those attributes
1671 1669 * and an fhandle for the new file.
1672 1670 */
1673 1671 void
1674 1672 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1675 1673 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1676 1674 {
1677 1675 int error;
1678 1676 int lookuperr;
1679 1677 int in_crit = 0;
1680 1678 struct vattr va;
1681 1679 vnode_t *vp;
1682 1680 vnode_t *realvp;
1683 1681 vnode_t *dvp;
1684 1682 char *name = args->ca_da.da_name;
1685 1683 vnode_t *tvp = NULL;
1686 1684 int mode;
1687 1685 int lookup_ok;
1688 1686 bool_t trunc;
1689 1687 struct sockaddr *ca;
1690 1688
1691 1689 /*
1692 1690 * Disallow NULL paths
1693 1691 */
1694 1692 if (name == NULL || *name == '\0') {
1695 1693 dr->dr_status = NFSERR_ACCES;
1696 1694 return;
1697 1695 }
1698 1696
1699 1697 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1700 1698 if (dvp == NULL) {
1701 1699 dr->dr_status = NFSERR_STALE;
1702 1700 return;
1703 1701 }
1704 1702
1705 1703 error = sattr_to_vattr(args->ca_sa, &va);
1706 1704 if (error) {
1707 1705 dr->dr_status = puterrno(error);
1708 1706 return;
1709 1707 }
1710 1708
1711 1709 /*
1712 1710 * Must specify the mode.
1713 1711 */
1714 1712 if (!(va.va_mask & AT_MODE)) {
1715 1713 VN_RELE(dvp);
1716 1714 dr->dr_status = NFSERR_INVAL;
1717 1715 return;
1718 1716 }
1719 1717
1720 1718 /*
1721 1719 * This is a completely gross hack to make mknod
1722 1720 * work over the wire until we can wack the protocol
1723 1721 */
1724 1722 if ((va.va_mode & IFMT) == IFCHR) {
1725 1723 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1726 1724 va.va_type = VFIFO; /* xtra kludge for named pipe */
1727 1725 else {
1728 1726 va.va_type = VCHR;
1729 1727 /*
1730 1728 * uncompress the received dev_t
1731 1729 * if the top half is zero indicating a request
1732 1730 * from an `older style' OS.
1733 1731 */
1734 1732 if ((va.va_size & 0xffff0000) == 0)
1735 1733 va.va_rdev = nfsv2_expdev(va.va_size);
1736 1734 else
1737 1735 va.va_rdev = (dev_t)va.va_size;
1738 1736 }
1739 1737 va.va_mask &= ~AT_SIZE;
1740 1738 } else if ((va.va_mode & IFMT) == IFBLK) {
1741 1739 va.va_type = VBLK;
1742 1740 /*
1743 1741 * uncompress the received dev_t
1744 1742 * if the top half is zero indicating a request
1745 1743 * from an `older style' OS.
1746 1744 */
1747 1745 if ((va.va_size & 0xffff0000) == 0)
1748 1746 va.va_rdev = nfsv2_expdev(va.va_size);
1749 1747 else
1750 1748 va.va_rdev = (dev_t)va.va_size;
1751 1749 va.va_mask &= ~AT_SIZE;
1752 1750 } else if ((va.va_mode & IFMT) == IFSOCK) {
1753 1751 va.va_type = VSOCK;
1754 1752 } else {
1755 1753 va.va_type = VREG;
1756 1754 }
1757 1755 va.va_mode &= ~IFMT;
1758 1756 va.va_mask |= AT_TYPE;
1759 1757
1760 1758 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1761 1759 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1762 1760 MAXPATHLEN);
1763 1761 if (name == NULL) {
1764 1762 dr->dr_status = puterrno(EINVAL);
1765 1763 return;
1766 1764 }
1767 1765
1768 1766 /*
1769 1767 * Why was the choice made to use VWRITE as the mode to the
1770 1768 * call to VOP_CREATE ? This results in a bug. When a client
1771 1769 * opens a file that already exists and is RDONLY, the second
1772 1770 * open fails with an EACESS because of the mode.
1773 1771 * bug ID 1054648.
1774 1772 */
1775 1773 lookup_ok = 0;
1776 1774 mode = VWRITE;
1777 1775 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1778 1776 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1779 1777 NULL, NULL, NULL);
1780 1778 if (!error) {
1781 1779 struct vattr at;
1782 1780
1783 1781 lookup_ok = 1;
1784 1782 at.va_mask = AT_MODE;
1785 1783 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1786 1784 if (!error)
1787 1785 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1788 1786 VN_RELE(tvp);
1789 1787 tvp = NULL;
1790 1788 }
1791 1789 }
1792 1790
1793 1791 if (!lookup_ok) {
1794 1792 if (rdonly(ro, dvp)) {
1795 1793 error = EROFS;
1796 1794 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1797 1795 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1798 1796 error = EPERM;
1799 1797 } else {
1800 1798 error = 0;
1801 1799 }
1802 1800 }
1803 1801
1804 1802 /*
1805 1803 * If file size is being modified on an already existing file
1806 1804 * make sure that there are no conflicting non-blocking mandatory
1807 1805 * locks in the region being manipulated. Return EACCES if there
1808 1806 * are conflicting locks.
1809 1807 */
1810 1808 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1811 1809 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1812 1810 NULL, NULL, NULL);
1813 1811
1814 1812 if (!lookuperr &&
1815 1813 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1816 1814 VN_RELE(tvp);
1817 1815 curthread->t_flag |= T_WOULDBLOCK;
1818 1816 goto out;
1819 1817 }
1820 1818
1821 1819 if (!lookuperr && nbl_need_check(tvp)) {
1822 1820 /*
1823 1821 * The file exists. Now check if it has any
1824 1822 * conflicting non-blocking mandatory locks
1825 1823 * in the region being changed.
1826 1824 */
1827 1825 struct vattr bva;
1828 1826 u_offset_t offset;
1829 1827 ssize_t length;
1830 1828
1831 1829 nbl_start_crit(tvp, RW_READER);
1832 1830 in_crit = 1;
1833 1831
1834 1832 bva.va_mask = AT_SIZE;
1835 1833 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1836 1834 if (!error) {
1837 1835 if (va.va_size < bva.va_size) {
1838 1836 offset = va.va_size;
1839 1837 length = bva.va_size - va.va_size;
1840 1838 } else {
1841 1839 offset = bva.va_size;
1842 1840 length = va.va_size - bva.va_size;
1843 1841 }
1844 1842 if (length) {
1845 1843 if (nbl_conflict(tvp, NBL_WRITE,
1846 1844 offset, length, 0, NULL)) {
1847 1845 error = EACCES;
1848 1846 }
1849 1847 }
1850 1848 }
1851 1849 if (error) {
1852 1850 nbl_end_crit(tvp);
1853 1851 VN_RELE(tvp);
1854 1852 in_crit = 0;
1855 1853 }
1856 1854 } else if (tvp != NULL) {
1857 1855 VN_RELE(tvp);
1858 1856 }
1859 1857 }
1860 1858
1861 1859 if (!error) {
1862 1860 /*
1863 1861 * If filesystem is shared with nosuid the remove any
1864 1862 * setuid/setgid bits on create.
1865 1863 */
1866 1864 if (va.va_type == VREG &&
1867 1865 exi->exi_export.ex_flags & EX_NOSUID)
1868 1866 va.va_mode &= ~(VSUID | VSGID);
1869 1867
1870 1868 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1871 1869 NULL, NULL);
1872 1870
1873 1871 if (!error) {
1874 1872
1875 1873 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1876 1874 trunc = TRUE;
1877 1875 else
1878 1876 trunc = FALSE;
1879 1877
1880 1878 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1881 1879 VN_RELE(vp);
1882 1880 curthread->t_flag |= T_WOULDBLOCK;
1883 1881 goto out;
1884 1882 }
1885 1883 va.va_mask = AT_ALL;
1886 1884
1887 1885 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1888 1886
1889 1887 /* check for overflows */
1890 1888 if (!error) {
1891 1889 acl_perm(vp, exi, &va, cr);
1892 1890 error = vattr_to_nattr(&va, &dr->dr_attr);
1893 1891 if (!error) {
1894 1892 error = makefh(&dr->dr_fhandle, vp,
1895 1893 exi);
1896 1894 }
1897 1895 }
1898 1896 /*
1899 1897 * Force modified metadata out to stable storage.
1900 1898 *
1901 1899 * if a underlying vp exists, pass it to VOP_FSYNC
1902 1900 */
1903 1901 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1904 1902 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1905 1903 else
1906 1904 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1907 1905 VN_RELE(vp);
1908 1906 }
1909 1907
1910 1908 if (in_crit) {
1911 1909 nbl_end_crit(tvp);
1912 1910 VN_RELE(tvp);
1913 1911 }
1914 1912 }
1915 1913
1916 1914 /*
1917 1915 * Force modified data and metadata out to stable storage.
1918 1916 */
1919 1917 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1920 1918
1921 1919 out:
1922 1920
1923 1921 VN_RELE(dvp);
1924 1922
1925 1923 dr->dr_status = puterrno(error);
1926 1924
1927 1925 if (name != args->ca_da.da_name)
1928 1926 kmem_free(name, MAXPATHLEN);
1929 1927 }
/*
 * Return the parent-directory file handle from the CREATE arguments;
 * used by the dispatcher to locate the export before rfs_create().
 */
void *
rfs_create_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
1935 1933
1936 1934 /*
1937 1935 * Remove a file.
1938 1936 * Remove named file from parent directory.
1939 1937 */
1940 1938 /* ARGSUSED */
1941 1939 void
1942 1940 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
1943 1941 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1944 1942 {
1945 1943 int error = 0;
1946 1944 vnode_t *vp;
1947 1945 vnode_t *targvp;
1948 1946 int in_crit = 0;
1949 1947
1950 1948 /*
1951 1949 * Disallow NULL paths
1952 1950 */
1953 1951 if (da->da_name == NULL || *da->da_name == '\0') {
1954 1952 *status = NFSERR_ACCES;
1955 1953 return;
1956 1954 }
1957 1955
1958 1956 vp = nfs_fhtovp(da->da_fhandle, exi);
1959 1957 if (vp == NULL) {
1960 1958 *status = NFSERR_STALE;
1961 1959 return;
1962 1960 }
1963 1961
1964 1962 if (rdonly(ro, vp)) {
1965 1963 VN_RELE(vp);
1966 1964 *status = NFSERR_ROFS;
1967 1965 return;
1968 1966 }
1969 1967
1970 1968 /*
1971 1969 * Check for a conflict with a non-blocking mandatory share reservation.
1972 1970 */
1973 1971 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
1974 1972 NULL, cr, NULL, NULL, NULL);
1975 1973 if (error != 0) {
1976 1974 VN_RELE(vp);
1977 1975 *status = puterrno(error);
1978 1976 return;
1979 1977 }
1980 1978
1981 1979 /*
1982 1980 * If the file is delegated to an v4 client, then initiate
1983 1981 * recall and drop this request (by setting T_WOULDBLOCK).
1984 1982 * The client will eventually re-transmit the request and
1985 1983 * (hopefully), by then, the v4 client will have returned
1986 1984 * the delegation.
1987 1985 */
1988 1986
1989 1987 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
1990 1988 VN_RELE(vp);
1991 1989 VN_RELE(targvp);
1992 1990 curthread->t_flag |= T_WOULDBLOCK;
1993 1991 return;
1994 1992 }
1995 1993
1996 1994 if (nbl_need_check(targvp)) {
1997 1995 nbl_start_crit(targvp, RW_READER);
1998 1996 in_crit = 1;
1999 1997 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2000 1998 error = EACCES;
2001 1999 goto out;
2002 2000 }
2003 2001 }
2004 2002
2005 2003 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2006 2004
2007 2005 /*
2008 2006 * Force modified data and metadata out to stable storage.
2009 2007 */
2010 2008 (void) VOP_FSYNC(vp, 0, cr, NULL);
2011 2009
2012 2010 out:
2013 2011 if (in_crit)
2014 2012 nbl_end_crit(targvp);
2015 2013 VN_RELE(targvp);
2016 2014 VN_RELE(vp);
2017 2015
2018 2016 *status = puterrno(error);
2019 2017
2020 2018 }
2021 2019
/*
 * Return the directory file handle from the REMOVE arguments; used by
 * the dispatcher to locate the export before calling rfs_remove().
 */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2027 2025
/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * Up to four vnode references are live at once (fromvp, tovp, srcvp,
 * targvp); each early-error exit releases exactly the set held at
 * that point, so the release lists below grow as the function
 * progresses.
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Both directories must live in the same export; a cross-export
	 * rename is reported as NFSERR_XDEV.  The export reference from
	 * checkexport() is dropped immediately -- only the pointer
	 * identity is compared below.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* drop the reply; client will retransmit after recall */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached v_path of the renamed vnode up to date. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
/*
 * Return the file handle the RENAME request should be dispatched on:
 * the handle of the source ("from") directory.
 */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}
2176 2174
2177 2175 /*
2178 2176 * Link to a file.
2179 2177 * Create a file (to) which is a hard link to the given file (from).
2180 2178 */
2181 2179 /* ARGSUSED */
2182 2180 void
2183 2181 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2184 2182 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2185 2183 {
2186 2184 int error;
2187 2185 vnode_t *fromvp;
2188 2186 vnode_t *tovp;
2189 2187 struct exportinfo *to_exi;
2190 2188 fhandle_t *fh;
2191 2189
2192 2190 fromvp = nfs_fhtovp(args->la_from, exi);
2193 2191 if (fromvp == NULL) {
2194 2192 *status = NFSERR_STALE;
2195 2193 return;
2196 2194 }
2197 2195
2198 2196 fh = args->la_to.da_fhandle;
2199 2197 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2200 2198 if (to_exi == NULL) {
2201 2199 VN_RELE(fromvp);
2202 2200 *status = NFSERR_ACCES;
2203 2201 return;
2204 2202 }
2205 2203 exi_rele(to_exi);
2206 2204
2207 2205 if (to_exi != exi) {
2208 2206 VN_RELE(fromvp);
2209 2207 *status = NFSERR_XDEV;
2210 2208 return;
2211 2209 }
2212 2210
2213 2211 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2214 2212 if (tovp == NULL) {
2215 2213 VN_RELE(fromvp);
2216 2214 *status = NFSERR_STALE;
2217 2215 return;
2218 2216 }
2219 2217
2220 2218 if (tovp->v_type != VDIR) {
2221 2219 VN_RELE(tovp);
2222 2220 VN_RELE(fromvp);
2223 2221 *status = NFSERR_NOTDIR;
2224 2222 return;
2225 2223 }
2226 2224 /*
2227 2225 * Disallow NULL paths
2228 2226 */
2229 2227 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2230 2228 VN_RELE(tovp);
2231 2229 VN_RELE(fromvp);
2232 2230 *status = NFSERR_ACCES;
2233 2231 return;
2234 2232 }
2235 2233
2236 2234 if (rdonly(ro, tovp)) {
2237 2235 VN_RELE(tovp);
2238 2236 VN_RELE(fromvp);
2239 2237 *status = NFSERR_ROFS;
2240 2238 return;
2241 2239 }
2242 2240
2243 2241 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2244 2242
2245 2243 /*
2246 2244 * Force modified data and metadata out to stable storage.
2247 2245 */
2248 2246 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2249 2247 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2250 2248
2251 2249 VN_RELE(tovp);
2252 2250 VN_RELE(fromvp);
2253 2251
2254 2252 *status = puterrno(error);
2255 2253
2256 2254 }
/*
 * Return the file handle the LINK request should be dispatched on:
 * the handle of the existing ("from") file.
 */
void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}
2262 2260
2263 2261 /*
2264 2262 * Symbolicly link to a file.
2265 2263 * Create a file (to) with the given attributes which is a symbolic link
2266 2264 * to the given path name (to).
2267 2265 */
2268 2266 void
2269 2267 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2270 2268 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2271 2269 {
2272 2270 int error;
2273 2271 struct vattr va;
2274 2272 vnode_t *vp;
2275 2273 vnode_t *svp;
2276 2274 int lerror;
2277 2275 struct sockaddr *ca;
2278 2276 char *name = NULL;
2279 2277
2280 2278 /*
2281 2279 * Disallow NULL paths
2282 2280 */
2283 2281 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2284 2282 *status = NFSERR_ACCES;
2285 2283 return;
2286 2284 }
2287 2285
2288 2286 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2289 2287 if (vp == NULL) {
2290 2288 *status = NFSERR_STALE;
2291 2289 return;
2292 2290 }
2293 2291
2294 2292 if (rdonly(ro, vp)) {
2295 2293 VN_RELE(vp);
2296 2294 *status = NFSERR_ROFS;
2297 2295 return;
2298 2296 }
2299 2297
2300 2298 error = sattr_to_vattr(args->sla_sa, &va);
2301 2299 if (error) {
2302 2300 VN_RELE(vp);
2303 2301 *status = puterrno(error);
2304 2302 return;
2305 2303 }
2306 2304
2307 2305 if (!(va.va_mask & AT_MODE)) {
2308 2306 VN_RELE(vp);
2309 2307 *status = NFSERR_INVAL;
2310 2308 return;
2311 2309 }
2312 2310
2313 2311 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2314 2312 name = nfscmd_convname(ca, exi, args->sla_tnm,
2315 2313 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2316 2314
2317 2315 if (name == NULL) {
2318 2316 *status = NFSERR_ACCES;
2319 2317 return;
2320 2318 }
2321 2319
2322 2320 va.va_type = VLNK;
2323 2321 va.va_mask |= AT_TYPE;
2324 2322
2325 2323 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2326 2324
2327 2325 /*
2328 2326 * Force new data and metadata out to stable storage.
2329 2327 */
2330 2328 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2331 2329 NULL, cr, NULL, NULL, NULL);
2332 2330
2333 2331 if (!lerror) {
2334 2332 (void) VOP_FSYNC(svp, 0, cr, NULL);
2335 2333 VN_RELE(svp);
2336 2334 }
2337 2335
2338 2336 /*
2339 2337 * Force modified data and metadata out to stable storage.
2340 2338 */
2341 2339 (void) VOP_FSYNC(vp, 0, cr, NULL);
2342 2340
2343 2341 VN_RELE(vp);
2344 2342
2345 2343 *status = puterrno(error);
2346 2344 if (name != args->sla_tnm)
2347 2345 kmem_free(name, MAXPATHLEN);
2348 2346
2349 2347 }
/*
 * Return the file handle the SYMLINK request should be dispatched on:
 * the handle of the directory that will contain the new link.
 */
void *
rfs_symlink_getfh(struct nfsslargs *args)
{
	return (args->sla_from.da_fhandle);
}
2355 2353
2356 2354 /*
2357 2355 * Make a directory.
2358 2356 * Create a directory with the given name, parent directory, and attributes.
2359 2357 * Returns a file handle and attributes for the new directory.
2360 2358 */
2361 2359 /* ARGSUSED */
2362 2360 void
2363 2361 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2364 2362 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2365 2363 {
2366 2364 int error;
2367 2365 struct vattr va;
2368 2366 vnode_t *dvp = NULL;
2369 2367 vnode_t *vp;
2370 2368 char *name = args->ca_da.da_name;
2371 2369
2372 2370 /*
2373 2371 * Disallow NULL paths
2374 2372 */
2375 2373 if (name == NULL || *name == '\0') {
2376 2374 dr->dr_status = NFSERR_ACCES;
2377 2375 return;
2378 2376 }
2379 2377
2380 2378 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2381 2379 if (vp == NULL) {
2382 2380 dr->dr_status = NFSERR_STALE;
2383 2381 return;
2384 2382 }
2385 2383
2386 2384 if (rdonly(ro, vp)) {
2387 2385 VN_RELE(vp);
2388 2386 dr->dr_status = NFSERR_ROFS;
2389 2387 return;
2390 2388 }
2391 2389
2392 2390 error = sattr_to_vattr(args->ca_sa, &va);
2393 2391 if (error) {
2394 2392 VN_RELE(vp);
2395 2393 dr->dr_status = puterrno(error);
2396 2394 return;
2397 2395 }
2398 2396
2399 2397 if (!(va.va_mask & AT_MODE)) {
2400 2398 VN_RELE(vp);
2401 2399 dr->dr_status = NFSERR_INVAL;
2402 2400 return;
2403 2401 }
2404 2402
2405 2403 va.va_type = VDIR;
2406 2404 va.va_mask |= AT_TYPE;
2407 2405
2408 2406 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2409 2407
2410 2408 if (!error) {
2411 2409 /*
2412 2410 * Attribtutes of the newly created directory should
2413 2411 * be returned to the client.
2414 2412 */
2415 2413 va.va_mask = AT_ALL; /* We want everything */
2416 2414 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2417 2415
2418 2416 /* check for overflows */
2419 2417 if (!error) {
2420 2418 acl_perm(vp, exi, &va, cr);
2421 2419 error = vattr_to_nattr(&va, &dr->dr_attr);
2422 2420 if (!error) {
2423 2421 error = makefh(&dr->dr_fhandle, dvp, exi);
2424 2422 }
2425 2423 }
2426 2424 /*
2427 2425 * Force new data and metadata out to stable storage.
2428 2426 */
2429 2427 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2430 2428 VN_RELE(dvp);
2431 2429 }
2432 2430
2433 2431 /*
2434 2432 * Force modified data and metadata out to stable storage.
2435 2433 */
2436 2434 (void) VOP_FSYNC(vp, 0, cr, NULL);
2437 2435
2438 2436 VN_RELE(vp);
2439 2437
2440 2438 dr->dr_status = puterrno(error);
2441 2439
2442 2440 }
/*
 * Return the file handle the MKDIR request should be dispatched on:
 * the handle of the parent directory.
 */
void *
rfs_mkdir_getfh(struct nfscreatargs *args)
{
	return (args->ca_da.da_fhandle);
}
2448 2446
2449 2447 /*
2450 2448 * Remove a directory.
2451 2449 * Remove the given directory name from the given parent directory.
2452 2450 */
2453 2451 /* ARGSUSED */
2454 2452 void
2455 2453 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2456 2454 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2457 2455 {
2458 2456 int error;
2459 2457 vnode_t *vp;
2460 2458
2461 2459 /*
2462 2460 * Disallow NULL paths
2463 2461 */
2464 2462 if (da->da_name == NULL || *da->da_name == '\0') {
2465 2463 *status = NFSERR_ACCES;
2466 2464 return;
2467 2465 }
2468 2466
2469 2467 vp = nfs_fhtovp(da->da_fhandle, exi);
2470 2468 if (vp == NULL) {
2471 2469 *status = NFSERR_STALE;
2472 2470 return;
2473 2471 }
2474 2472
2475 2473 if (rdonly(ro, vp)) {
2476 2474 VN_RELE(vp);
2477 2475 *status = NFSERR_ROFS;
2478 2476 return;
2479 2477 }
2480 2478
2481 2479 /*
2482 2480 * VOP_RMDIR takes a third argument (the current
2483 2481 * directory of the process). That's because someone
2484 2482 * wants to return EINVAL if one tries to remove ".".
2485 2483 * Of course, NFS servers have no idea what their
2486 2484 * clients' current directories are. We fake it by
2487 2485 * supplying a vnode known to exist and illegal to
2488 2486 * remove.
2489 2487 */
2490 2488 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2491 2489
2492 2490 /*
2493 2491 * Force modified data and metadata out to stable storage.
2494 2492 */
2495 2493 (void) VOP_FSYNC(vp, 0, cr, NULL);
2496 2494
2497 2495 VN_RELE(vp);
2498 2496
2499 2497 /*
2500 2498 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2501 2499 * if the directory is not empty. A System V NFS server
2502 2500 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2503 2501 * over the wire.
2504 2502 */
2505 2503 if (error == EEXIST)
2506 2504 *status = NFSERR_NOTEMPTY;
2507 2505 else
2508 2506 *status = puterrno(error);
2509 2507
2510 2508 }
/*
 * Return the file handle the RMDIR request should be dispatched on:
 * the handle of the parent directory.
 */
void *
rfs_rmdir_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
2516 2514
/*
 * Read entries from a directory on behalf of the client.
 * Fills in rd with a buffer of dirent64 records (freed later by
 * rfs_rddirfree()), the number of bytes returned, and an EOF flag.
 * Entry names are converted to the client's character set; entries
 * that no longer fit after conversion are dropped and EOF is cleared
 * so the client will come back for them.
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Take the vnode read lock for the duration of the VOP calls. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request returns an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Clamp the transfer to the v2 protocol maximum. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries. This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* Nothing was read: report EOF. */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * NOTE(review): if VOP_READDIR failed, rd->rd_size is not set
	 * here before being passed to nfscmd_countents() below --
	 * presumably the caller zeroes the result struct; verify.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Swap in the converted buffer if a new one was allocated. */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
/*
 * Return the file handle the READDIR request should be dispatched on:
 * the handle of the directory being read.
 */
void *
rfs_readdir_getfh(struct nfsrddirargs *rda)
{
	return (&rda->rda_fh);
}
/*
 * Free the entry buffer allocated by rfs_readdir().  rd_bufsize is the
 * size recorded at allocation time, as required by kmem_free().
 */
void
rfs_rddirfree(struct nfsrddirres *rd)
{
	if (rd->rd_entries != NULL)
		kmem_free(rd->rd_entries, rd->rd_bufsize);
}
2661 2659
2662 2660 /* ARGSUSED */
2663 2661 void
2664 2662 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2665 2663 struct svc_req *req, cred_t *cr, bool_t ro)
2666 2664 {
2667 2665 int error;
2668 2666 struct statvfs64 sb;
2669 2667 vnode_t *vp;
2670 2668
2671 2669 vp = nfs_fhtovp(fh, exi);
2672 2670 if (vp == NULL) {
2673 2671 fs->fs_status = NFSERR_STALE;
2674 2672 return;
2675 2673 }
2676 2674
2677 2675 error = VFS_STATVFS(vp->v_vfsp, &sb);
2678 2676
2679 2677 if (!error) {
2680 2678 fs->fs_tsize = nfstsize();
2681 2679 fs->fs_bsize = sb.f_frsize;
2682 2680 fs->fs_blocks = sb.f_blocks;
2683 2681 fs->fs_bfree = sb.f_bfree;
2684 2682 fs->fs_bavail = sb.f_bavail;
2685 2683 }
2686 2684
2687 2685 VN_RELE(vp);
2688 2686
2689 2687 fs->fs_status = puterrno(error);
2690 2688
2691 2689 }
/*
 * Return the file handle the STATFS request should be dispatched on
 * (the argument is the handle itself).
 */
void *
rfs_statfs_getfh(fhandle_t *fh)
{
	return (fh);
}
2697 2695
/*
 * Convert an over-the-wire NFSv2 sattr into a vattr, setting va_mask
 * bits only for fields the client actually supplied.  In the v2
 * protocol an all-ones field value means "do not set this attribute".
 * Returns 0, or EOVERFLOW when a wire time cannot be represented in
 * the kernel's time_t (32-bit kernels only).
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both tv_sec and tv_usec must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2764 2762
/*
 * Map vnode types (indexed by vtype_t) to NFSv2 on-the-wire file
 * types.  Zero entries are vnode types with no v2 representation.
 * VFIFO is remapped separately via NA_SETFIFO in vattr_to_nattr().
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2768 2766
/*
 * Convert a vattr into the over-the-wire NFSv2 fattr.
 *
 * check the following fields for overflow: nodeid, size, and time.
 * There could be a problem when converting 64-bit LP64 fields
 * into 32-bit ones.  Return an error if there is an overflow
 * (EFBIG for nodeid/size, EOVERFLOW for times); 0 on success.
 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* -1 is the wire sentinel for "unknown mode". */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* Wire times carry microseconds; vattr carries nanoseconds. */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2875 2873
/*
 * acl v2 support: returns approximate permission.
 *	default: returns minimal permission (more restrictive)
 *	aclok: returns maximal permission (less restrictive)
 * This routine changes the permissions that are already in *va.
 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
 * CLASS_OBJ is always the same as GROUP_OBJ entry.
 *
 * NFSv2 has no way to express a full ACL, so the group/other mode
 * bits are adjusted to approximate it.  Only the low six mode bits
 * (group and other) are modified; the owner bits are left alone.
 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;
	mode_t grp_perm;
	mode_t other_perm;
	mode_t other_orig;
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/*
				 * maximal permissions: OR together every
				 * grant, then apply the CLASS_OBJ mask.
				 *
				 * NOTE(review): mask_perm and other_orig
				 * are only assigned if CLASS_OBJ/OTHER_OBJ
				 * entries are present -- presumably a valid
				 * non-trivial ACL always contains both;
				 * verify against the ACL code.
				 */
				grp_perm = 0;
				other_perm = 0;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/*
				 * minimal permissions: start from full
				 * group/other access and AND away anything
				 * any entry would deny.
				 */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
2980 2978
/*
 * Module initialization for the NFSv2 server: set up the async-write
 * lock and obtain a caller id for this server's VOP calls.
 */
void
rfs_srvrinit(void)
{
	mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs2_srv_caller_id = fs_new_caller_id();
}
2987 2985
/*
 * Module teardown counterpart to rfs_srvrinit().
 */
void
rfs_srvrfini(void)
{
	mutex_destroy(&rfs_async_write_lock);
}
2993 2991
2994 2992 static int
2995 2993 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2996 2994 {
2997 2995 struct clist *wcl;
2998 2996 int wlist_len;
2999 2997 uint32_t count = rr->rr_count;
3000 2998
3001 2999 wcl = ra->ra_wlist;
3002 3000
3003 3001 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3004 3002 return (FALSE);
3005 3003 }
3006 3004
3007 3005 wcl = ra->ra_wlist;
3008 3006 rr->rr_ok.rrok_wlist_len = wlist_len;
3009 3007 rr->rr_ok.rrok_wlist = wcl;
3010 3008
3011 3009 return (TRUE);
3012 3010 }
↓ open down ↓ |
1783 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX