1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 
95 */ 96 /* ARGSUSED */ 97 void 98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi, 99 struct svc_req *req, cred_t *cr) 100 { 101 int error; 102 vnode_t *vp; 103 struct vattr va; 104 105 vp = nfs_fhtovp(fhp, exi); 106 if (vp == NULL) { 107 ns->ns_status = NFSERR_STALE; 108 return; 109 } 110 111 /* 112 * Do the getattr. 113 */ 114 va.va_mask = AT_ALL; /* we want all the attributes */ 115 116 error = rfs4_delegated_getattr(vp, &va, 0, cr); 117 118 /* check for overflows */ 119 if (!error) { 120 /* Lie about the object type for a referral */ 121 if (vn_is_nfs_reparse(vp, cr)) 122 va.va_type = VLNK; 123 124 acl_perm(vp, exi, &va, cr); 125 error = vattr_to_nattr(&va, &ns->ns_attr); 126 } 127 128 VN_RELE(vp); 129 130 ns->ns_status = puterrno(error); 131 } 132 void * 133 rfs_getattr_getfh(fhandle_t *fhp) 134 { 135 return (fhp); 136 } 137 138 /* 139 * Set file attributes. 140 * Sets the attributes of the file with the given fhandle. Returns 141 * the new attributes. 142 */ 143 void 144 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns, 145 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 146 { 147 int error; 148 int flag; 149 int in_crit = 0; 150 vnode_t *vp; 151 struct vattr va; 152 struct vattr bva; 153 struct flock64 bf; 154 caller_context_t ct; 155 156 157 vp = nfs_fhtovp(&args->saa_fh, exi); 158 if (vp == NULL) { 159 ns->ns_status = NFSERR_STALE; 160 return; 161 } 162 163 if (rdonly(exi, vp, req)) { 164 VN_RELE(vp); 165 ns->ns_status = NFSERR_ROFS; 166 return; 167 } 168 169 error = sattr_to_vattr(&args->saa_sa, &va); 170 if (error) { 171 VN_RELE(vp); 172 ns->ns_status = puterrno(error); 173 return; 174 } 175 176 /* 177 * If the client is requesting a change to the mtime, 178 * but the nanosecond field is set to 1 billion, then 179 * this is a flag to the server that it should set the 180 * atime and mtime fields to the server's current time. 
181 * The 1 billion number actually came from the client 182 * as 1 million, but the units in the over the wire 183 * request are microseconds instead of nanoseconds. 184 * 185 * This is an overload of the protocol and should be 186 * documented in the NFS Version 2 protocol specification. 187 */ 188 if (va.va_mask & AT_MTIME) { 189 if (va.va_mtime.tv_nsec == 1000000000) { 190 gethrestime(&va.va_mtime); 191 va.va_atime = va.va_mtime; 192 va.va_mask |= AT_ATIME; 193 flag = 0; 194 } else 195 flag = ATTR_UTIME; 196 } else 197 flag = 0; 198 199 /* 200 * If the filesystem is exported with nosuid, then mask off 201 * the setuid and setgid bits. 202 */ 203 if ((va.va_mask & AT_MODE) && vp->v_type == VREG && 204 (exi->exi_export.ex_flags & EX_NOSUID)) 205 va.va_mode &= ~(VSUID | VSGID); 206 207 ct.cc_sysid = 0; 208 ct.cc_pid = 0; 209 ct.cc_caller_id = nfs2_srv_caller_id; 210 ct.cc_flags = CC_DONTBLOCK; 211 212 /* 213 * We need to specially handle size changes because it is 214 * possible for the client to create a file with modes 215 * which indicate read-only, but with the file opened for 216 * writing. If the client then tries to set the size of 217 * the file, then the normal access checking done in 218 * VOP_SETATTR would prevent the client from doing so, 219 * although it should be legal for it to do so. To get 220 * around this, we do the access checking for ourselves 221 * and then use VOP_SPACE which doesn't do the access 222 * checking which VOP_SETATTR does. VOP_SPACE can only 223 * operate on VREG files, let VOP_SETATTR handle the other 224 * extremely rare cases. 225 * Also the client should not be allowed to change the 226 * size of the file if there is a conflicting non-blocking 227 * mandatory lock in the region of change. 
228 */ 229 if (vp->v_type == VREG && va.va_mask & AT_SIZE) { 230 if (nbl_need_check(vp)) { 231 nbl_start_crit(vp, RW_READER); 232 in_crit = 1; 233 } 234 235 bva.va_mask = AT_UID | AT_SIZE; 236 237 error = VOP_GETATTR(vp, &bva, 0, cr, &ct); 238 239 if (error) { 240 if (in_crit) 241 nbl_end_crit(vp); 242 VN_RELE(vp); 243 ns->ns_status = puterrno(error); 244 return; 245 } 246 247 if (in_crit) { 248 u_offset_t offset; 249 ssize_t length; 250 251 if (va.va_size < bva.va_size) { 252 offset = va.va_size; 253 length = bva.va_size - va.va_size; 254 } else { 255 offset = bva.va_size; 256 length = va.va_size - bva.va_size; 257 } 258 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 259 NULL)) { 260 error = EACCES; 261 } 262 } 263 264 if (crgetuid(cr) == bva.va_uid && !error && 265 va.va_size != bva.va_size) { 266 va.va_mask &= ~AT_SIZE; 267 bf.l_type = F_WRLCK; 268 bf.l_whence = 0; 269 bf.l_start = (off64_t)va.va_size; 270 bf.l_len = 0; 271 bf.l_sysid = 0; 272 bf.l_pid = 0; 273 274 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 275 (offset_t)va.va_size, cr, &ct); 276 } 277 if (in_crit) 278 nbl_end_crit(vp); 279 } else 280 error = 0; 281 282 /* 283 * Do the setattr. 284 */ 285 if (!error && va.va_mask) { 286 error = VOP_SETATTR(vp, &va, flag, cr, &ct); 287 } 288 289 /* 290 * check if the monitor on either vop_space or vop_setattr detected 291 * a delegation conflict and if so, mark the thread flag as 292 * wouldblock so that the response is dropped and the client will 293 * try again. 294 */ 295 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 296 VN_RELE(vp); 297 curthread->t_flag |= T_WOULDBLOCK; 298 return; 299 } 300 301 if (!error) { 302 va.va_mask = AT_ALL; /* get everything */ 303 304 error = rfs4_delegated_getattr(vp, &va, 0, cr); 305 306 /* check for overflows */ 307 if (!error) { 308 acl_perm(vp, exi, &va, cr); 309 error = vattr_to_nattr(&va, &ns->ns_attr); 310 } 311 } 312 313 ct.cc_flags = 0; 314 315 /* 316 * Force modified metadata out to stable storage. 
317 */ 318 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 319 320 VN_RELE(vp); 321 322 ns->ns_status = puterrno(error); 323 } 324 void * 325 rfs_setattr_getfh(struct nfssaargs *args) 326 { 327 return (&args->saa_fh); 328 } 329 330 /* 331 * Directory lookup. 332 * Returns an fhandle and file attributes for file name in a directory. 333 */ 334 /* ARGSUSED */ 335 void 336 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, 337 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 338 { 339 int error; 340 vnode_t *dvp; 341 vnode_t *vp; 342 struct vattr va; 343 fhandle_t *fhp = da->da_fhandle; 344 struct sec_ol sec = {0, 0}; 345 bool_t publicfh_flag = FALSE, auth_weak = FALSE; 346 char *name; 347 struct sockaddr *ca; 348 349 /* 350 * Trusted Extension doesn't support NFSv2. MOUNT 351 * will reject v2 clients. Need to prevent v2 client 352 * access via WebNFS here. 353 */ 354 if (is_system_labeled() && req->rq_vers == 2) { 355 dr->dr_status = NFSERR_ACCES; 356 return; 357 } 358 359 /* 360 * Disallow NULL paths 361 */ 362 if (da->da_name == NULL || *da->da_name == '\0') { 363 dr->dr_status = NFSERR_ACCES; 364 return; 365 } 366 367 /* 368 * Allow lookups from the root - the default 369 * location of the public filehandle. 370 */ 371 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { 372 dvp = rootdir; 373 VN_HOLD(dvp); 374 } else { 375 dvp = nfs_fhtovp(fhp, exi); 376 if (dvp == NULL) { 377 dr->dr_status = NFSERR_STALE; 378 return; 379 } 380 } 381 382 /* 383 * Not allow lookup beyond root. 384 * If the filehandle matches a filehandle of the exi, 385 * then the ".." refers beyond the root of an exported filesystem. 
386 */ 387 if (strcmp(da->da_name, "..") == 0 && 388 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { 389 VN_RELE(dvp); 390 dr->dr_status = NFSERR_NOENT; 391 return; 392 } 393 394 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 395 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND, 396 MAXPATHLEN); 397 398 if (name == NULL) { 399 dr->dr_status = NFSERR_ACCES; 400 return; 401 } 402 403 /* 404 * If the public filehandle is used then allow 405 * a multi-component lookup, i.e. evaluate 406 * a pathname and follow symbolic links if 407 * necessary. 408 * 409 * This may result in a vnode in another filesystem 410 * which is OK as long as the filesystem is exported. 411 */ 412 if (PUBLIC_FH2(fhp)) { 413 publicfh_flag = TRUE; 414 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, 415 &sec); 416 } else { 417 /* 418 * Do a normal single component lookup. 419 */ 420 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr, 421 NULL, NULL, NULL); 422 } 423 424 if (name != da->da_name) 425 kmem_free(name, MAXPATHLEN); 426 427 428 if (!error) { 429 va.va_mask = AT_ALL; /* we want everything */ 430 431 error = rfs4_delegated_getattr(vp, &va, 0, cr); 432 433 /* check for overflows */ 434 if (!error) { 435 acl_perm(vp, exi, &va, cr); 436 error = vattr_to_nattr(&va, &dr->dr_attr); 437 if (!error) { 438 if (sec.sec_flags & SEC_QUERY) 439 error = makefh_ol(&dr->dr_fhandle, exi, 440 sec.sec_index); 441 else { 442 error = makefh(&dr->dr_fhandle, vp, 443 exi); 444 if (!error && publicfh_flag && 445 !chk_clnt_sec(exi, req)) 446 auth_weak = TRUE; 447 } 448 } 449 } 450 VN_RELE(vp); 451 } 452 453 VN_RELE(dvp); 454 455 /* 456 * If publicfh_flag is true then we have called rfs_publicfh_mclookup 457 * and have obtained a new exportinfo in exi which needs to be 458 * released. Note the the original exportinfo pointed to by exi 459 * will be released by the caller, comon_dispatch. 
460 */ 461 if (publicfh_flag && exi != NULL) 462 exi_rele(exi); 463 464 /* 465 * If it's public fh, no 0x81, and client's flavor is 466 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now. 467 * Then set RPC status to AUTH_TOOWEAK in common_dispatch. 468 */ 469 if (auth_weak) 470 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR; 471 else 472 dr->dr_status = puterrno(error); 473 } 474 void * 475 rfs_lookup_getfh(struct nfsdiropargs *da) 476 { 477 return (da->da_fhandle); 478 } 479 480 /* 481 * Read symbolic link. 482 * Returns the string in the symbolic link at the given fhandle. 483 */ 484 /* ARGSUSED */ 485 void 486 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, 487 struct svc_req *req, cred_t *cr) 488 { 489 int error; 490 struct iovec iov; 491 struct uio uio; 492 vnode_t *vp; 493 struct vattr va; 494 struct sockaddr *ca; 495 char *name = NULL; 496 int is_referral = 0; 497 498 vp = nfs_fhtovp(fhp, exi); 499 if (vp == NULL) { 500 rl->rl_data = NULL; 501 rl->rl_status = NFSERR_STALE; 502 return; 503 } 504 505 va.va_mask = AT_MODE; 506 507 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 508 509 if (error) { 510 VN_RELE(vp); 511 rl->rl_data = NULL; 512 rl->rl_status = puterrno(error); 513 return; 514 } 515 516 if (MANDLOCK(vp, va.va_mode)) { 517 VN_RELE(vp); 518 rl->rl_data = NULL; 519 rl->rl_status = NFSERR_ACCES; 520 return; 521 } 522 523 /* We lied about the object type for a referral */ 524 if (vn_is_nfs_reparse(vp, cr)) 525 is_referral = 1; 526 527 /* 528 * XNFS and RFC1094 require us to return ENXIO if argument 529 * is not a link. BUGID 1138002. 530 */ 531 if (vp->v_type != VLNK && !is_referral) { 532 VN_RELE(vp); 533 rl->rl_data = NULL; 534 rl->rl_status = NFSERR_NXIO; 535 return; 536 } 537 538 /* 539 * Allocate data for pathname. This will be freed by rfs_rlfree. 
540 */ 541 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP); 542 543 if (is_referral) { 544 char *s; 545 size_t strsz; 546 547 /* Get an artificial symlink based on a referral */ 548 s = build_symlink(vp, cr, &strsz); 549 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; 550 DTRACE_PROBE2(nfs2serv__func__referral__reflink, 551 vnode_t *, vp, char *, s); 552 if (s == NULL) 553 error = EINVAL; 554 else { 555 error = 0; 556 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN); 557 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN); 558 kmem_free(s, strsz); 559 } 560 561 } else { 562 563 /* 564 * Set up io vector to read sym link data 565 */ 566 iov.iov_base = rl->rl_data; 567 iov.iov_len = NFS_MAXPATHLEN; 568 uio.uio_iov = &iov; 569 uio.uio_iovcnt = 1; 570 uio.uio_segflg = UIO_SYSSPACE; 571 uio.uio_extflg = UIO_COPY_CACHED; 572 uio.uio_loffset = (offset_t)0; 573 uio.uio_resid = NFS_MAXPATHLEN; 574 575 /* 576 * Do the readlink. 577 */ 578 error = VOP_READLINK(vp, &uio, cr, NULL); 579 580 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid); 581 582 if (!error) 583 rl->rl_data[rl->rl_count] = '\0'; 584 585 } 586 587 588 VN_RELE(vp); 589 590 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 591 name = nfscmd_convname(ca, exi, rl->rl_data, 592 NFSCMD_CONV_OUTBOUND, MAXPATHLEN); 593 594 if (name != NULL && name != rl->rl_data) { 595 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 596 rl->rl_data = name; 597 } 598 599 /* 600 * XNFS and RFC1094 require us to return ENXIO if argument 601 * is not a link. UFS returns EINVAL if this is the case, 602 * so we do the mapping here. BUGID 1138002. 
603 */ 604 if (error == EINVAL) 605 rl->rl_status = NFSERR_NXIO; 606 else 607 rl->rl_status = puterrno(error); 608 609 } 610 void * 611 rfs_readlink_getfh(fhandle_t *fhp) 612 { 613 return (fhp); 614 } 615 /* 616 * Free data allocated by rfs_readlink 617 */ 618 void 619 rfs_rlfree(struct nfsrdlnres *rl) 620 { 621 if (rl->rl_data != NULL) 622 kmem_free(rl->rl_data, NFS_MAXPATHLEN); 623 } 624 625 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *); 626 627 /* 628 * Read data. 629 * Returns some data read from the file at the given fhandle. 630 */ 631 /* ARGSUSED */ 632 void 633 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, 634 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 635 { 636 vnode_t *vp; 637 int error; 638 struct vattr va; 639 struct iovec iov; 640 struct uio uio; 641 mblk_t *mp; 642 int alloc_err = 0; 643 int in_crit = 0; 644 caller_context_t ct; 645 646 vp = nfs_fhtovp(&ra->ra_fhandle, exi); 647 if (vp == NULL) { 648 rr->rr_data = NULL; 649 rr->rr_status = NFSERR_STALE; 650 return; 651 } 652 653 if (vp->v_type != VREG) { 654 VN_RELE(vp); 655 rr->rr_data = NULL; 656 rr->rr_status = NFSERR_ISDIR; 657 return; 658 } 659 660 ct.cc_sysid = 0; 661 ct.cc_pid = 0; 662 ct.cc_caller_id = nfs2_srv_caller_id; 663 ct.cc_flags = CC_DONTBLOCK; 664 665 /* 666 * Enter the critical region before calling VOP_RWLOCK 667 * to avoid a deadlock with write requests. 
668 */ 669 if (nbl_need_check(vp)) { 670 nbl_start_crit(vp, RW_READER); 671 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count, 672 0, NULL)) { 673 nbl_end_crit(vp); 674 VN_RELE(vp); 675 rr->rr_data = NULL; 676 rr->rr_status = NFSERR_ACCES; 677 return; 678 } 679 in_crit = 1; 680 } 681 682 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct); 683 684 /* check if a monitor detected a delegation conflict */ 685 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 686 VN_RELE(vp); 687 /* mark as wouldblock so response is dropped */ 688 curthread->t_flag |= T_WOULDBLOCK; 689 690 rr->rr_data = NULL; 691 return; 692 } 693 694 va.va_mask = AT_ALL; 695 696 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 697 698 if (error) { 699 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 700 if (in_crit) 701 nbl_end_crit(vp); 702 703 VN_RELE(vp); 704 rr->rr_data = NULL; 705 rr->rr_status = puterrno(error); 706 707 return; 708 } 709 710 /* 711 * This is a kludge to allow reading of files created 712 * with no read permission. The owner of the file 713 * is always allowed to read it. 714 */ 715 if (crgetuid(cr) != va.va_uid) { 716 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct); 717 718 if (error) { 719 /* 720 * Exec is the same as read over the net because 721 * of demand loading. 
722 */ 723 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct); 724 } 725 if (error) { 726 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 727 if (in_crit) 728 nbl_end_crit(vp); 729 VN_RELE(vp); 730 rr->rr_data = NULL; 731 rr->rr_status = puterrno(error); 732 733 return; 734 } 735 } 736 737 if (MANDLOCK(vp, va.va_mode)) { 738 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 739 if (in_crit) 740 nbl_end_crit(vp); 741 742 VN_RELE(vp); 743 rr->rr_data = NULL; 744 rr->rr_status = NFSERR_ACCES; 745 746 return; 747 } 748 749 rr->rr_ok.rrok_wlist_len = 0; 750 rr->rr_ok.rrok_wlist = NULL; 751 752 if ((u_offset_t)ra->ra_offset >= va.va_size) { 753 rr->rr_count = 0; 754 rr->rr_data = NULL; 755 /* 756 * In this case, status is NFS_OK, but there is no data 757 * to encode. So set rr_mp to NULL. 758 */ 759 rr->rr_mp = NULL; 760 rr->rr_ok.rrok_wlist = ra->ra_wlist; 761 if (rr->rr_ok.rrok_wlist) 762 clist_zero_len(rr->rr_ok.rrok_wlist); 763 goto done; 764 } 765 766 if (ra->ra_wlist) { 767 mp = NULL; 768 rr->rr_mp = NULL; 769 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist); 770 if (ra->ra_count > iov.iov_len) { 771 rr->rr_data = NULL; 772 rr->rr_status = NFSERR_INVAL; 773 goto done; 774 } 775 } else { 776 /* 777 * mp will contain the data to be sent out in the read reply. 778 * This will be freed after the reply has been sent out (by the 779 * driver). 780 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so 781 * that the call to xdrmblk_putmblk() never fails. 
782 */ 783 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG, 784 &alloc_err); 785 ASSERT(mp != NULL); 786 ASSERT(alloc_err == 0); 787 788 rr->rr_mp = mp; 789 790 /* 791 * Set up io vector 792 */ 793 iov.iov_base = (caddr_t)mp->b_datap->db_base; 794 iov.iov_len = ra->ra_count; 795 } 796 797 uio.uio_iov = &iov; 798 uio.uio_iovcnt = 1; 799 uio.uio_segflg = UIO_SYSSPACE; 800 uio.uio_extflg = UIO_COPY_CACHED; 801 uio.uio_loffset = (offset_t)ra->ra_offset; 802 uio.uio_resid = ra->ra_count; 803 804 error = VOP_READ(vp, &uio, 0, cr, &ct); 805 806 if (error) { 807 if (mp) 808 freeb(mp); 809 810 /* 811 * check if a monitor detected a delegation conflict and 812 * mark as wouldblock so response is dropped 813 */ 814 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 815 curthread->t_flag |= T_WOULDBLOCK; 816 else 817 rr->rr_status = puterrno(error); 818 819 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 820 if (in_crit) 821 nbl_end_crit(vp); 822 823 VN_RELE(vp); 824 rr->rr_data = NULL; 825 826 return; 827 } 828 829 /* 830 * Get attributes again so we can send the latest access 831 * time to the client side for his cache. 
832 */ 833 va.va_mask = AT_ALL; 834 835 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 836 837 if (error) { 838 if (mp) 839 freeb(mp); 840 841 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 842 if (in_crit) 843 nbl_end_crit(vp); 844 845 VN_RELE(vp); 846 rr->rr_data = NULL; 847 rr->rr_status = puterrno(error); 848 849 return; 850 } 851 852 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid); 853 854 if (mp) { 855 rr->rr_data = (char *)mp->b_datap->db_base; 856 } else { 857 if (ra->ra_wlist) { 858 rr->rr_data = (caddr_t)iov.iov_base; 859 if (!rdma_setup_read_data2(ra, rr)) { 860 rr->rr_data = NULL; 861 rr->rr_status = puterrno(NFSERR_INVAL); 862 } 863 } 864 } 865 done: 866 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct); 867 if (in_crit) 868 nbl_end_crit(vp); 869 870 acl_perm(vp, exi, &va, cr); 871 872 /* check for overflows */ 873 error = vattr_to_nattr(&va, &rr->rr_attr); 874 875 VN_RELE(vp); 876 877 rr->rr_status = puterrno(error); 878 } 879 880 /* 881 * Free data allocated by rfs_read 882 */ 883 void 884 rfs_rdfree(struct nfsrdresult *rr) 885 { 886 mblk_t *mp; 887 888 if (rr->rr_status == NFS_OK) { 889 mp = rr->rr_mp; 890 if (mp != NULL) 891 freeb(mp); 892 } 893 } 894 895 void * 896 rfs_read_getfh(struct nfsreadargs *ra) 897 { 898 return (&ra->ra_fhandle); 899 } 900 901 #define MAX_IOVECS 12 902 903 #ifdef DEBUG 904 static int rfs_write_sync_hits = 0; 905 static int rfs_write_sync_misses = 0; 906 #endif 907 908 /* 909 * Write data to file. 910 * Returns attributes of a file after writing some data to it. 911 * 912 * Any changes made here, especially in error handling might have 913 * to also be done in rfs_write (which clusters write requests). 
914 */ 915 void 916 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, 917 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 918 { 919 int error; 920 vnode_t *vp; 921 rlim64_t rlimit; 922 struct vattr va; 923 struct uio uio; 924 struct iovec iov[MAX_IOVECS]; 925 mblk_t *m; 926 struct iovec *iovp; 927 int iovcnt; 928 cred_t *savecred; 929 int in_crit = 0; 930 caller_context_t ct; 931 932 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 933 if (vp == NULL) { 934 ns->ns_status = NFSERR_STALE; 935 return; 936 } 937 938 if (rdonly(exi, vp, req)) { 939 VN_RELE(vp); 940 ns->ns_status = NFSERR_ROFS; 941 return; 942 } 943 944 if (vp->v_type != VREG) { 945 VN_RELE(vp); 946 ns->ns_status = NFSERR_ISDIR; 947 return; 948 } 949 950 ct.cc_sysid = 0; 951 ct.cc_pid = 0; 952 ct.cc_caller_id = nfs2_srv_caller_id; 953 ct.cc_flags = CC_DONTBLOCK; 954 955 va.va_mask = AT_UID|AT_MODE; 956 957 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 958 959 if (error) { 960 VN_RELE(vp); 961 ns->ns_status = puterrno(error); 962 963 return; 964 } 965 966 if (crgetuid(cr) != va.va_uid) { 967 /* 968 * This is a kludge to allow writes of files created 969 * with read only permission. The owner of the file 970 * is always allowed to write it. 971 */ 972 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct); 973 974 if (error) { 975 VN_RELE(vp); 976 ns->ns_status = puterrno(error); 977 return; 978 } 979 } 980 981 /* 982 * Can't access a mandatory lock file. This might cause 983 * the NFS service thread to block forever waiting for a 984 * lock to be released that will never be released. 985 */ 986 if (MANDLOCK(vp, va.va_mode)) { 987 VN_RELE(vp); 988 ns->ns_status = NFSERR_ACCES; 989 return; 990 } 991 992 /* 993 * We have to enter the critical region before calling VOP_RWLOCK 994 * to avoid a deadlock with ufs. 
995 */ 996 if (nbl_need_check(vp)) { 997 nbl_start_crit(vp, RW_READER); 998 in_crit = 1; 999 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset, 1000 wa->wa_count, 0, NULL)) { 1001 error = EACCES; 1002 goto out; 1003 } 1004 } 1005 1006 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1007 1008 /* check if a monitor detected a delegation conflict */ 1009 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1010 VN_RELE(vp); 1011 /* mark as wouldblock so response is dropped */ 1012 curthread->t_flag |= T_WOULDBLOCK; 1013 return; 1014 } 1015 1016 if (wa->wa_data || wa->wa_rlist) { 1017 /* Do the RDMA thing if necessary */ 1018 if (wa->wa_rlist) { 1019 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3); 1020 iov[0].iov_len = wa->wa_count; 1021 } else { 1022 iov[0].iov_base = wa->wa_data; 1023 iov[0].iov_len = wa->wa_count; 1024 } 1025 uio.uio_iov = iov; 1026 uio.uio_iovcnt = 1; 1027 uio.uio_segflg = UIO_SYSSPACE; 1028 uio.uio_extflg = UIO_COPY_DEFAULT; 1029 uio.uio_loffset = (offset_t)wa->wa_offset; 1030 uio.uio_resid = wa->wa_count; 1031 /* 1032 * The limit is checked on the client. We 1033 * should allow any size writes here. 1034 */ 1035 uio.uio_llimit = curproc->p_fsz_ctl; 1036 rlimit = uio.uio_llimit - wa->wa_offset; 1037 if (rlimit < (rlim64_t)uio.uio_resid) 1038 uio.uio_resid = (uint_t)rlimit; 1039 1040 /* 1041 * for now we assume no append mode 1042 */ 1043 /* 1044 * We're changing creds because VM may fault and we need 1045 * the cred of the current thread to be used if quota 1046 * checking is enabled. 
1047 */ 1048 savecred = curthread->t_cred; 1049 curthread->t_cred = cr; 1050 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1051 curthread->t_cred = savecred; 1052 } else { 1053 iovcnt = 0; 1054 for (m = wa->wa_mblk; m != NULL; m = m->b_cont) 1055 iovcnt++; 1056 if (iovcnt <= MAX_IOVECS) { 1057 #ifdef DEBUG 1058 rfs_write_sync_hits++; 1059 #endif 1060 iovp = iov; 1061 } else { 1062 #ifdef DEBUG 1063 rfs_write_sync_misses++; 1064 #endif 1065 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 1066 } 1067 mblk_to_iov(wa->wa_mblk, iovcnt, iovp); 1068 uio.uio_iov = iovp; 1069 uio.uio_iovcnt = iovcnt; 1070 uio.uio_segflg = UIO_SYSSPACE; 1071 uio.uio_extflg = UIO_COPY_DEFAULT; 1072 uio.uio_loffset = (offset_t)wa->wa_offset; 1073 uio.uio_resid = wa->wa_count; 1074 /* 1075 * The limit is checked on the client. We 1076 * should allow any size writes here. 1077 */ 1078 uio.uio_llimit = curproc->p_fsz_ctl; 1079 rlimit = uio.uio_llimit - wa->wa_offset; 1080 if (rlimit < (rlim64_t)uio.uio_resid) 1081 uio.uio_resid = (uint_t)rlimit; 1082 1083 /* 1084 * For now we assume no append mode. 1085 */ 1086 /* 1087 * We're changing creds because VM may fault and we need 1088 * the cred of the current thread to be used if quota 1089 * checking is enabled. 1090 */ 1091 savecred = curthread->t_cred; 1092 curthread->t_cred = cr; 1093 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); 1094 curthread->t_cred = savecred; 1095 1096 if (iovp != iov) 1097 kmem_free(iovp, sizeof (*iovp) * iovcnt); 1098 } 1099 1100 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1101 1102 if (!error) { 1103 /* 1104 * Get attributes again so we send the latest mod 1105 * time to the client side for his cache. 
1106 */ 1107 va.va_mask = AT_ALL; /* now we want everything */ 1108 1109 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1110 1111 /* check for overflows */ 1112 if (!error) { 1113 acl_perm(vp, exi, &va, cr); 1114 error = vattr_to_nattr(&va, &ns->ns_attr); 1115 } 1116 } 1117 1118 out: 1119 if (in_crit) 1120 nbl_end_crit(vp); 1121 VN_RELE(vp); 1122 1123 /* check if a monitor detected a delegation conflict */ 1124 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1125 /* mark as wouldblock so response is dropped */ 1126 curthread->t_flag |= T_WOULDBLOCK; 1127 else 1128 ns->ns_status = puterrno(error); 1129 1130 } 1131 1132 struct rfs_async_write { 1133 struct nfswriteargs *wa; 1134 struct nfsattrstat *ns; 1135 struct svc_req *req; 1136 cred_t *cr; 1137 kthread_t *thread; 1138 struct rfs_async_write *list; 1139 }; 1140 1141 struct rfs_async_write_list { 1142 fhandle_t *fhp; 1143 kcondvar_t cv; 1144 struct rfs_async_write *list; 1145 struct rfs_async_write_list *next; 1146 }; 1147 1148 static struct rfs_async_write_list *rfs_async_write_head = NULL; 1149 static kmutex_t rfs_async_write_lock; 1150 static int rfs_write_async = 1; /* enables write clustering if == 1 */ 1151 1152 #define MAXCLIOVECS 42 1153 #define RFSWRITE_INITVAL (enum nfsstat) -1 1154 1155 #ifdef DEBUG 1156 static int rfs_write_hits = 0; 1157 static int rfs_write_misses = 0; 1158 #endif 1159 1160 /* 1161 * Write data to file. 1162 * Returns attributes of a file after writing some data to it. 
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	/* Clustering disabled: hand the request to the synchronous path. */
	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/*
	 * Describe this request with a record that lives on our own stack
	 * (nrpsp).  It records the arguments, the reply buffer, the
	 * credentials and the thread that is servicing the request.
	 */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		/* Find the first entry whose offset is not below ours. */
		rp = lp->list;
		trp = NULL;
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		/* Link ourselves in front of rp (after trp, if any). */
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Sleep until some other thread has stored a status other
		 * than RFSWRITE_INITVAL into our reply; the loop re-checks
		 * the status after every wakeup on the cluster's cv.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	/* The cluster header also lives on this thread's stack (nlpsp). */
	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	/* Append the new cluster to the tail of the global cluster list. */
	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/*
		 * Unlink the cluster from the global list, fail every
		 * queued request with NFSERR_STALE, copy this thread's
		 * T_WOULDBLOCK bit to each requester and wake them up.
		 */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Same unlink/fail/wake sequence as above, with NFSERR_ISDIR. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	/* Caller context handed to every VOP call made below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/*
		 * Only requests that have not been given a status yet are
		 * failed here; each of them is also marked T_WOULDBLOCK.
		 */
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		/*
		 * Note: "continue" in this do/while jumps to the loop
		 * condition, which advances rp to the next request.
		 */
		if (rdonly(exi, vp, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow len so that [off, off + len) covers this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * The run ends at the end of the list, at a request
			 * that already has a status, or at a gap in offsets.
			 * On exit lrp is the first request NOT in the run
			 * (possibly NULL).
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		/*
		 * Use the on-stack iovec array when it is large enough,
		 * otherwise allocate one; it is freed after VOP_WRITE.
		 */
		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					/*
					 * Trim the last iovec so the total
					 * does not exceed wa_count.
					 */
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
1628 */ 1629 if (data_written) { 1630 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1631 1632 if (!error) { 1633 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1634 } 1635 } 1636 1637 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1638 1639 if (in_crit) 1640 nbl_end_crit(vp); 1641 VN_RELE(vp); 1642 1643 t_flag = curthread->t_flag & T_WOULDBLOCK; 1644 mutex_enter(&rfs_async_write_lock); 1645 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1646 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1647 rp->ns->ns_status = puterrno(error); 1648 rp->thread->t_flag |= t_flag; 1649 } 1650 } 1651 cv_broadcast(&nlp->cv); 1652 mutex_exit(&rfs_async_write_lock); 1653 1654 } 1655 1656 void * 1657 rfs_write_getfh(struct nfswriteargs *wa) 1658 { 1659 return (&wa->wa_fhandle); 1660 } 1661 1662 /* 1663 * Create a file. 1664 * Creates a file with given attributes and returns those attributes 1665 * and an fhandle for the new file. 1666 */ 1667 void 1668 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1669 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1670 { 1671 int error; 1672 int lookuperr; 1673 int in_crit = 0; 1674 struct vattr va; 1675 vnode_t *vp; 1676 vnode_t *realvp; 1677 vnode_t *dvp; 1678 char *name = args->ca_da.da_name; 1679 vnode_t *tvp = NULL; 1680 int mode; 1681 int lookup_ok; 1682 bool_t trunc; 1683 struct sockaddr *ca; 1684 1685 /* 1686 * Disallow NULL paths 1687 */ 1688 if (name == NULL || *name == '\0') { 1689 dr->dr_status = NFSERR_ACCES; 1690 return; 1691 } 1692 1693 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1694 if (dvp == NULL) { 1695 dr->dr_status = NFSERR_STALE; 1696 return; 1697 } 1698 1699 error = sattr_to_vattr(args->ca_sa, &va); 1700 if (error) { 1701 dr->dr_status = puterrno(error); 1702 return; 1703 } 1704 1705 /* 1706 * Must specify the mode. 
1707 */ 1708 if (!(va.va_mask & AT_MODE)) { 1709 VN_RELE(dvp); 1710 dr->dr_status = NFSERR_INVAL; 1711 return; 1712 } 1713 1714 /* 1715 * This is a completely gross hack to make mknod 1716 * work over the wire until we can wack the protocol 1717 */ 1718 if ((va.va_mode & IFMT) == IFCHR) { 1719 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1720 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1721 else { 1722 va.va_type = VCHR; 1723 /* 1724 * uncompress the received dev_t 1725 * if the top half is zero indicating a request 1726 * from an `older style' OS. 1727 */ 1728 if ((va.va_size & 0xffff0000) == 0) 1729 va.va_rdev = nfsv2_expdev(va.va_size); 1730 else 1731 va.va_rdev = (dev_t)va.va_size; 1732 } 1733 va.va_mask &= ~AT_SIZE; 1734 } else if ((va.va_mode & IFMT) == IFBLK) { 1735 va.va_type = VBLK; 1736 /* 1737 * uncompress the received dev_t 1738 * if the top half is zero indicating a request 1739 * from an `older style' OS. 1740 */ 1741 if ((va.va_size & 0xffff0000) == 0) 1742 va.va_rdev = nfsv2_expdev(va.va_size); 1743 else 1744 va.va_rdev = (dev_t)va.va_size; 1745 va.va_mask &= ~AT_SIZE; 1746 } else if ((va.va_mode & IFMT) == IFSOCK) { 1747 va.va_type = VSOCK; 1748 } else { 1749 va.va_type = VREG; 1750 } 1751 va.va_mode &= ~IFMT; 1752 va.va_mask |= AT_TYPE; 1753 1754 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1755 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1756 MAXPATHLEN); 1757 if (name == NULL) { 1758 dr->dr_status = puterrno(EINVAL); 1759 return; 1760 } 1761 1762 /* 1763 * Why was the choice made to use VWRITE as the mode to the 1764 * call to VOP_CREATE ? This results in a bug. When a client 1765 * opens a file that already exists and is RDONLY, the second 1766 * open fails with an EACESS because of the mode. 1767 * bug ID 1054648. 
1768 */ 1769 lookup_ok = 0; 1770 mode = VWRITE; 1771 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1772 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1773 NULL, NULL, NULL); 1774 if (!error) { 1775 struct vattr at; 1776 1777 lookup_ok = 1; 1778 at.va_mask = AT_MODE; 1779 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1780 if (!error) 1781 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1782 VN_RELE(tvp); 1783 tvp = NULL; 1784 } 1785 } 1786 1787 if (!lookup_ok) { 1788 if (rdonly(exi, dvp, req)) { 1789 error = EROFS; 1790 } else if (va.va_type != VREG && va.va_type != VFIFO && 1791 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1792 error = EPERM; 1793 } else { 1794 error = 0; 1795 } 1796 } 1797 1798 /* 1799 * If file size is being modified on an already existing file 1800 * make sure that there are no conflicting non-blocking mandatory 1801 * locks in the region being manipulated. Return EACCES if there 1802 * are conflicting locks. 1803 */ 1804 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1805 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1806 NULL, NULL, NULL); 1807 1808 if (!lookuperr && 1809 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1810 VN_RELE(tvp); 1811 curthread->t_flag |= T_WOULDBLOCK; 1812 goto out; 1813 } 1814 1815 if (!lookuperr && nbl_need_check(tvp)) { 1816 /* 1817 * The file exists. Now check if it has any 1818 * conflicting non-blocking mandatory locks 1819 * in the region being changed. 
1820 */ 1821 struct vattr bva; 1822 u_offset_t offset; 1823 ssize_t length; 1824 1825 nbl_start_crit(tvp, RW_READER); 1826 in_crit = 1; 1827 1828 bva.va_mask = AT_SIZE; 1829 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1830 if (!error) { 1831 if (va.va_size < bva.va_size) { 1832 offset = va.va_size; 1833 length = bva.va_size - va.va_size; 1834 } else { 1835 offset = bva.va_size; 1836 length = va.va_size - bva.va_size; 1837 } 1838 if (length) { 1839 if (nbl_conflict(tvp, NBL_WRITE, 1840 offset, length, 0, NULL)) { 1841 error = EACCES; 1842 } 1843 } 1844 } 1845 if (error) { 1846 nbl_end_crit(tvp); 1847 VN_RELE(tvp); 1848 in_crit = 0; 1849 } 1850 } else if (tvp != NULL) { 1851 VN_RELE(tvp); 1852 } 1853 } 1854 1855 if (!error) { 1856 /* 1857 * If filesystem is shared with nosuid the remove any 1858 * setuid/setgid bits on create. 1859 */ 1860 if (va.va_type == VREG && 1861 exi->exi_export.ex_flags & EX_NOSUID) 1862 va.va_mode &= ~(VSUID | VSGID); 1863 1864 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1865 NULL, NULL); 1866 1867 if (!error) { 1868 1869 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1870 trunc = TRUE; 1871 else 1872 trunc = FALSE; 1873 1874 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1875 VN_RELE(vp); 1876 curthread->t_flag |= T_WOULDBLOCK; 1877 goto out; 1878 } 1879 va.va_mask = AT_ALL; 1880 1881 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1882 1883 /* check for overflows */ 1884 if (!error) { 1885 acl_perm(vp, exi, &va, cr); 1886 error = vattr_to_nattr(&va, &dr->dr_attr); 1887 if (!error) { 1888 error = makefh(&dr->dr_fhandle, vp, 1889 exi); 1890 } 1891 } 1892 /* 1893 * Force modified metadata out to stable storage. 
1894 * 1895 * if a underlying vp exists, pass it to VOP_FSYNC 1896 */ 1897 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1898 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1899 else 1900 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1901 VN_RELE(vp); 1902 } 1903 1904 if (in_crit) { 1905 nbl_end_crit(tvp); 1906 VN_RELE(tvp); 1907 } 1908 } 1909 1910 /* 1911 * Force modified data and metadata out to stable storage. 1912 */ 1913 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1914 1915 out: 1916 1917 VN_RELE(dvp); 1918 1919 dr->dr_status = puterrno(error); 1920 1921 if (name != args->ca_da.da_name) 1922 kmem_free(name, MAXPATHLEN); 1923 } 1924 void * 1925 rfs_create_getfh(struct nfscreatargs *args) 1926 { 1927 return (args->ca_da.da_fhandle); 1928 } 1929 1930 /* 1931 * Remove a file. 1932 * Remove named file from parent directory. 1933 */ 1934 void 1935 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1936 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 1937 { 1938 int error = 0; 1939 vnode_t *vp; 1940 vnode_t *targvp; 1941 int in_crit = 0; 1942 1943 /* 1944 * Disallow NULL paths 1945 */ 1946 if (da->da_name == NULL || *da->da_name == '\0') { 1947 *status = NFSERR_ACCES; 1948 return; 1949 } 1950 1951 vp = nfs_fhtovp(da->da_fhandle, exi); 1952 if (vp == NULL) { 1953 *status = NFSERR_STALE; 1954 return; 1955 } 1956 1957 if (rdonly(exi, vp, req)) { 1958 VN_RELE(vp); 1959 *status = NFSERR_ROFS; 1960 return; 1961 } 1962 1963 /* 1964 * Check for a conflict with a non-blocking mandatory share reservation. 1965 */ 1966 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1967 NULL, cr, NULL, NULL, NULL); 1968 if (error != 0) { 1969 VN_RELE(vp); 1970 *status = puterrno(error); 1971 return; 1972 } 1973 1974 /* 1975 * If the file is delegated to an v4 client, then initiate 1976 * recall and drop this request (by setting T_WOULDBLOCK). 1977 * The client will eventually re-transmit the request and 1978 * (hopefully), by then, the v4 client will have returned 1979 * the delegation. 
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/*
	 * Hold the nbmand critical region across the remove when the
	 * target needs the check; a conflict fails the request with EACCES.
	 */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}

/*
 * Return the directory file handle carried in the REMOVE arguments.
 */
void *
rfs_remove_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * rename a file
 * Give a file (from) a new name (to).
 *
 * The result is stored in *status.  When a delegation conflict is found
 * on the source or on an existing target, the thread is marked
 * T_WOULDBLOCK and the function returns without storing a status.
 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must belong to the same export as the
	 * source.  to_exi is only compared by address after exi_rele(),
	 * it is not dereferenced.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, tovp, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/*
	 * Hold the nbmand critical region on the source across the rename
	 * when it needs the check; a conflict fails the request with EACCES.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* On success, tell the vnode layer about the source's new name. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}

/*
 * Return the source directory file handle carried in the RENAME arguments.
 */
void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must belong to the same export as the
	 * source.  to_exi is only compared by address after exi_rele(),
	 * it is not dereferenced.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, tovp, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
2238 */ 2239 (void) VOP_FSYNC(tovp, 0, cr, NULL); 2240 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL); 2241 2242 VN_RELE(tovp); 2243 VN_RELE(fromvp); 2244 2245 *status = puterrno(error); 2246 2247 } 2248 void * 2249 rfs_link_getfh(struct nfslinkargs *args) 2250 { 2251 return (args->la_from); 2252 } 2253 2254 /* 2255 * Symbolicly link to a file. 2256 * Create a file (to) with the given attributes which is a symbolic link 2257 * to the given path name (to). 2258 */ 2259 void 2260 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2261 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2262 { 2263 int error; 2264 struct vattr va; 2265 vnode_t *vp; 2266 vnode_t *svp; 2267 int lerror; 2268 struct sockaddr *ca; 2269 char *name = NULL; 2270 2271 /* 2272 * Disallow NULL paths 2273 */ 2274 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2275 *status = NFSERR_ACCES; 2276 return; 2277 } 2278 2279 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2280 if (vp == NULL) { 2281 *status = NFSERR_STALE; 2282 return; 2283 } 2284 2285 if (rdonly(exi, vp, req)) { 2286 VN_RELE(vp); 2287 *status = NFSERR_ROFS; 2288 return; 2289 } 2290 2291 error = sattr_to_vattr(args->sla_sa, &va); 2292 if (error) { 2293 VN_RELE(vp); 2294 *status = puterrno(error); 2295 return; 2296 } 2297 2298 if (!(va.va_mask & AT_MODE)) { 2299 VN_RELE(vp); 2300 *status = NFSERR_INVAL; 2301 return; 2302 } 2303 2304 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2305 name = nfscmd_convname(ca, exi, args->sla_tnm, 2306 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2307 2308 if (name == NULL) { 2309 *status = NFSERR_ACCES; 2310 return; 2311 } 2312 2313 va.va_type = VLNK; 2314 va.va_mask |= AT_TYPE; 2315 2316 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2317 2318 /* 2319 * Force new data and metadata out to stable storage. 
2320 */ 2321 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2322 NULL, cr, NULL, NULL, NULL); 2323 2324 if (!lerror) { 2325 (void) VOP_FSYNC(svp, 0, cr, NULL); 2326 VN_RELE(svp); 2327 } 2328 2329 /* 2330 * Force modified data and metadata out to stable storage. 2331 */ 2332 (void) VOP_FSYNC(vp, 0, cr, NULL); 2333 2334 VN_RELE(vp); 2335 2336 *status = puterrno(error); 2337 if (name != args->sla_tnm) 2338 kmem_free(name, MAXPATHLEN); 2339 2340 } 2341 void * 2342 rfs_symlink_getfh(struct nfsslargs *args) 2343 { 2344 return (args->sla_from.da_fhandle); 2345 } 2346 2347 /* 2348 * Make a directory. 2349 * Create a directory with the given name, parent directory, and attributes. 2350 * Returns a file handle and attributes for the new directory. 2351 */ 2352 void 2353 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2354 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2355 { 2356 int error; 2357 struct vattr va; 2358 vnode_t *dvp = NULL; 2359 vnode_t *vp; 2360 char *name = args->ca_da.da_name; 2361 2362 /* 2363 * Disallow NULL paths 2364 */ 2365 if (name == NULL || *name == '\0') { 2366 dr->dr_status = NFSERR_ACCES; 2367 return; 2368 } 2369 2370 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2371 if (vp == NULL) { 2372 dr->dr_status = NFSERR_STALE; 2373 return; 2374 } 2375 2376 if (rdonly(exi, vp, req)) { 2377 VN_RELE(vp); 2378 dr->dr_status = NFSERR_ROFS; 2379 return; 2380 } 2381 2382 error = sattr_to_vattr(args->ca_sa, &va); 2383 if (error) { 2384 VN_RELE(vp); 2385 dr->dr_status = puterrno(error); 2386 return; 2387 } 2388 2389 if (!(va.va_mask & AT_MODE)) { 2390 VN_RELE(vp); 2391 dr->dr_status = NFSERR_INVAL; 2392 return; 2393 } 2394 2395 va.va_type = VDIR; 2396 va.va_mask |= AT_TYPE; 2397 2398 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2399 2400 if (!error) { 2401 /* 2402 * Attribtutes of the newly created directory should 2403 * be returned to the client. 
2404 */ 2405 va.va_mask = AT_ALL; /* We want everything */ 2406 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2407 2408 /* check for overflows */ 2409 if (!error) { 2410 acl_perm(vp, exi, &va, cr); 2411 error = vattr_to_nattr(&va, &dr->dr_attr); 2412 if (!error) { 2413 error = makefh(&dr->dr_fhandle, dvp, exi); 2414 } 2415 } 2416 /* 2417 * Force new data and metadata out to stable storage. 2418 */ 2419 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2420 VN_RELE(dvp); 2421 } 2422 2423 /* 2424 * Force modified data and metadata out to stable storage. 2425 */ 2426 (void) VOP_FSYNC(vp, 0, cr, NULL); 2427 2428 VN_RELE(vp); 2429 2430 dr->dr_status = puterrno(error); 2431 2432 } 2433 void * 2434 rfs_mkdir_getfh(struct nfscreatargs *args) 2435 { 2436 return (args->ca_da.da_fhandle); 2437 } 2438 2439 /* 2440 * Remove a directory. 2441 * Remove the given directory name from the given parent directory. 2442 */ 2443 void 2444 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2445 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2446 { 2447 int error; 2448 vnode_t *vp; 2449 2450 /* 2451 * Disallow NULL paths 2452 */ 2453 if (da->da_name == NULL || *da->da_name == '\0') { 2454 *status = NFSERR_ACCES; 2455 return; 2456 } 2457 2458 vp = nfs_fhtovp(da->da_fhandle, exi); 2459 if (vp == NULL) { 2460 *status = NFSERR_STALE; 2461 return; 2462 } 2463 2464 if (rdonly(exi, vp, req)) { 2465 VN_RELE(vp); 2466 *status = NFSERR_ROFS; 2467 return; 2468 } 2469 2470 /* 2471 * VOP_RMDIR takes a third argument (the current 2472 * directory of the process). That's because someone 2473 * wants to return EINVAL if one tries to remove ".". 2474 * Of course, NFS servers have no idea what their 2475 * clients' current directories are. We fake it by 2476 * supplying a vnode known to exist and illegal to 2477 * remove. 2478 */ 2479 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2480 2481 /* 2482 * Force modified data and metadata out to stable storage. 
2483 */ 2484 (void) VOP_FSYNC(vp, 0, cr, NULL); 2485 2486 VN_RELE(vp); 2487 2488 /* 2489 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2490 * if the directory is not empty. A System V NFS server 2491 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2492 * over the wire. 2493 */ 2494 if (error == EEXIST) 2495 *status = NFSERR_NOTEMPTY; 2496 else 2497 *status = puterrno(error); 2498 2499 } 2500 void * 2501 rfs_rmdir_getfh(struct nfsdiropargs *da) 2502 { 2503 return (da->da_fhandle); 2504 } 2505 2506 /* ARGSUSED */ 2507 void 2508 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2509 struct exportinfo *exi, struct svc_req *req, cred_t *cr) 2510 { 2511 int error; 2512 int iseof; 2513 struct iovec iov; 2514 struct uio uio; 2515 vnode_t *vp; 2516 char *ndata = NULL; 2517 struct sockaddr *ca; 2518 size_t nents; 2519 int ret; 2520 2521 vp = nfs_fhtovp(&rda->rda_fh, exi); 2522 if (vp == NULL) { 2523 rd->rd_entries = NULL; 2524 rd->rd_status = NFSERR_STALE; 2525 return; 2526 } 2527 2528 if (vp->v_type != VDIR) { 2529 VN_RELE(vp); 2530 rd->rd_entries = NULL; 2531 rd->rd_status = NFSERR_NOTDIR; 2532 return; 2533 } 2534 2535 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2536 2537 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2538 2539 if (error) { 2540 rd->rd_entries = NULL; 2541 goto bad; 2542 } 2543 2544 if (rda->rda_count == 0) { 2545 rd->rd_entries = NULL; 2546 rd->rd_size = 0; 2547 rd->rd_eof = FALSE; 2548 goto bad; 2549 } 2550 2551 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2552 2553 /* 2554 * Allocate data for entries. This will be freed by rfs_rddirfree. 
2555 */ 2556 rd->rd_bufsize = (uint_t)rda->rda_count; 2557 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2558 2559 /* 2560 * Set up io vector to read directory data 2561 */ 2562 iov.iov_base = (caddr_t)rd->rd_entries; 2563 iov.iov_len = rda->rda_count; 2564 uio.uio_iov = &iov; 2565 uio.uio_iovcnt = 1; 2566 uio.uio_segflg = UIO_SYSSPACE; 2567 uio.uio_extflg = UIO_COPY_CACHED; 2568 uio.uio_loffset = (offset_t)rda->rda_offset; 2569 uio.uio_resid = rda->rda_count; 2570 2571 /* 2572 * read directory 2573 */ 2574 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2575 2576 /* 2577 * Clean up 2578 */ 2579 if (!error) { 2580 /* 2581 * set size and eof 2582 */ 2583 if (uio.uio_resid == rda->rda_count) { 2584 rd->rd_size = 0; 2585 rd->rd_eof = TRUE; 2586 } else { 2587 rd->rd_size = (uint32_t)(rda->rda_count - 2588 uio.uio_resid); 2589 rd->rd_eof = iseof ? TRUE : FALSE; 2590 } 2591 } 2592 2593 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2594 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2595 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2596 rda->rda_count, &ndata); 2597 2598 if (ret != 0) { 2599 size_t dropbytes; 2600 /* 2601 * We had to drop one or more entries in order to fit 2602 * during the character conversion. We need to patch 2603 * up the size and eof info. 2604 */ 2605 if (rd->rd_eof) 2606 rd->rd_eof = FALSE; 2607 dropbytes = nfscmd_dropped_entrysize( 2608 (struct dirent64 *)rd->rd_entries, nents, ret); 2609 rd->rd_size -= dropbytes; 2610 } 2611 if (ndata == NULL) { 2612 ndata = (char *)rd->rd_entries; 2613 } else if (ndata != (char *)rd->rd_entries) { 2614 kmem_free(rd->rd_entries, rd->rd_bufsize); 2615 rd->rd_entries = (void *)ndata; 2616 rd->rd_bufsize = rda->rda_count; 2617 } 2618 2619 bad: 2620 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2621 2622 #if 0 /* notyet */ 2623 /* 2624 * Don't do this. 
It causes local disk writes when just 2625 * reading the file and the overhead is deemed larger 2626 * than the benefit. 2627 */ 2628 /* 2629 * Force modified metadata out to stable storage. 2630 */ 2631 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2632 #endif 2633 2634 VN_RELE(vp); 2635 2636 rd->rd_status = puterrno(error); 2637 2638 } 2639 void * 2640 rfs_readdir_getfh(struct nfsrddirargs *rda) 2641 { 2642 return (&rda->rda_fh); 2643 } 2644 void 2645 rfs_rddirfree(struct nfsrddirres *rd) 2646 { 2647 if (rd->rd_entries != NULL) 2648 kmem_free(rd->rd_entries, rd->rd_bufsize); 2649 } 2650 2651 /* ARGSUSED */ 2652 void 2653 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2654 struct svc_req *req, cred_t *cr) 2655 { 2656 int error; 2657 struct statvfs64 sb; 2658 vnode_t *vp; 2659 2660 vp = nfs_fhtovp(fh, exi); 2661 if (vp == NULL) { 2662 fs->fs_status = NFSERR_STALE; 2663 return; 2664 } 2665 2666 error = VFS_STATVFS(vp->v_vfsp, &sb); 2667 2668 if (!error) { 2669 fs->fs_tsize = nfstsize(); 2670 fs->fs_bsize = sb.f_frsize; 2671 fs->fs_blocks = sb.f_blocks; 2672 fs->fs_bfree = sb.f_bfree; 2673 fs->fs_bavail = sb.f_bavail; 2674 } 2675 2676 VN_RELE(vp); 2677 2678 fs->fs_status = puterrno(error); 2679 2680 } 2681 void * 2682 rfs_statfs_getfh(fhandle_t *fh) 2683 { 2684 return (fh); 2685 } 2686 2687 static int 2688 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2689 { 2690 vap->va_mask = 0; 2691 2692 /* 2693 * There was a sign extension bug in some VFS based systems 2694 * which stored the mode as a short. When it would get 2695 * assigned to a u_long, no sign extension would occur. 2696 * It needed to, but this wasn't noticed because sa_mode 2697 * would then get assigned back to the short, thus ignoring 2698 * the upper 16 bits of sa_mode. 2699 * 2700 * To make this implementation work for both broken 2701 * clients and good clients, we check for both versions 2702 * of the mode. 
2703 */ 2704 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2705 sa->sa_mode != (uint32_t)-1) { 2706 vap->va_mask |= AT_MODE; 2707 vap->va_mode = sa->sa_mode; 2708 } 2709 if (sa->sa_uid != (uint32_t)-1) { 2710 vap->va_mask |= AT_UID; 2711 vap->va_uid = sa->sa_uid; 2712 } 2713 if (sa->sa_gid != (uint32_t)-1) { 2714 vap->va_mask |= AT_GID; 2715 vap->va_gid = sa->sa_gid; 2716 } 2717 if (sa->sa_size != (uint32_t)-1) { 2718 vap->va_mask |= AT_SIZE; 2719 vap->va_size = sa->sa_size; 2720 } 2721 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2722 sa->sa_atime.tv_usec != (int32_t)-1) { 2723 #ifndef _LP64 2724 /* return error if time overflow */ 2725 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2726 return (EOVERFLOW); 2727 #endif 2728 vap->va_mask |= AT_ATIME; 2729 /* 2730 * nfs protocol defines times as unsigned so don't extend sign, 2731 * unless sysadmin set nfs_allow_preepoch_time. 2732 */ 2733 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2734 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2735 } 2736 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2737 sa->sa_mtime.tv_usec != (int32_t)-1) { 2738 #ifndef _LP64 2739 /* return error if time overflow */ 2740 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2741 return (EOVERFLOW); 2742 #endif 2743 vap->va_mask |= AT_MTIME; 2744 /* 2745 * nfs protocol defines times as unsigned so don't extend sign, 2746 * unless sysadmin set nfs_allow_preepoch_time. 2747 */ 2748 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2749 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2750 } 2751 return (0); 2752 } 2753 2754 static enum nfsftype vt_to_nf[] = { 2755 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2756 }; 2757 2758 /* 2759 * check the following fields for overflow: nodeid, size, and time. 2760 * There could be a problem when converting 64-bit LP64 fields 2761 * into 32-bit ones. Return an error if there is an overflow. 
2762 */ 2763 int 2764 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2765 { 2766 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2767 na->na_type = vt_to_nf[vap->va_type]; 2768 2769 if (vap->va_mode == (unsigned short) -1) 2770 na->na_mode = (uint32_t)-1; 2771 else 2772 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2773 2774 if (vap->va_uid == (unsigned short)(-1)) 2775 na->na_uid = (uint32_t)(-1); 2776 else if (vap->va_uid == UID_NOBODY) 2777 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2778 else 2779 na->na_uid = vap->va_uid; 2780 2781 if (vap->va_gid == (unsigned short)(-1)) 2782 na->na_gid = (uint32_t)-1; 2783 else if (vap->va_gid == GID_NOBODY) 2784 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2785 else 2786 na->na_gid = vap->va_gid; 2787 2788 /* 2789 * Do we need to check fsid for overflow? It is 64-bit in the 2790 * vattr, but are bigger than 32 bit values supported? 2791 */ 2792 na->na_fsid = vap->va_fsid; 2793 2794 na->na_nodeid = vap->va_nodeid; 2795 2796 /* 2797 * Check to make sure that the nodeid is representable over the 2798 * wire without losing bits. 2799 */ 2800 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2801 return (EFBIG); 2802 na->na_nlink = vap->va_nlink; 2803 2804 /* 2805 * Check for big files here, instead of at the caller. See 2806 * comments in cstat for large special file explanation. 2807 */ 2808 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2809 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2810 return (EFBIG); 2811 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2812 /* UNKNOWN_SIZE | OVERFLOW */ 2813 na->na_size = MAXOFF32_T; 2814 } else 2815 na->na_size = vap->va_size; 2816 } else 2817 na->na_size = vap->va_size; 2818 2819 /* 2820 * If the vnode times overflow the 32-bit times that NFS2 2821 * uses on the wire then return an error. 
2822 */ 2823 if (!NFS_VAP_TIME_OK(vap)) { 2824 return (EOVERFLOW); 2825 } 2826 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2827 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2828 2829 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2830 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2831 2832 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2833 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2834 2835 /* 2836 * If the dev_t will fit into 16 bits then compress 2837 * it, otherwise leave it alone. See comments in 2838 * nfs_client.c. 2839 */ 2840 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2841 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2842 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2843 else 2844 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2845 2846 na->na_blocks = vap->va_nblocks; 2847 na->na_blocksize = vap->va_blksize; 2848 2849 /* 2850 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2851 * over-the-wire protocols for named-pipe vnodes. It remaps the 2852 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2853 * 2854 * BUYER BEWARE: 2855 * If you are porting the NFS to a non-Sun server, you probably 2856 * don't want to include the following block of code. The 2857 * over-the-wire special file types will be changing with the 2858 * NFS Protocol Revision. 2859 */ 2860 if (vap->va_type == VFIFO) 2861 NA_SETFIFO(na); 2862 return (0); 2863 } 2864 2865 /* 2866 * acl v2 support: returns approximate permission. 2867 * default: returns minimal permission (more restrictive) 2868 * aclok: returns maximal permission (less restrictive) 2869 * This routine changes the permissions that are alaredy in *va. 2870 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 2871 * CLASS_OBJ is always the same as GROUP_OBJ entry. 
2872 */ 2873 static void 2874 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2875 { 2876 vsecattr_t vsa; 2877 int aclcnt; 2878 aclent_t *aclentp; 2879 mode_t mask_perm; 2880 mode_t grp_perm; 2881 mode_t other_perm; 2882 mode_t other_orig; 2883 int error; 2884 2885 /* dont care default acl */ 2886 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2887 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2888 2889 if (!error) { 2890 aclcnt = vsa.vsa_aclcnt; 2891 if (aclcnt > MIN_ACL_ENTRIES) { 2892 /* non-trivial ACL */ 2893 aclentp = vsa.vsa_aclentp; 2894 if (exi->exi_export.ex_flags & EX_ACLOK) { 2895 /* maximal permissions */ 2896 grp_perm = 0; 2897 other_perm = 0; 2898 for (; aclcnt > 0; aclcnt--, aclentp++) { 2899 switch (aclentp->a_type) { 2900 case USER_OBJ: 2901 break; 2902 case USER: 2903 grp_perm |= 2904 aclentp->a_perm << 3; 2905 other_perm |= aclentp->a_perm; 2906 break; 2907 case GROUP_OBJ: 2908 grp_perm |= 2909 aclentp->a_perm << 3; 2910 break; 2911 case GROUP: 2912 other_perm |= aclentp->a_perm; 2913 break; 2914 case OTHER_OBJ: 2915 other_orig = aclentp->a_perm; 2916 break; 2917 case CLASS_OBJ: 2918 mask_perm = aclentp->a_perm; 2919 break; 2920 default: 2921 break; 2922 } 2923 } 2924 grp_perm &= mask_perm << 3; 2925 other_perm &= mask_perm; 2926 other_perm |= other_orig; 2927 2928 } else { 2929 /* minimal permissions */ 2930 grp_perm = 070; 2931 other_perm = 07; 2932 for (; aclcnt > 0; aclcnt--, aclentp++) { 2933 switch (aclentp->a_type) { 2934 case USER_OBJ: 2935 break; 2936 case USER: 2937 case CLASS_OBJ: 2938 grp_perm &= 2939 aclentp->a_perm << 3; 2940 other_perm &= 2941 aclentp->a_perm; 2942 break; 2943 case GROUP_OBJ: 2944 grp_perm &= 2945 aclentp->a_perm << 3; 2946 break; 2947 case GROUP: 2948 other_perm &= 2949 aclentp->a_perm; 2950 break; 2951 case OTHER_OBJ: 2952 other_perm &= 2953 aclentp->a_perm; 2954 break; 2955 default: 2956 break; 2957 } 2958 } 2959 } 2960 /* copy to va */ 2961 va->va_mode &= ~077; 2962 va->va_mode |= 
grp_perm | other_perm; 2963 } 2964 if (vsa.vsa_aclcnt) 2965 kmem_free(vsa.vsa_aclentp, 2966 vsa.vsa_aclcnt * sizeof (aclent_t)); 2967 } 2968 } 2969 2970 void 2971 rfs_srvrinit(void) 2972 { 2973 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2974 nfs2_srv_caller_id = fs_new_caller_id(); 2975 } 2976 2977 void 2978 rfs_srvrfini(void) 2979 { 2980 mutex_destroy(&rfs_async_write_lock); 2981 } 2982 2983 static int 2984 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2985 { 2986 struct clist *wcl; 2987 int wlist_len; 2988 uint32_t count = rr->rr_count; 2989 2990 wcl = ra->ra_wlist; 2991 2992 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 2993 return (FALSE); 2994 } 2995 2996 wcl = ra->ra_wlist; 2997 rr->rr_ok.rrok_wlist_len = wlist_len; 2998 rr->rr_ok.rrok_wlist = wcl; 2999 3000 return (TRUE); 3001 }