6583 remove whole-process swapping
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All rights reserved. 
29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/sysmacros.h> 42 #include <sys/statvfs.h> 43 #include <sys/kmem.h> 44 #include <sys/kstat.h> 45 #include <sys/dirent.h> 46 #include <sys/cmn_err.h> 47 #include <sys/debug.h> 48 #include <sys/vtrace.h> 49 #include <sys/mode.h> 50 #include <sys/acl.h> 51 #include <sys/nbmlock.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 55 #include <rpc/types.h> 56 #include <rpc/auth.h> 57 #include <rpc/svc.h> 58 59 #include <nfs/nfs.h> 60 #include <nfs/export.h> 61 #include <nfs/nfs_cmd.h> 62 63 #include <vm/hat.h> 64 #include <vm/as.h> 65 #include <vm/seg.h> 66 #include <vm/seg_map.h> 67 #include <vm/seg_kmem.h> 68 69 #include <sys/strsubr.h> 70 71 /* 72 * These are the interface routines for the server side of the 73 * Network File System. See the NFS version 2 protocol specification 74 * for a description of this interface. 75 */ 76 77 static int sattr_to_vattr(struct nfssattr *, struct vattr *); 78 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, 79 cred_t *); 80 81 /* 82 * Some "over the wire" UNIX file types. These are encoded 83 * into the mode. This needs to be fixed in the next rev. 84 */ 85 #define IFMT 0170000 /* type of file */ 86 #define IFCHR 0020000 /* character special */ 87 #define IFBLK 0060000 /* block special */ 88 #define IFSOCK 0140000 /* socket */ 89 90 u_longlong_t nfs2_srv_caller_id; 91 92 /* 93 * Get file attributes. 94 * Returns the current attributes of the file with the given fhandle. 
 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		acl_perm(vp, exi, &va, cr);
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/*
 * Return a pointer to the file handle embedded in the GETATTR arguments.
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}

/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle.  Returns
 * the new attributes.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;
	int in_crit = 0;
	vnode_t *vp;
	struct vattr va;
	struct vattr bva;
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing.  If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so.  To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does.  VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/* region being changed: [min(old,new), |new - old|) */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner is allowed to change the size regardless of mode;
		 * do it via VOP_SPACE and drop AT_SIZE from the later
		 * VOP_SETATTR so the size isn't set twice.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}

/*
 * Return a pointer to the file handle embedded in the SETATTR arguments.
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}

/*
 * Directory lookup.
 * Returns an fhandle and file attributes for file name in a directory.
 */
/* ARGSUSED */
void
rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *dvp;
	vnode_t *vp;
	struct vattr va;
	fhandle_t *fhp = da->da_fhandle;
	struct sec_ol sec = {0, 0};
	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
	char *name;
	struct sockaddr *ca;

	/*
	 * Trusted Extension doesn't support NFSv2. MOUNT
	 * will reject v2 clients. Need to prevent v2 client
	 * access via WebNFS here.
	 */
	if (is_system_labeled() && req->rq_vers == 2) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * Allow lookups from the root - the default
	 * location of the public filehandle.
	 */
	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
		dvp = rootdir;
		VN_HOLD(dvp);
	} else {
		dvp = nfs_fhtovp(fhp, exi);
		if (dvp == NULL) {
			dr->dr_status = NFSERR_STALE;
			return;
		}
	}

	/*
	 * Not allow lookup beyond root.
	 * If the filehandle matches a filehandle of the exi,
	 * then the ".." refers beyond the root of an exported filesystem.
	 */
	if (strcmp(da->da_name, "..") == 0 &&
	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
		VN_RELE(dvp);
		dr->dr_status = NFSERR_NOENT;
		return;
	}

	/* translate the name per the client's character-set mapping */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN);

	if (name == NULL) {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/*
	 * If the public filehandle is used then allow
	 * a multi-component lookup, i.e. evaluate
	 * a pathname and follow symbolic links if
	 * necessary.
	 *
	 * This may result in a vnode in another filesystem
	 * which is OK as long as the filesystem is exported.
	 */
	if (PUBLIC_FH2(fhp)) {
		publicfh_flag = TRUE;
		error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
		    &sec);
	} else {
		/*
		 * Do a normal single component lookup.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
	}

	if (name != da->da_name)
		kmem_free(name, MAXPATHLEN);


	if (!error) {
		va.va_mask = AT_ALL;	/* we want everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				if (sec.sec_flags & SEC_QUERY)
					error = makefh_ol(&dr->dr_fhandle, exi,
					    sec.sec_index);
				else {
					error = makefh(&dr->dr_fhandle, vp,
					    exi);
					if (!error && publicfh_flag &&
					    !chk_clnt_sec(exi, req))
						auth_weak = TRUE;
				}
			}
		}
		VN_RELE(vp);
	}

	VN_RELE(dvp);

	/*
	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
	 * and have obtained a new exportinfo in exi which needs to be
	 * released. Note the the original exportinfo pointed to by exi
	 * will be released by the caller, comon_dispatch.
	 */
	if (publicfh_flag && exi != NULL)
		exi_rele(exi);

	/*
	 * If it's public fh, no 0x81, and client's flavor is
	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
	 */
	if (auth_weak)
		dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
	else
		dr->dr_status = puterrno(error);
}

/*
 * Return a pointer to the directory file handle in the LOOKUP arguments.
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}

/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* refuse objects subject to mandatory locking */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname.  This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* outbound character-set conversion of the link text */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}

/*
 * Return the file handle argument of the READLINK request.
 */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}

/*
 * Free data allocated by rfs_readlink
 */
void
rfs_rlfree(struct nfsrdlnres *rl)
{
	if (rl->rl_data != NULL)
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
}

static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);

/*
 * Read data.
 * Returns some data read from the file at the given fhandle.
 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;
	int alloc_err = 0;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission.  The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* read starting at or past EOF: success with zero bytes */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for his cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}

/*
 * Free data allocated by rfs_read
 */
void
rfs_rdfree(struct nfsrdresult *rr)
{
	mblk_t *mp;

	if (rr->rr_status == NFS_OK) {
		mp = rr->rr_mp;
		if (mp != NULL)
			freeb(mp);
	}
}

/*
 * Return a pointer to the file handle embedded in the READ arguments.
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}

#define	MAX_IOVECS	12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;
static int rfs_write_sync_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * Any changes made here, especially in error handling might have
 * to also be done in rfs_write (which clusters write requests).
 */
/* ARGSUSED */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission.  The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file.  This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {
		/* data arrived as an mblk chain; gather it into iovecs */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for his cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}

/*
 * One pending WRITE request; linked into a per-file cluster list,
 * sorted by starting offset.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;
	struct nfsattrstat *ns;
	struct svc_req *req;
	cred_t *cr;
	bool_t ro;
	kthread_t *thread;
	struct rfs_async_write *list;
};

/*
 * Cluster of WRITE requests against the same file handle; the first
 * arriving thread processes the whole list and broadcasts on cv.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;
	kcondvar_t cv;
	struct rfs_async_write *list;
	struct rfs_async_write_list *next;
};

static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif

/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
1166 */ 1167 void 1168 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, 1169 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1170 { 1171 int error; 1172 vnode_t *vp; 1173 rlim64_t rlimit; 1174 struct vattr va; 1175 struct uio uio; 1176 struct rfs_async_write_list *lp; 1177 struct rfs_async_write_list *nlp; 1178 struct rfs_async_write *rp; 1179 struct rfs_async_write *nrp; 1180 struct rfs_async_write *trp; 1181 struct rfs_async_write *lrp; 1182 int data_written; 1183 int iovcnt; 1184 mblk_t *m; 1185 struct iovec *iovp; 1186 struct iovec *niovp; 1187 struct iovec iov[MAXCLIOVECS]; 1188 int count; 1189 int rcount; 1190 uint_t off; 1191 uint_t len; 1192 struct rfs_async_write nrpsp; 1193 struct rfs_async_write_list nlpsp; 1194 ushort_t t_flag; 1195 cred_t *savecred; 1196 int in_crit = 0; 1197 caller_context_t ct; 1198 1199 if (!rfs_write_async) { 1200 rfs_write_sync(wa, ns, exi, req, cr, ro); 1201 return; 1202 } 1203 1204 /* 1205 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0 1206 * is considered an OK. 1207 */ 1208 ns->ns_status = RFSWRITE_INITVAL; 1209 1210 nrp = &nrpsp; 1211 nrp->wa = wa; 1212 nrp->ns = ns; 1213 nrp->req = req; 1214 nrp->cr = cr; 1215 nrp->ro = ro; 1216 nrp->thread = curthread; 1217 1218 /* 1219 * Look to see if there is already a cluster started 1220 * for this file. 1221 */ 1222 mutex_enter(&rfs_async_write_lock); 1223 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { 1224 if (bcmp(&wa->wa_fhandle, lp->fhp, 1225 sizeof (fhandle_t)) == 0) 1226 break; 1227 } 1228 1229 /* 1230 * If lp is non-NULL, then there is already a cluster 1231 * started. We need to place ourselves in the cluster 1232 * list in the right place as determined by starting 1233 * offset. Conflicts with non-blocking mandatory locked 1234 * regions will be checked when the cluster is processed. 
1235 */ 1236 if (lp != NULL) { 1237 rp = lp->list; 1238 trp = NULL; 1239 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) { 1240 trp = rp; 1241 rp = rp->list; 1242 } 1243 nrp->list = rp; 1244 if (trp == NULL) 1245 lp->list = nrp; 1246 else 1247 trp->list = nrp; 1248 while (nrp->ns->ns_status == RFSWRITE_INITVAL) 1249 cv_wait(&lp->cv, &rfs_async_write_lock); 1250 mutex_exit(&rfs_async_write_lock); 1251 1252 return; 1253 } 1254 1255 /* 1256 * No cluster started yet, start one and add ourselves 1257 * to the list of clusters. 1258 */ 1259 nrp->list = NULL; 1260 1261 nlp = &nlpsp; 1262 nlp->fhp = &wa->wa_fhandle; 1263 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL); 1264 nlp->list = nrp; 1265 nlp->next = NULL; 1266 1267 if (rfs_async_write_head == NULL) { 1268 rfs_async_write_head = nlp; 1269 } else { 1270 lp = rfs_async_write_head; 1271 while (lp->next != NULL) 1272 lp = lp->next; 1273 lp->next = nlp; 1274 } 1275 mutex_exit(&rfs_async_write_lock); 1276 1277 /* 1278 * Convert the file handle common to all of the requests 1279 * in this cluster to a vnode. 1280 */ 1281 vp = nfs_fhtovp(&wa->wa_fhandle, exi); 1282 if (vp == NULL) { 1283 mutex_enter(&rfs_async_write_lock); 1284 if (rfs_async_write_head == nlp) 1285 rfs_async_write_head = nlp->next; 1286 else { 1287 lp = rfs_async_write_head; 1288 while (lp->next != nlp) 1289 lp = lp->next; 1290 lp->next = nlp->next; 1291 } 1292 t_flag = curthread->t_flag & T_WOULDBLOCK; 1293 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1294 rp->ns->ns_status = NFSERR_STALE; 1295 rp->thread->t_flag |= t_flag; 1296 } 1297 cv_broadcast(&nlp->cv); 1298 mutex_exit(&rfs_async_write_lock); 1299 1300 return; 1301 } 1302 1303 /* 1304 * Can only write regular files. Attempts to write any 1305 * other file types fail with EISDIR. 
1306 */ 1307 if (vp->v_type != VREG) { 1308 VN_RELE(vp); 1309 mutex_enter(&rfs_async_write_lock); 1310 if (rfs_async_write_head == nlp) 1311 rfs_async_write_head = nlp->next; 1312 else { 1313 lp = rfs_async_write_head; 1314 while (lp->next != nlp) 1315 lp = lp->next; 1316 lp->next = nlp->next; 1317 } 1318 t_flag = curthread->t_flag & T_WOULDBLOCK; 1319 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1320 rp->ns->ns_status = NFSERR_ISDIR; 1321 rp->thread->t_flag |= t_flag; 1322 } 1323 cv_broadcast(&nlp->cv); 1324 mutex_exit(&rfs_async_write_lock); 1325 1326 return; 1327 } 1328 1329 /* 1330 * Enter the critical region before calling VOP_RWLOCK, to avoid a 1331 * deadlock with ufs. 1332 */ 1333 if (nbl_need_check(vp)) { 1334 nbl_start_crit(vp, RW_READER); 1335 in_crit = 1; 1336 } 1337 1338 ct.cc_sysid = 0; 1339 ct.cc_pid = 0; 1340 ct.cc_caller_id = nfs2_srv_caller_id; 1341 ct.cc_flags = CC_DONTBLOCK; 1342 1343 /* 1344 * Lock the file for writing. This operation provides 1345 * the delay which allows clusters to grow. 1346 */ 1347 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct); 1348 1349 /* check if a monitor detected a delegation conflict */ 1350 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { 1351 if (in_crit) 1352 nbl_end_crit(vp); 1353 VN_RELE(vp); 1354 /* mark as wouldblock so response is dropped */ 1355 curthread->t_flag |= T_WOULDBLOCK; 1356 mutex_enter(&rfs_async_write_lock); 1357 if (rfs_async_write_head == nlp) 1358 rfs_async_write_head = nlp->next; 1359 else { 1360 lp = rfs_async_write_head; 1361 while (lp->next != nlp) 1362 lp = lp->next; 1363 lp->next = nlp->next; 1364 } 1365 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1366 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1367 rp->ns->ns_status = puterrno(error); 1368 rp->thread->t_flag |= T_WOULDBLOCK; 1369 } 1370 } 1371 cv_broadcast(&nlp->cv); 1372 mutex_exit(&rfs_async_write_lock); 1373 1374 return; 1375 } 1376 1377 /* 1378 * Disconnect this cluster from the list of clusters. 
1379 * The cluster that is being dealt with must be fixed 1380 * in size after this point, so there is no reason 1381 * to leave it on the list so that new requests can 1382 * find it. 1383 * 1384 * The algorithm is that the first write request will 1385 * create a cluster, convert the file handle to a 1386 * vnode pointer, and then lock the file for writing. 1387 * This request is not likely to be clustered with 1388 * any others. However, the next request will create 1389 * a new cluster and be blocked in VOP_RWLOCK while 1390 * the first request is being processed. This delay 1391 * will allow more requests to be clustered in this 1392 * second cluster. 1393 */ 1394 mutex_enter(&rfs_async_write_lock); 1395 if (rfs_async_write_head == nlp) 1396 rfs_async_write_head = nlp->next; 1397 else { 1398 lp = rfs_async_write_head; 1399 while (lp->next != nlp) 1400 lp = lp->next; 1401 lp->next = nlp->next; 1402 } 1403 mutex_exit(&rfs_async_write_lock); 1404 1405 /* 1406 * Step through the list of requests in this cluster. 1407 * We need to check permissions to make sure that all 1408 * of the requests have sufficient permission to write 1409 * the file. A cluster can be composed of requests 1410 * from different clients and different users on each 1411 * client. 1412 * 1413 * As a side effect, we also calculate the size of the 1414 * byte range that this cluster encompasses. 1415 */ 1416 rp = nlp->list; 1417 off = rp->wa->wa_offset; 1418 len = (uint_t)0; 1419 do { 1420 if (rdonly(rp->ro, vp)) { 1421 rp->ns->ns_status = NFSERR_ROFS; 1422 t_flag = curthread->t_flag & T_WOULDBLOCK; 1423 rp->thread->t_flag |= t_flag; 1424 continue; 1425 } 1426 1427 va.va_mask = AT_UID|AT_MODE; 1428 1429 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1430 1431 if (!error) { 1432 if (crgetuid(rp->cr) != va.va_uid) { 1433 /* 1434 * This is a kludge to allow writes of files 1435 * created with read only permission. The 1436 * owner of the file is always allowed to 1437 * write it. 
1438 */ 1439 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct); 1440 } 1441 if (!error && MANDLOCK(vp, va.va_mode)) 1442 error = EACCES; 1443 } 1444 1445 /* 1446 * Check for a conflict with a nbmand-locked region. 1447 */ 1448 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset, 1449 rp->wa->wa_count, 0, NULL)) { 1450 error = EACCES; 1451 } 1452 1453 if (error) { 1454 rp->ns->ns_status = puterrno(error); 1455 t_flag = curthread->t_flag & T_WOULDBLOCK; 1456 rp->thread->t_flag |= t_flag; 1457 continue; 1458 } 1459 if (len < rp->wa->wa_offset + rp->wa->wa_count - off) 1460 len = rp->wa->wa_offset + rp->wa->wa_count - off; 1461 } while ((rp = rp->list) != NULL); 1462 1463 /* 1464 * Step through the cluster attempting to gather as many 1465 * requests which are contiguous as possible. These 1466 * contiguous requests are handled via one call to VOP_WRITE 1467 * instead of different calls to VOP_WRITE. We also keep 1468 * track of the fact that any data was written. 1469 */ 1470 rp = nlp->list; 1471 data_written = 0; 1472 do { 1473 /* 1474 * Skip any requests which are already marked as having an 1475 * error. 1476 */ 1477 if (rp->ns->ns_status != RFSWRITE_INITVAL) { 1478 rp = rp->list; 1479 continue; 1480 } 1481 1482 /* 1483 * Count the number of iovec's which are required 1484 * to handle this set of requests. One iovec is 1485 * needed for each data buffer, whether addressed 1486 * by wa_data or by the b_rptr pointers in the 1487 * mblk chains. 
1488 */ 1489 iovcnt = 0; 1490 lrp = rp; 1491 for (;;) { 1492 if (lrp->wa->wa_data || lrp->wa->wa_rlist) 1493 iovcnt++; 1494 else { 1495 m = lrp->wa->wa_mblk; 1496 while (m != NULL) { 1497 iovcnt++; 1498 m = m->b_cont; 1499 } 1500 } 1501 if (lrp->list == NULL || 1502 lrp->list->ns->ns_status != RFSWRITE_INITVAL || 1503 lrp->wa->wa_offset + lrp->wa->wa_count != 1504 lrp->list->wa->wa_offset) { 1505 lrp = lrp->list; 1506 break; 1507 } 1508 lrp = lrp->list; 1509 } 1510 1511 if (iovcnt <= MAXCLIOVECS) { 1512 #ifdef DEBUG 1513 rfs_write_hits++; 1514 #endif 1515 niovp = iov; 1516 } else { 1517 #ifdef DEBUG 1518 rfs_write_misses++; 1519 #endif 1520 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP); 1521 } 1522 /* 1523 * Put together the scatter/gather iovecs. 1524 */ 1525 iovp = niovp; 1526 trp = rp; 1527 count = 0; 1528 do { 1529 if (trp->wa->wa_data || trp->wa->wa_rlist) { 1530 if (trp->wa->wa_rlist) { 1531 iovp->iov_base = 1532 (char *)((trp->wa->wa_rlist)-> 1533 u.c_daddr3); 1534 iovp->iov_len = trp->wa->wa_count; 1535 } else { 1536 iovp->iov_base = trp->wa->wa_data; 1537 iovp->iov_len = trp->wa->wa_count; 1538 } 1539 iovp++; 1540 } else { 1541 m = trp->wa->wa_mblk; 1542 rcount = trp->wa->wa_count; 1543 while (m != NULL) { 1544 iovp->iov_base = (caddr_t)m->b_rptr; 1545 iovp->iov_len = (m->b_wptr - m->b_rptr); 1546 rcount -= iovp->iov_len; 1547 if (rcount < 0) 1548 iovp->iov_len += rcount; 1549 iovp++; 1550 if (rcount <= 0) 1551 break; 1552 m = m->b_cont; 1553 } 1554 } 1555 count += trp->wa->wa_count; 1556 trp = trp->list; 1557 } while (trp != lrp); 1558 1559 uio.uio_iov = niovp; 1560 uio.uio_iovcnt = iovcnt; 1561 uio.uio_segflg = UIO_SYSSPACE; 1562 uio.uio_extflg = UIO_COPY_DEFAULT; 1563 uio.uio_loffset = (offset_t)rp->wa->wa_offset; 1564 uio.uio_resid = count; 1565 /* 1566 * The limit is checked on the client. We 1567 * should allow any size writes here. 
1568 */ 1569 uio.uio_llimit = curproc->p_fsz_ctl; 1570 rlimit = uio.uio_llimit - rp->wa->wa_offset; 1571 if (rlimit < (rlim64_t)uio.uio_resid) 1572 uio.uio_resid = (uint_t)rlimit; 1573 1574 /* 1575 * For now we assume no append mode. 1576 */ 1577 1578 /* 1579 * We're changing creds because VM may fault 1580 * and we need the cred of the current 1581 * thread to be used if quota * checking is 1582 * enabled. 1583 */ 1584 savecred = curthread->t_cred; 1585 curthread->t_cred = cr; 1586 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct); 1587 curthread->t_cred = savecred; 1588 1589 /* check if a monitor detected a delegation conflict */ 1590 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 1591 /* mark as wouldblock so response is dropped */ 1592 curthread->t_flag |= T_WOULDBLOCK; 1593 1594 if (niovp != iov) 1595 kmem_free(niovp, sizeof (*niovp) * iovcnt); 1596 1597 if (!error) { 1598 data_written = 1; 1599 /* 1600 * Get attributes again so we send the latest mod 1601 * time to the client side for his cache. 1602 */ 1603 va.va_mask = AT_ALL; /* now we want everything */ 1604 1605 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct); 1606 1607 if (!error) 1608 acl_perm(vp, exi, &va, rp->cr); 1609 } 1610 1611 /* 1612 * Fill in the status responses for each request 1613 * which was just handled. Also, copy the latest 1614 * attributes in to the attribute responses if 1615 * appropriate. 1616 */ 1617 t_flag = curthread->t_flag & T_WOULDBLOCK; 1618 do { 1619 rp->thread->t_flag |= t_flag; 1620 /* check for overflows */ 1621 if (!error) { 1622 error = vattr_to_nattr(&va, &rp->ns->ns_attr); 1623 } 1624 rp->ns->ns_status = puterrno(error); 1625 rp = rp->list; 1626 } while (rp != lrp); 1627 } while (rp != NULL); 1628 1629 /* 1630 * If any data was written at all, then we need to flush 1631 * the data and metadata to stable storage. 
1632 */ 1633 if (data_written) { 1634 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct); 1635 1636 if (!error) { 1637 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1638 } 1639 } 1640 1641 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct); 1642 1643 if (in_crit) 1644 nbl_end_crit(vp); 1645 VN_RELE(vp); 1646 1647 t_flag = curthread->t_flag & T_WOULDBLOCK; 1648 mutex_enter(&rfs_async_write_lock); 1649 for (rp = nlp->list; rp != NULL; rp = rp->list) { 1650 if (rp->ns->ns_status == RFSWRITE_INITVAL) { 1651 rp->ns->ns_status = puterrno(error); 1652 rp->thread->t_flag |= t_flag; 1653 } 1654 } 1655 cv_broadcast(&nlp->cv); 1656 mutex_exit(&rfs_async_write_lock); 1657 1658 } 1659 1660 void * 1661 rfs_write_getfh(struct nfswriteargs *wa) 1662 { 1663 return (&wa->wa_fhandle); 1664 } 1665 1666 /* 1667 * Create a file. 1668 * Creates a file with given attributes and returns those attributes 1669 * and an fhandle for the new file. 1670 */ 1671 void 1672 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr, 1673 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1674 { 1675 int error; 1676 int lookuperr; 1677 int in_crit = 0; 1678 struct vattr va; 1679 vnode_t *vp; 1680 vnode_t *realvp; 1681 vnode_t *dvp; 1682 char *name = args->ca_da.da_name; 1683 vnode_t *tvp = NULL; 1684 int mode; 1685 int lookup_ok; 1686 bool_t trunc; 1687 struct sockaddr *ca; 1688 1689 /* 1690 * Disallow NULL paths 1691 */ 1692 if (name == NULL || *name == '\0') { 1693 dr->dr_status = NFSERR_ACCES; 1694 return; 1695 } 1696 1697 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 1698 if (dvp == NULL) { 1699 dr->dr_status = NFSERR_STALE; 1700 return; 1701 } 1702 1703 error = sattr_to_vattr(args->ca_sa, &va); 1704 if (error) { 1705 dr->dr_status = puterrno(error); 1706 return; 1707 } 1708 1709 /* 1710 * Must specify the mode. 
1711 */ 1712 if (!(va.va_mask & AT_MODE)) { 1713 VN_RELE(dvp); 1714 dr->dr_status = NFSERR_INVAL; 1715 return; 1716 } 1717 1718 /* 1719 * This is a completely gross hack to make mknod 1720 * work over the wire until we can wack the protocol 1721 */ 1722 if ((va.va_mode & IFMT) == IFCHR) { 1723 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV) 1724 va.va_type = VFIFO; /* xtra kludge for named pipe */ 1725 else { 1726 va.va_type = VCHR; 1727 /* 1728 * uncompress the received dev_t 1729 * if the top half is zero indicating a request 1730 * from an `older style' OS. 1731 */ 1732 if ((va.va_size & 0xffff0000) == 0) 1733 va.va_rdev = nfsv2_expdev(va.va_size); 1734 else 1735 va.va_rdev = (dev_t)va.va_size; 1736 } 1737 va.va_mask &= ~AT_SIZE; 1738 } else if ((va.va_mode & IFMT) == IFBLK) { 1739 va.va_type = VBLK; 1740 /* 1741 * uncompress the received dev_t 1742 * if the top half is zero indicating a request 1743 * from an `older style' OS. 1744 */ 1745 if ((va.va_size & 0xffff0000) == 0) 1746 va.va_rdev = nfsv2_expdev(va.va_size); 1747 else 1748 va.va_rdev = (dev_t)va.va_size; 1749 va.va_mask &= ~AT_SIZE; 1750 } else if ((va.va_mode & IFMT) == IFSOCK) { 1751 va.va_type = VSOCK; 1752 } else { 1753 va.va_type = VREG; 1754 } 1755 va.va_mode &= ~IFMT; 1756 va.va_mask |= AT_TYPE; 1757 1758 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 1759 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND, 1760 MAXPATHLEN); 1761 if (name == NULL) { 1762 dr->dr_status = puterrno(EINVAL); 1763 return; 1764 } 1765 1766 /* 1767 * Why was the choice made to use VWRITE as the mode to the 1768 * call to VOP_CREATE ? This results in a bug. When a client 1769 * opens a file that already exists and is RDONLY, the second 1770 * open fails with an EACESS because of the mode. 1771 * bug ID 1054648. 
1772 */ 1773 lookup_ok = 0; 1774 mode = VWRITE; 1775 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) { 1776 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1777 NULL, NULL, NULL); 1778 if (!error) { 1779 struct vattr at; 1780 1781 lookup_ok = 1; 1782 at.va_mask = AT_MODE; 1783 error = VOP_GETATTR(tvp, &at, 0, cr, NULL); 1784 if (!error) 1785 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD; 1786 VN_RELE(tvp); 1787 tvp = NULL; 1788 } 1789 } 1790 1791 if (!lookup_ok) { 1792 if (rdonly(ro, dvp)) { 1793 error = EROFS; 1794 } else if (va.va_type != VREG && va.va_type != VFIFO && 1795 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) { 1796 error = EPERM; 1797 } else { 1798 error = 0; 1799 } 1800 } 1801 1802 /* 1803 * If file size is being modified on an already existing file 1804 * make sure that there are no conflicting non-blocking mandatory 1805 * locks in the region being manipulated. Return EACCES if there 1806 * are conflicting locks. 1807 */ 1808 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) { 1809 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr, 1810 NULL, NULL, NULL); 1811 1812 if (!lookuperr && 1813 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) { 1814 VN_RELE(tvp); 1815 curthread->t_flag |= T_WOULDBLOCK; 1816 goto out; 1817 } 1818 1819 if (!lookuperr && nbl_need_check(tvp)) { 1820 /* 1821 * The file exists. Now check if it has any 1822 * conflicting non-blocking mandatory locks 1823 * in the region being changed. 
1824 */ 1825 struct vattr bva; 1826 u_offset_t offset; 1827 ssize_t length; 1828 1829 nbl_start_crit(tvp, RW_READER); 1830 in_crit = 1; 1831 1832 bva.va_mask = AT_SIZE; 1833 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL); 1834 if (!error) { 1835 if (va.va_size < bva.va_size) { 1836 offset = va.va_size; 1837 length = bva.va_size - va.va_size; 1838 } else { 1839 offset = bva.va_size; 1840 length = va.va_size - bva.va_size; 1841 } 1842 if (length) { 1843 if (nbl_conflict(tvp, NBL_WRITE, 1844 offset, length, 0, NULL)) { 1845 error = EACCES; 1846 } 1847 } 1848 } 1849 if (error) { 1850 nbl_end_crit(tvp); 1851 VN_RELE(tvp); 1852 in_crit = 0; 1853 } 1854 } else if (tvp != NULL) { 1855 VN_RELE(tvp); 1856 } 1857 } 1858 1859 if (!error) { 1860 /* 1861 * If filesystem is shared with nosuid the remove any 1862 * setuid/setgid bits on create. 1863 */ 1864 if (va.va_type == VREG && 1865 exi->exi_export.ex_flags & EX_NOSUID) 1866 va.va_mode &= ~(VSUID | VSGID); 1867 1868 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0, 1869 NULL, NULL); 1870 1871 if (!error) { 1872 1873 if ((va.va_mask & AT_SIZE) && (va.va_size == 0)) 1874 trunc = TRUE; 1875 else 1876 trunc = FALSE; 1877 1878 if (rfs4_check_delegated(FWRITE, vp, trunc)) { 1879 VN_RELE(vp); 1880 curthread->t_flag |= T_WOULDBLOCK; 1881 goto out; 1882 } 1883 va.va_mask = AT_ALL; 1884 1885 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1886 1887 /* check for overflows */ 1888 if (!error) { 1889 acl_perm(vp, exi, &va, cr); 1890 error = vattr_to_nattr(&va, &dr->dr_attr); 1891 if (!error) { 1892 error = makefh(&dr->dr_fhandle, vp, 1893 exi); 1894 } 1895 } 1896 /* 1897 * Force modified metadata out to stable storage. 
1898 * 1899 * if a underlying vp exists, pass it to VOP_FSYNC 1900 */ 1901 if (VOP_REALVP(vp, &realvp, NULL) == 0) 1902 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL); 1903 else 1904 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 1905 VN_RELE(vp); 1906 } 1907 1908 if (in_crit) { 1909 nbl_end_crit(tvp); 1910 VN_RELE(tvp); 1911 } 1912 } 1913 1914 /* 1915 * Force modified data and metadata out to stable storage. 1916 */ 1917 (void) VOP_FSYNC(dvp, 0, cr, NULL); 1918 1919 out: 1920 1921 VN_RELE(dvp); 1922 1923 dr->dr_status = puterrno(error); 1924 1925 if (name != args->ca_da.da_name) 1926 kmem_free(name, MAXPATHLEN); 1927 } 1928 void * 1929 rfs_create_getfh(struct nfscreatargs *args) 1930 { 1931 return (args->ca_da.da_fhandle); 1932 } 1933 1934 /* 1935 * Remove a file. 1936 * Remove named file from parent directory. 1937 */ 1938 /* ARGSUSED */ 1939 void 1940 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status, 1941 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 1942 { 1943 int error = 0; 1944 vnode_t *vp; 1945 vnode_t *targvp; 1946 int in_crit = 0; 1947 1948 /* 1949 * Disallow NULL paths 1950 */ 1951 if (da->da_name == NULL || *da->da_name == '\0') { 1952 *status = NFSERR_ACCES; 1953 return; 1954 } 1955 1956 vp = nfs_fhtovp(da->da_fhandle, exi); 1957 if (vp == NULL) { 1958 *status = NFSERR_STALE; 1959 return; 1960 } 1961 1962 if (rdonly(ro, vp)) { 1963 VN_RELE(vp); 1964 *status = NFSERR_ROFS; 1965 return; 1966 } 1967 1968 /* 1969 * Check for a conflict with a non-blocking mandatory share reservation. 1970 */ 1971 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0, 1972 NULL, cr, NULL, NULL, NULL); 1973 if (error != 0) { 1974 VN_RELE(vp); 1975 *status = puterrno(error); 1976 return; 1977 } 1978 1979 /* 1980 * If the file is delegated to an v4 client, then initiate 1981 * recall and drop this request (by setting T_WOULDBLOCK). 
1982 * The client will eventually re-transmit the request and 1983 * (hopefully), by then, the v4 client will have returned 1984 * the delegation. 1985 */ 1986 1987 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) { 1988 VN_RELE(vp); 1989 VN_RELE(targvp); 1990 curthread->t_flag |= T_WOULDBLOCK; 1991 return; 1992 } 1993 1994 if (nbl_need_check(targvp)) { 1995 nbl_start_crit(targvp, RW_READER); 1996 in_crit = 1; 1997 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) { 1998 error = EACCES; 1999 goto out; 2000 } 2001 } 2002 2003 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0); 2004 2005 /* 2006 * Force modified data and metadata out to stable storage. 2007 */ 2008 (void) VOP_FSYNC(vp, 0, cr, NULL); 2009 2010 out: 2011 if (in_crit) 2012 nbl_end_crit(targvp); 2013 VN_RELE(targvp); 2014 VN_RELE(vp); 2015 2016 *status = puterrno(error); 2017 2018 } 2019 2020 void * 2021 rfs_remove_getfh(struct nfsdiropargs *da) 2022 { 2023 return (da->da_fhandle); 2024 } 2025 2026 /* 2027 * rename a file 2028 * Give a file (from) a new name (to). 
 */
/* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's file handle must resolve to the very
	 * same export as the source; cross-export renames are refused
	 * with NFSERR_XDEV.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share
	 * reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* drop the request; the client will retransmit */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Keep the cached vnode-to-path mapping current. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);
}

void *
rfs_rename_getfh(struct nfsrnmargs *args)
{
	return (args->rna_from.da_fhandle);
}

/*
 * Link to a file.
 * Create a file (to) which is a hard link to the given file (from).
 */
/* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory's file handle must resolve to the same
	 * export as the source; cross-export links are refused with
	 * NFSERR_XDEV.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* The link is created in a directory. */
	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);
}

void *
rfs_link_getfh(struct nfslinkargs *args)
{
	return (args->la_from);
}

/*
 * Symbolicly link to a file.
 * Create a file (to) with the given attributes which is a symbolic link
 * to the given path name (to).
2265 */ 2266 void 2267 rfs_symlink(struct nfsslargs *args, enum nfsstat *status, 2268 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2269 { 2270 int error; 2271 struct vattr va; 2272 vnode_t *vp; 2273 vnode_t *svp; 2274 int lerror; 2275 struct sockaddr *ca; 2276 char *name = NULL; 2277 2278 /* 2279 * Disallow NULL paths 2280 */ 2281 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') { 2282 *status = NFSERR_ACCES; 2283 return; 2284 } 2285 2286 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi); 2287 if (vp == NULL) { 2288 *status = NFSERR_STALE; 2289 return; 2290 } 2291 2292 if (rdonly(ro, vp)) { 2293 VN_RELE(vp); 2294 *status = NFSERR_ROFS; 2295 return; 2296 } 2297 2298 error = sattr_to_vattr(args->sla_sa, &va); 2299 if (error) { 2300 VN_RELE(vp); 2301 *status = puterrno(error); 2302 return; 2303 } 2304 2305 if (!(va.va_mask & AT_MODE)) { 2306 VN_RELE(vp); 2307 *status = NFSERR_INVAL; 2308 return; 2309 } 2310 2311 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2312 name = nfscmd_convname(ca, exi, args->sla_tnm, 2313 NFSCMD_CONV_INBOUND, MAXPATHLEN); 2314 2315 if (name == NULL) { 2316 *status = NFSERR_ACCES; 2317 return; 2318 } 2319 2320 va.va_type = VLNK; 2321 va.va_mask |= AT_TYPE; 2322 2323 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0); 2324 2325 /* 2326 * Force new data and metadata out to stable storage. 2327 */ 2328 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0, 2329 NULL, cr, NULL, NULL, NULL); 2330 2331 if (!lerror) { 2332 (void) VOP_FSYNC(svp, 0, cr, NULL); 2333 VN_RELE(svp); 2334 } 2335 2336 /* 2337 * Force modified data and metadata out to stable storage. 
2338 */ 2339 (void) VOP_FSYNC(vp, 0, cr, NULL); 2340 2341 VN_RELE(vp); 2342 2343 *status = puterrno(error); 2344 if (name != args->sla_tnm) 2345 kmem_free(name, MAXPATHLEN); 2346 2347 } 2348 void * 2349 rfs_symlink_getfh(struct nfsslargs *args) 2350 { 2351 return (args->sla_from.da_fhandle); 2352 } 2353 2354 /* 2355 * Make a directory. 2356 * Create a directory with the given name, parent directory, and attributes. 2357 * Returns a file handle and attributes for the new directory. 2358 */ 2359 /* ARGSUSED */ 2360 void 2361 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr, 2362 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2363 { 2364 int error; 2365 struct vattr va; 2366 vnode_t *dvp = NULL; 2367 vnode_t *vp; 2368 char *name = args->ca_da.da_name; 2369 2370 /* 2371 * Disallow NULL paths 2372 */ 2373 if (name == NULL || *name == '\0') { 2374 dr->dr_status = NFSERR_ACCES; 2375 return; 2376 } 2377 2378 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi); 2379 if (vp == NULL) { 2380 dr->dr_status = NFSERR_STALE; 2381 return; 2382 } 2383 2384 if (rdonly(ro, vp)) { 2385 VN_RELE(vp); 2386 dr->dr_status = NFSERR_ROFS; 2387 return; 2388 } 2389 2390 error = sattr_to_vattr(args->ca_sa, &va); 2391 if (error) { 2392 VN_RELE(vp); 2393 dr->dr_status = puterrno(error); 2394 return; 2395 } 2396 2397 if (!(va.va_mask & AT_MODE)) { 2398 VN_RELE(vp); 2399 dr->dr_status = NFSERR_INVAL; 2400 return; 2401 } 2402 2403 va.va_type = VDIR; 2404 va.va_mask |= AT_TYPE; 2405 2406 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL); 2407 2408 if (!error) { 2409 /* 2410 * Attribtutes of the newly created directory should 2411 * be returned to the client. 
2412 */ 2413 va.va_mask = AT_ALL; /* We want everything */ 2414 error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 2415 2416 /* check for overflows */ 2417 if (!error) { 2418 acl_perm(vp, exi, &va, cr); 2419 error = vattr_to_nattr(&va, &dr->dr_attr); 2420 if (!error) { 2421 error = makefh(&dr->dr_fhandle, dvp, exi); 2422 } 2423 } 2424 /* 2425 * Force new data and metadata out to stable storage. 2426 */ 2427 (void) VOP_FSYNC(dvp, 0, cr, NULL); 2428 VN_RELE(dvp); 2429 } 2430 2431 /* 2432 * Force modified data and metadata out to stable storage. 2433 */ 2434 (void) VOP_FSYNC(vp, 0, cr, NULL); 2435 2436 VN_RELE(vp); 2437 2438 dr->dr_status = puterrno(error); 2439 2440 } 2441 void * 2442 rfs_mkdir_getfh(struct nfscreatargs *args) 2443 { 2444 return (args->ca_da.da_fhandle); 2445 } 2446 2447 /* 2448 * Remove a directory. 2449 * Remove the given directory name from the given parent directory. 2450 */ 2451 /* ARGSUSED */ 2452 void 2453 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, 2454 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2455 { 2456 int error; 2457 vnode_t *vp; 2458 2459 /* 2460 * Disallow NULL paths 2461 */ 2462 if (da->da_name == NULL || *da->da_name == '\0') { 2463 *status = NFSERR_ACCES; 2464 return; 2465 } 2466 2467 vp = nfs_fhtovp(da->da_fhandle, exi); 2468 if (vp == NULL) { 2469 *status = NFSERR_STALE; 2470 return; 2471 } 2472 2473 if (rdonly(ro, vp)) { 2474 VN_RELE(vp); 2475 *status = NFSERR_ROFS; 2476 return; 2477 } 2478 2479 /* 2480 * VOP_RMDIR takes a third argument (the current 2481 * directory of the process). That's because someone 2482 * wants to return EINVAL if one tries to remove ".". 2483 * Of course, NFS servers have no idea what their 2484 * clients' current directories are. We fake it by 2485 * supplying a vnode known to exist and illegal to 2486 * remove. 2487 */ 2488 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); 2489 2490 /* 2491 * Force modified data and metadata out to stable storage. 
2492 */ 2493 (void) VOP_FSYNC(vp, 0, cr, NULL); 2494 2495 VN_RELE(vp); 2496 2497 /* 2498 * System V defines rmdir to return EEXIST, not ENOTEMPTY, 2499 * if the directory is not empty. A System V NFS server 2500 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit 2501 * over the wire. 2502 */ 2503 if (error == EEXIST) 2504 *status = NFSERR_NOTEMPTY; 2505 else 2506 *status = puterrno(error); 2507 2508 } 2509 void * 2510 rfs_rmdir_getfh(struct nfsdiropargs *da) 2511 { 2512 return (da->da_fhandle); 2513 } 2514 2515 /* ARGSUSED */ 2516 void 2517 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd, 2518 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) 2519 { 2520 int error; 2521 int iseof; 2522 struct iovec iov; 2523 struct uio uio; 2524 vnode_t *vp; 2525 char *ndata = NULL; 2526 struct sockaddr *ca; 2527 size_t nents; 2528 int ret; 2529 2530 vp = nfs_fhtovp(&rda->rda_fh, exi); 2531 if (vp == NULL) { 2532 rd->rd_entries = NULL; 2533 rd->rd_status = NFSERR_STALE; 2534 return; 2535 } 2536 2537 if (vp->v_type != VDIR) { 2538 VN_RELE(vp); 2539 rd->rd_entries = NULL; 2540 rd->rd_status = NFSERR_NOTDIR; 2541 return; 2542 } 2543 2544 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2545 2546 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 2547 2548 if (error) { 2549 rd->rd_entries = NULL; 2550 goto bad; 2551 } 2552 2553 if (rda->rda_count == 0) { 2554 rd->rd_entries = NULL; 2555 rd->rd_size = 0; 2556 rd->rd_eof = FALSE; 2557 goto bad; 2558 } 2559 2560 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA); 2561 2562 /* 2563 * Allocate data for entries. This will be freed by rfs_rddirfree. 
2564 */ 2565 rd->rd_bufsize = (uint_t)rda->rda_count; 2566 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP); 2567 2568 /* 2569 * Set up io vector to read directory data 2570 */ 2571 iov.iov_base = (caddr_t)rd->rd_entries; 2572 iov.iov_len = rda->rda_count; 2573 uio.uio_iov = &iov; 2574 uio.uio_iovcnt = 1; 2575 uio.uio_segflg = UIO_SYSSPACE; 2576 uio.uio_extflg = UIO_COPY_CACHED; 2577 uio.uio_loffset = (offset_t)rda->rda_offset; 2578 uio.uio_resid = rda->rda_count; 2579 2580 /* 2581 * read directory 2582 */ 2583 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0); 2584 2585 /* 2586 * Clean up 2587 */ 2588 if (!error) { 2589 /* 2590 * set size and eof 2591 */ 2592 if (uio.uio_resid == rda->rda_count) { 2593 rd->rd_size = 0; 2594 rd->rd_eof = TRUE; 2595 } else { 2596 rd->rd_size = (uint32_t)(rda->rda_count - 2597 uio.uio_resid); 2598 rd->rd_eof = iseof ? TRUE : FALSE; 2599 } 2600 } 2601 2602 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 2603 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size); 2604 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents, 2605 rda->rda_count, &ndata); 2606 2607 if (ret != 0) { 2608 size_t dropbytes; 2609 /* 2610 * We had to drop one or more entries in order to fit 2611 * during the character conversion. We need to patch 2612 * up the size and eof info. 2613 */ 2614 if (rd->rd_eof) 2615 rd->rd_eof = FALSE; 2616 dropbytes = nfscmd_dropped_entrysize( 2617 (struct dirent64 *)rd->rd_entries, nents, ret); 2618 rd->rd_size -= dropbytes; 2619 } 2620 if (ndata == NULL) { 2621 ndata = (char *)rd->rd_entries; 2622 } else if (ndata != (char *)rd->rd_entries) { 2623 kmem_free(rd->rd_entries, rd->rd_bufsize); 2624 rd->rd_entries = (void *)ndata; 2625 rd->rd_bufsize = rda->rda_count; 2626 } 2627 2628 bad: 2629 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2630 2631 #if 0 /* notyet */ 2632 /* 2633 * Don't do this. 
It causes local disk writes when just 2634 * reading the file and the overhead is deemed larger 2635 * than the benefit. 2636 */ 2637 /* 2638 * Force modified metadata out to stable storage. 2639 */ 2640 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL); 2641 #endif 2642 2643 VN_RELE(vp); 2644 2645 rd->rd_status = puterrno(error); 2646 2647 } 2648 void * 2649 rfs_readdir_getfh(struct nfsrddirargs *rda) 2650 { 2651 return (&rda->rda_fh); 2652 } 2653 void 2654 rfs_rddirfree(struct nfsrddirres *rd) 2655 { 2656 if (rd->rd_entries != NULL) 2657 kmem_free(rd->rd_entries, rd->rd_bufsize); 2658 } 2659 2660 /* ARGSUSED */ 2661 void 2662 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi, 2663 struct svc_req *req, cred_t *cr, bool_t ro) 2664 { 2665 int error; 2666 struct statvfs64 sb; 2667 vnode_t *vp; 2668 2669 vp = nfs_fhtovp(fh, exi); 2670 if (vp == NULL) { 2671 fs->fs_status = NFSERR_STALE; 2672 return; 2673 } 2674 2675 error = VFS_STATVFS(vp->v_vfsp, &sb); 2676 2677 if (!error) { 2678 fs->fs_tsize = nfstsize(); 2679 fs->fs_bsize = sb.f_frsize; 2680 fs->fs_blocks = sb.f_blocks; 2681 fs->fs_bfree = sb.f_bfree; 2682 fs->fs_bavail = sb.f_bavail; 2683 } 2684 2685 VN_RELE(vp); 2686 2687 fs->fs_status = puterrno(error); 2688 2689 } 2690 void * 2691 rfs_statfs_getfh(fhandle_t *fh) 2692 { 2693 return (fh); 2694 } 2695 2696 static int 2697 sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) 2698 { 2699 vap->va_mask = 0; 2700 2701 /* 2702 * There was a sign extension bug in some VFS based systems 2703 * which stored the mode as a short. When it would get 2704 * assigned to a u_long, no sign extension would occur. 2705 * It needed to, but this wasn't noticed because sa_mode 2706 * would then get assigned back to the short, thus ignoring 2707 * the upper 16 bits of sa_mode. 2708 * 2709 * To make this implementation work for both broken 2710 * clients and good clients, we check for both versions 2711 * of the mode. 
2712 */ 2713 if (sa->sa_mode != (uint32_t)((ushort_t)-1) && 2714 sa->sa_mode != (uint32_t)-1) { 2715 vap->va_mask |= AT_MODE; 2716 vap->va_mode = sa->sa_mode; 2717 } 2718 if (sa->sa_uid != (uint32_t)-1) { 2719 vap->va_mask |= AT_UID; 2720 vap->va_uid = sa->sa_uid; 2721 } 2722 if (sa->sa_gid != (uint32_t)-1) { 2723 vap->va_mask |= AT_GID; 2724 vap->va_gid = sa->sa_gid; 2725 } 2726 if (sa->sa_size != (uint32_t)-1) { 2727 vap->va_mask |= AT_SIZE; 2728 vap->va_size = sa->sa_size; 2729 } 2730 if (sa->sa_atime.tv_sec != (int32_t)-1 && 2731 sa->sa_atime.tv_usec != (int32_t)-1) { 2732 #ifndef _LP64 2733 /* return error if time overflow */ 2734 if (!NFS2_TIME_OK(sa->sa_atime.tv_sec)) 2735 return (EOVERFLOW); 2736 #endif 2737 vap->va_mask |= AT_ATIME; 2738 /* 2739 * nfs protocol defines times as unsigned so don't extend sign, 2740 * unless sysadmin set nfs_allow_preepoch_time. 2741 */ 2742 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec); 2743 vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000); 2744 } 2745 if (sa->sa_mtime.tv_sec != (int32_t)-1 && 2746 sa->sa_mtime.tv_usec != (int32_t)-1) { 2747 #ifndef _LP64 2748 /* return error if time overflow */ 2749 if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec)) 2750 return (EOVERFLOW); 2751 #endif 2752 vap->va_mask |= AT_MTIME; 2753 /* 2754 * nfs protocol defines times as unsigned so don't extend sign, 2755 * unless sysadmin set nfs_allow_preepoch_time. 2756 */ 2757 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec); 2758 vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000); 2759 } 2760 return (0); 2761 } 2762 2763 static enum nfsftype vt_to_nf[] = { 2764 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 2765 }; 2766 2767 /* 2768 * check the following fields for overflow: nodeid, size, and time. 2769 * There could be a problem when converting 64-bit LP64 fields 2770 * into 32-bit ones. Return an error if there is an overflow. 
2771 */ 2772 int 2773 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na) 2774 { 2775 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD); 2776 na->na_type = vt_to_nf[vap->va_type]; 2777 2778 if (vap->va_mode == (unsigned short) -1) 2779 na->na_mode = (uint32_t)-1; 2780 else 2781 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode; 2782 2783 if (vap->va_uid == (unsigned short)(-1)) 2784 na->na_uid = (uint32_t)(-1); 2785 else if (vap->va_uid == UID_NOBODY) 2786 na->na_uid = (uint32_t)NFS_UID_NOBODY; 2787 else 2788 na->na_uid = vap->va_uid; 2789 2790 if (vap->va_gid == (unsigned short)(-1)) 2791 na->na_gid = (uint32_t)-1; 2792 else if (vap->va_gid == GID_NOBODY) 2793 na->na_gid = (uint32_t)NFS_GID_NOBODY; 2794 else 2795 na->na_gid = vap->va_gid; 2796 2797 /* 2798 * Do we need to check fsid for overflow? It is 64-bit in the 2799 * vattr, but are bigger than 32 bit values supported? 2800 */ 2801 na->na_fsid = vap->va_fsid; 2802 2803 na->na_nodeid = vap->va_nodeid; 2804 2805 /* 2806 * Check to make sure that the nodeid is representable over the 2807 * wire without losing bits. 2808 */ 2809 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid) 2810 return (EFBIG); 2811 na->na_nlink = vap->va_nlink; 2812 2813 /* 2814 * Check for big files here, instead of at the caller. See 2815 * comments in cstat for large special file explanation. 2816 */ 2817 if (vap->va_size > (u_longlong_t)MAXOFF32_T) { 2818 if ((vap->va_type == VREG) || (vap->va_type == VDIR)) 2819 return (EFBIG); 2820 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) { 2821 /* UNKNOWN_SIZE | OVERFLOW */ 2822 na->na_size = MAXOFF32_T; 2823 } else 2824 na->na_size = vap->va_size; 2825 } else 2826 na->na_size = vap->va_size; 2827 2828 /* 2829 * If the vnode times overflow the 32-bit times that NFS2 2830 * uses on the wire then return an error. 
2831 */ 2832 if (!NFS_VAP_TIME_OK(vap)) { 2833 return (EOVERFLOW); 2834 } 2835 na->na_atime.tv_sec = vap->va_atime.tv_sec; 2836 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 2837 2838 na->na_mtime.tv_sec = vap->va_mtime.tv_sec; 2839 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 2840 2841 na->na_ctime.tv_sec = vap->va_ctime.tv_sec; 2842 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000; 2843 2844 /* 2845 * If the dev_t will fit into 16 bits then compress 2846 * it, otherwise leave it alone. See comments in 2847 * nfs_client.c. 2848 */ 2849 if (getminor(vap->va_rdev) <= SO4_MAXMIN && 2850 getmajor(vap->va_rdev) <= SO4_MAXMAJ) 2851 na->na_rdev = nfsv2_cmpdev(vap->va_rdev); 2852 else 2853 (void) cmpldev(&na->na_rdev, vap->va_rdev); 2854 2855 na->na_blocks = vap->va_nblocks; 2856 na->na_blocksize = vap->va_blksize; 2857 2858 /* 2859 * This bit of ugliness is a *TEMPORARY* hack to preserve the 2860 * over-the-wire protocols for named-pipe vnodes. It remaps the 2861 * VFIFO type to the special over-the-wire type. (see note in nfs.h) 2862 * 2863 * BUYER BEWARE: 2864 * If you are porting the NFS to a non-Sun server, you probably 2865 * don't want to include the following block of code. The 2866 * over-the-wire special file types will be changing with the 2867 * NFS Protocol Revision. 2868 */ 2869 if (vap->va_type == VFIFO) 2870 NA_SETFIFO(na); 2871 return (0); 2872 } 2873 2874 /* 2875 * acl v2 support: returns approximate permission. 2876 * default: returns minimal permission (more restrictive) 2877 * aclok: returns maximal permission (less restrictive) 2878 * This routine changes the permissions that are alaredy in *va. 2879 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES, 2880 * CLASS_OBJ is always the same as GROUP_OBJ entry. 
2881 */ 2882 static void 2883 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) 2884 { 2885 vsecattr_t vsa; 2886 int aclcnt; 2887 aclent_t *aclentp; 2888 mode_t mask_perm; 2889 mode_t grp_perm; 2890 mode_t other_perm; 2891 mode_t other_orig; 2892 int error; 2893 2894 /* dont care default acl */ 2895 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT); 2896 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL); 2897 2898 if (!error) { 2899 aclcnt = vsa.vsa_aclcnt; 2900 if (aclcnt > MIN_ACL_ENTRIES) { 2901 /* non-trivial ACL */ 2902 aclentp = vsa.vsa_aclentp; 2903 if (exi->exi_export.ex_flags & EX_ACLOK) { 2904 /* maximal permissions */ 2905 grp_perm = 0; 2906 other_perm = 0; 2907 for (; aclcnt > 0; aclcnt--, aclentp++) { 2908 switch (aclentp->a_type) { 2909 case USER_OBJ: 2910 break; 2911 case USER: 2912 grp_perm |= 2913 aclentp->a_perm << 3; 2914 other_perm |= aclentp->a_perm; 2915 break; 2916 case GROUP_OBJ: 2917 grp_perm |= 2918 aclentp->a_perm << 3; 2919 break; 2920 case GROUP: 2921 other_perm |= aclentp->a_perm; 2922 break; 2923 case OTHER_OBJ: 2924 other_orig = aclentp->a_perm; 2925 break; 2926 case CLASS_OBJ: 2927 mask_perm = aclentp->a_perm; 2928 break; 2929 default: 2930 break; 2931 } 2932 } 2933 grp_perm &= mask_perm << 3; 2934 other_perm &= mask_perm; 2935 other_perm |= other_orig; 2936 2937 } else { 2938 /* minimal permissions */ 2939 grp_perm = 070; 2940 other_perm = 07; 2941 for (; aclcnt > 0; aclcnt--, aclentp++) { 2942 switch (aclentp->a_type) { 2943 case USER_OBJ: 2944 break; 2945 case USER: 2946 case CLASS_OBJ: 2947 grp_perm &= 2948 aclentp->a_perm << 3; 2949 other_perm &= 2950 aclentp->a_perm; 2951 break; 2952 case GROUP_OBJ: 2953 grp_perm &= 2954 aclentp->a_perm << 3; 2955 break; 2956 case GROUP: 2957 other_perm &= 2958 aclentp->a_perm; 2959 break; 2960 case OTHER_OBJ: 2961 other_perm &= 2962 aclentp->a_perm; 2963 break; 2964 default: 2965 break; 2966 } 2967 } 2968 } 2969 /* copy to va */ 2970 va->va_mode &= ~077; 2971 va->va_mode |= 
grp_perm | other_perm; 2972 } 2973 if (vsa.vsa_aclcnt) 2974 kmem_free(vsa.vsa_aclentp, 2975 vsa.vsa_aclcnt * sizeof (aclent_t)); 2976 } 2977 } 2978 2979 void 2980 rfs_srvrinit(void) 2981 { 2982 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); 2983 nfs2_srv_caller_id = fs_new_caller_id(); 2984 } 2985 2986 void 2987 rfs_srvrfini(void) 2988 { 2989 mutex_destroy(&rfs_async_write_lock); 2990 } 2991 2992 static int 2993 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr) 2994 { 2995 struct clist *wcl; 2996 int wlist_len; 2997 uint32_t count = rr->rr_count; 2998 2999 wcl = ra->ra_wlist; 3000 3001 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { 3002 return (FALSE); 3003 } 3004 3005 wcl = ra->ra_wlist; 3006 rr->rr_ok.rrok_wlist_len = wlist_len; 3007 rr->rr_ok.rrok_wlist = wcl; 3008 3009 return (TRUE); 3010 } --- EOF ---