/* 6583 remove whole-process swapping */
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All rights reserved.
29 */
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/stat.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/kstat.h>
45 #include <sys/dirent.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/vtrace.h>
49 #include <sys/mode.h>
50 #include <sys/acl.h>
51 #include <sys/nbmlock.h>
52 #include <sys/policy.h>
53 #include <sys/sdt.h>
54
55 #include <rpc/types.h>
56 #include <rpc/auth.h>
57 #include <rpc/svc.h>
58
59 #include <nfs/nfs.h>
60 #include <nfs/export.h>
61 #include <nfs/nfs_cmd.h>
62
63 #include <vm/hat.h>
64 #include <vm/as.h>
65 #include <vm/seg.h>
66 #include <vm/seg_map.h>
67 #include <vm/seg_kmem.h>
68
69 #include <sys/strsubr.h>
70
71 /*
72 * These are the interface routines for the server side of the
73 * Network File System. See the NFS version 2 protocol specification
74 * for a description of this interface.
75 */
76
/* Forward declarations of local helpers (defined later in this file). */
static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
	cred_t *);

/*
 * Some "over the wire" UNIX file types. These are encoded
 * into the mode. This needs to be fixed in the next rev.
 */
#define	IFMT	0170000		/* type of file */
#define	IFCHR	0020000		/* character special */
#define	IFBLK	0060000		/* block special */
#define	IFSOCK	0140000		/* socket */

/* Opaque id placed in caller_context_t (cc_caller_id) by the v2 server. */
u_longlong_t nfs2_srv_caller_id;
91
92 /*
93 * Get file attributes.
94 * Returns the current attributes of the file with the given fhandle.
95 */
96 /* ARGSUSED */
97 void
98 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
99 struct svc_req *req, cred_t *cr, bool_t ro)
100 {
101 int error;
102 vnode_t *vp;
103 struct vattr va;
104
105 vp = nfs_fhtovp(fhp, exi);
106 if (vp == NULL) {
107 ns->ns_status = NFSERR_STALE;
108 return;
109 }
110
111 /*
112 * Do the getattr.
113 */
114 va.va_mask = AT_ALL; /* we want all the attributes */
115
116 error = rfs4_delegated_getattr(vp, &va, 0, cr);
117
118 /* check for overflows */
119 if (!error) {
120 /* Lie about the object type for a referral */
121 if (vn_is_nfs_reparse(vp, cr))
122 va.va_type = VLNK;
123
124 acl_perm(vp, exi, &va, cr);
125 error = vattr_to_nattr(&va, &ns->ns_attr);
126 }
127
128 VN_RELE(vp);
129
130 ns->ns_status = puterrno(error);
131 }
/* Return the file handle from a GETATTR request's arguments. */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
137
/*
 * Set file attributes.
 * Sets the attributes of the file with the given fhandle. Returns
 * the new attributes.
 *
 * On success ns->ns_attr holds the freshly fetched attributes; on a
 * delegation conflict the request is dropped (T_WOULDBLOCK) and no
 * status is set.
 */
/* ARGSUSED */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int flag;		/* flags passed through to VOP_SETATTR */
	int in_crit = 0;	/* 1 while inside the nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes currently on the file */
	struct flock64 bf;
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Convert the over-the-wire NFSv2 sattr into a vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	/* Identify ourselves to VOP monitors; never block on delegations. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/* The affected region is [min(old,new), |old-new|). */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Owner gets the size change via VOP_SPACE (no access
		 * check); anyone else falls through to VOP_SETATTR with
		 * AT_SIZE still set, which performs normal access checks.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/* Return the file handle from a SETATTR request's arguments. */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
330
331 /*
332 * Directory lookup.
333 * Returns an fhandle and file attributes for file name in a directory.
334 */
335 /* ARGSUSED */
336 void
337 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
338 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
339 {
340 int error;
341 vnode_t *dvp;
342 vnode_t *vp;
343 struct vattr va;
344 fhandle_t *fhp = da->da_fhandle;
345 struct sec_ol sec = {0, 0};
346 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
347 char *name;
348 struct sockaddr *ca;
349
350 /*
351 * Trusted Extension doesn't support NFSv2. MOUNT
352 * will reject v2 clients. Need to prevent v2 client
353 * access via WebNFS here.
354 */
355 if (is_system_labeled() && req->rq_vers == 2) {
356 dr->dr_status = NFSERR_ACCES;
357 return;
358 }
359
360 /*
361 * Disallow NULL paths
362 */
363 if (da->da_name == NULL || *da->da_name == '\0') {
364 dr->dr_status = NFSERR_ACCES;
365 return;
366 }
367
368 /*
369 * Allow lookups from the root - the default
370 * location of the public filehandle.
371 */
372 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
373 dvp = rootdir;
374 VN_HOLD(dvp);
375 } else {
376 dvp = nfs_fhtovp(fhp, exi);
377 if (dvp == NULL) {
378 dr->dr_status = NFSERR_STALE;
379 return;
380 }
381 }
382
383 /*
384 * Not allow lookup beyond root.
385 * If the filehandle matches a filehandle of the exi,
386 * then the ".." refers beyond the root of an exported filesystem.
387 */
388 if (strcmp(da->da_name, "..") == 0 &&
389 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
390 VN_RELE(dvp);
391 dr->dr_status = NFSERR_NOENT;
392 return;
393 }
394
395 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
396 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
397 MAXPATHLEN);
398
399 if (name == NULL) {
400 dr->dr_status = NFSERR_ACCES;
401 return;
402 }
403
404 /*
405 * If the public filehandle is used then allow
406 * a multi-component lookup, i.e. evaluate
407 * a pathname and follow symbolic links if
408 * necessary.
409 *
410 * This may result in a vnode in another filesystem
411 * which is OK as long as the filesystem is exported.
412 */
413 if (PUBLIC_FH2(fhp)) {
414 publicfh_flag = TRUE;
415 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
416 &sec);
417 } else {
418 /*
419 * Do a normal single component lookup.
420 */
421 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
422 NULL, NULL, NULL);
423 }
424
425 if (name != da->da_name)
426 kmem_free(name, MAXPATHLEN);
427
428
429 if (!error) {
430 va.va_mask = AT_ALL; /* we want everything */
431
432 error = rfs4_delegated_getattr(vp, &va, 0, cr);
433
434 /* check for overflows */
435 if (!error) {
436 acl_perm(vp, exi, &va, cr);
437 error = vattr_to_nattr(&va, &dr->dr_attr);
438 if (!error) {
439 if (sec.sec_flags & SEC_QUERY)
440 error = makefh_ol(&dr->dr_fhandle, exi,
441 sec.sec_index);
442 else {
443 error = makefh(&dr->dr_fhandle, vp,
444 exi);
445 if (!error && publicfh_flag &&
446 !chk_clnt_sec(exi, req))
447 auth_weak = TRUE;
448 }
449 }
450 }
451 VN_RELE(vp);
452 }
453
454 VN_RELE(dvp);
455
456 /*
457 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
458 * and have obtained a new exportinfo in exi which needs to be
459 * released. Note the the original exportinfo pointed to by exi
460 * will be released by the caller, comon_dispatch.
461 */
462 if (publicfh_flag && exi != NULL)
463 exi_rele(exi);
464
465 /*
466 * If it's public fh, no 0x81, and client's flavor is
467 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
468 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
469 */
470 if (auth_weak)
471 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
472 else
473 dr->dr_status = puterrno(error);
474 }
/* Return the directory file handle from a LOOKUP request's arguments. */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
480
/*
 * Read symbolic link.
 * Returns the string in the symbolic link at the given fhandle.
 *
 * Referrals (NFS reparse points) are reported to the client as
 * symlinks whose contents are synthesized by build_symlink().
 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;	/* 1 if vp is a referral, not a real symlink */

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/* Refuse objects subject to mandatory locking. */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link fills the whole buffer
		 * (uio_resid == 0) this writes one byte past rl_data's
		 * NFS_MAXPATHLEN allocation — confirm VOP_READLINK cannot
		 * return a completely full buffer here.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Convert the link text to the client's character set, if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
/* Return the file handle from a READLINK request's arguments. */
void *
rfs_readlink_getfh(fhandle_t *fhp)
{
	return (fhp);
}
616 /*
617 * Free data allocated by rfs_readlink
618 */
619 void
620 rfs_rlfree(struct nfsrdlnres *rl)
621 {
622 if (rl->rl_data != NULL)
623 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
624 }
625
/* Forward declaration: RDMA read-reply setup helper used by rfs_read(). */
static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
627
628 /*
629 * Read data.
630 * Returns some data read from the file at the given fhandle.
631 */
632 /* ARGSUSED */
633 void
634 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
635 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
636 {
637 vnode_t *vp;
638 int error;
639 struct vattr va;
640 struct iovec iov;
641 struct uio uio;
642 mblk_t *mp;
643 int alloc_err = 0;
644 int in_crit = 0;
645 caller_context_t ct;
646
647 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
648 if (vp == NULL) {
649 rr->rr_data = NULL;
650 rr->rr_status = NFSERR_STALE;
651 return;
652 }
653
654 if (vp->v_type != VREG) {
655 VN_RELE(vp);
656 rr->rr_data = NULL;
657 rr->rr_status = NFSERR_ISDIR;
658 return;
659 }
660
661 ct.cc_sysid = 0;
662 ct.cc_pid = 0;
663 ct.cc_caller_id = nfs2_srv_caller_id;
664 ct.cc_flags = CC_DONTBLOCK;
665
666 /*
667 * Enter the critical region before calling VOP_RWLOCK
668 * to avoid a deadlock with write requests.
669 */
670 if (nbl_need_check(vp)) {
671 nbl_start_crit(vp, RW_READER);
672 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
673 0, NULL)) {
674 nbl_end_crit(vp);
675 VN_RELE(vp);
676 rr->rr_data = NULL;
677 rr->rr_status = NFSERR_ACCES;
678 return;
679 }
680 in_crit = 1;
681 }
682
683 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
684
685 /* check if a monitor detected a delegation conflict */
686 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
687 VN_RELE(vp);
688 /* mark as wouldblock so response is dropped */
689 curthread->t_flag |= T_WOULDBLOCK;
690
691 rr->rr_data = NULL;
692 return;
693 }
694
695 va.va_mask = AT_ALL;
696
697 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
698
699 if (error) {
700 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
701 if (in_crit)
702 nbl_end_crit(vp);
703
704 VN_RELE(vp);
705 rr->rr_data = NULL;
706 rr->rr_status = puterrno(error);
707
708 return;
709 }
710
711 /*
712 * This is a kludge to allow reading of files created
713 * with no read permission. The owner of the file
714 * is always allowed to read it.
715 */
716 if (crgetuid(cr) != va.va_uid) {
717 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
718
719 if (error) {
720 /*
721 * Exec is the same as read over the net because
722 * of demand loading.
723 */
724 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
725 }
726 if (error) {
727 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
728 if (in_crit)
729 nbl_end_crit(vp);
730 VN_RELE(vp);
731 rr->rr_data = NULL;
732 rr->rr_status = puterrno(error);
733
734 return;
735 }
736 }
737
738 if (MANDLOCK(vp, va.va_mode)) {
739 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
740 if (in_crit)
741 nbl_end_crit(vp);
742
743 VN_RELE(vp);
744 rr->rr_data = NULL;
745 rr->rr_status = NFSERR_ACCES;
746
747 return;
748 }
749
750 rr->rr_ok.rrok_wlist_len = 0;
751 rr->rr_ok.rrok_wlist = NULL;
752
753 if ((u_offset_t)ra->ra_offset >= va.va_size) {
754 rr->rr_count = 0;
755 rr->rr_data = NULL;
756 /*
757 * In this case, status is NFS_OK, but there is no data
758 * to encode. So set rr_mp to NULL.
759 */
760 rr->rr_mp = NULL;
761 rr->rr_ok.rrok_wlist = ra->ra_wlist;
762 if (rr->rr_ok.rrok_wlist)
763 clist_zero_len(rr->rr_ok.rrok_wlist);
764 goto done;
765 }
766
767 if (ra->ra_wlist) {
768 mp = NULL;
769 rr->rr_mp = NULL;
770 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
771 if (ra->ra_count > iov.iov_len) {
772 rr->rr_data = NULL;
773 rr->rr_status = NFSERR_INVAL;
774 goto done;
775 }
776 } else {
777 /*
778 * mp will contain the data to be sent out in the read reply.
779 * This will be freed after the reply has been sent out (by the
780 * driver).
781 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
782 * that the call to xdrmblk_putmblk() never fails.
783 */
784 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
785 &alloc_err);
786 ASSERT(mp != NULL);
787 ASSERT(alloc_err == 0);
788
789 rr->rr_mp = mp;
790
791 /*
792 * Set up io vector
793 */
794 iov.iov_base = (caddr_t)mp->b_datap->db_base;
795 iov.iov_len = ra->ra_count;
796 }
797
798 uio.uio_iov = &iov;
799 uio.uio_iovcnt = 1;
800 uio.uio_segflg = UIO_SYSSPACE;
801 uio.uio_extflg = UIO_COPY_CACHED;
802 uio.uio_loffset = (offset_t)ra->ra_offset;
803 uio.uio_resid = ra->ra_count;
804
805 error = VOP_READ(vp, &uio, 0, cr, &ct);
806
807 if (error) {
808 if (mp)
809 freeb(mp);
810
811 /*
812 * check if a monitor detected a delegation conflict and
813 * mark as wouldblock so response is dropped
814 */
815 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
816 curthread->t_flag |= T_WOULDBLOCK;
817 else
818 rr->rr_status = puterrno(error);
819
820 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
821 if (in_crit)
822 nbl_end_crit(vp);
823
824 VN_RELE(vp);
825 rr->rr_data = NULL;
826
827 return;
828 }
829
830 /*
831 * Get attributes again so we can send the latest access
832 * time to the client side for his cache.
833 */
834 va.va_mask = AT_ALL;
835
836 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
837
838 if (error) {
839 if (mp)
840 freeb(mp);
841
842 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
843 if (in_crit)
844 nbl_end_crit(vp);
845
846 VN_RELE(vp);
847 rr->rr_data = NULL;
848 rr->rr_status = puterrno(error);
849
850 return;
851 }
852
853 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
854
855 if (mp) {
856 rr->rr_data = (char *)mp->b_datap->db_base;
857 } else {
858 if (ra->ra_wlist) {
859 rr->rr_data = (caddr_t)iov.iov_base;
860 if (!rdma_setup_read_data2(ra, rr)) {
861 rr->rr_data = NULL;
862 rr->rr_status = puterrno(NFSERR_INVAL);
863 }
864 }
865 }
866 done:
867 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
868 if (in_crit)
869 nbl_end_crit(vp);
870
871 acl_perm(vp, exi, &va, cr);
872
873 /* check for overflows */
874 error = vattr_to_nattr(&va, &rr->rr_attr);
875
876 VN_RELE(vp);
877
878 rr->rr_status = puterrno(error);
879 }
880
881 /*
882 * Free data allocated by rfs_read
883 */
884 void
885 rfs_rdfree(struct nfsrdresult *rr)
886 {
887 mblk_t *mp;
888
889 if (rr->rr_status == NFS_OK) {
890 mp = rr->rr_mp;
891 if (mp != NULL)
892 freeb(mp);
893 }
894 }
895
/* Return the file handle from a READ request's arguments. */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
901
/*
 * Max iovecs rfs_write_sync() keeps on the stack before falling back
 * to a kmem_alloc'd array; the DEBUG counters below track how often
 * each case occurs.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
static int rfs_write_sync_hits = 0;	/* mblk chain fit in iov[] */
static int rfs_write_sync_misses = 0;	/* chain needed kmem_alloc */
#endif
908
909 /*
910 * Write data to file.
911 * Returns attributes of a file after writing some data to it.
912 *
913 * Any changes made here, especially in error handling might have
914 * to also be done in rfs_write (which clusters write requests).
915 */
916 /* ARGSUSED */
917 void
918 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
919 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
920 {
921 int error;
922 vnode_t *vp;
923 rlim64_t rlimit;
924 struct vattr va;
925 struct uio uio;
926 struct iovec iov[MAX_IOVECS];
927 mblk_t *m;
928 struct iovec *iovp;
929 int iovcnt;
930 cred_t *savecred;
931 int in_crit = 0;
932 caller_context_t ct;
933
934 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
935 if (vp == NULL) {
936 ns->ns_status = NFSERR_STALE;
937 return;
938 }
939
940 if (rdonly(ro, vp)) {
941 VN_RELE(vp);
942 ns->ns_status = NFSERR_ROFS;
943 return;
944 }
945
946 if (vp->v_type != VREG) {
947 VN_RELE(vp);
948 ns->ns_status = NFSERR_ISDIR;
949 return;
950 }
951
952 ct.cc_sysid = 0;
953 ct.cc_pid = 0;
954 ct.cc_caller_id = nfs2_srv_caller_id;
955 ct.cc_flags = CC_DONTBLOCK;
956
957 va.va_mask = AT_UID|AT_MODE;
958
959 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
960
961 if (error) {
962 VN_RELE(vp);
963 ns->ns_status = puterrno(error);
964
965 return;
966 }
967
968 if (crgetuid(cr) != va.va_uid) {
969 /*
970 * This is a kludge to allow writes of files created
971 * with read only permission. The owner of the file
972 * is always allowed to write it.
973 */
974 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
975
976 if (error) {
977 VN_RELE(vp);
978 ns->ns_status = puterrno(error);
979 return;
980 }
981 }
982
983 /*
984 * Can't access a mandatory lock file. This might cause
985 * the NFS service thread to block forever waiting for a
986 * lock to be released that will never be released.
987 */
988 if (MANDLOCK(vp, va.va_mode)) {
989 VN_RELE(vp);
990 ns->ns_status = NFSERR_ACCES;
991 return;
992 }
993
994 /*
995 * We have to enter the critical region before calling VOP_RWLOCK
996 * to avoid a deadlock with ufs.
997 */
998 if (nbl_need_check(vp)) {
999 nbl_start_crit(vp, RW_READER);
1000 in_crit = 1;
1001 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
1002 wa->wa_count, 0, NULL)) {
1003 error = EACCES;
1004 goto out;
1005 }
1006 }
1007
1008 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1009
1010 /* check if a monitor detected a delegation conflict */
1011 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1012 VN_RELE(vp);
1013 /* mark as wouldblock so response is dropped */
1014 curthread->t_flag |= T_WOULDBLOCK;
1015 return;
1016 }
1017
1018 if (wa->wa_data || wa->wa_rlist) {
1019 /* Do the RDMA thing if necessary */
1020 if (wa->wa_rlist) {
1021 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1022 iov[0].iov_len = wa->wa_count;
1023 } else {
1024 iov[0].iov_base = wa->wa_data;
1025 iov[0].iov_len = wa->wa_count;
1026 }
1027 uio.uio_iov = iov;
1028 uio.uio_iovcnt = 1;
1029 uio.uio_segflg = UIO_SYSSPACE;
1030 uio.uio_extflg = UIO_COPY_DEFAULT;
1031 uio.uio_loffset = (offset_t)wa->wa_offset;
1032 uio.uio_resid = wa->wa_count;
1033 /*
1034 * The limit is checked on the client. We
1035 * should allow any size writes here.
1036 */
1037 uio.uio_llimit = curproc->p_fsz_ctl;
1038 rlimit = uio.uio_llimit - wa->wa_offset;
1039 if (rlimit < (rlim64_t)uio.uio_resid)
1040 uio.uio_resid = (uint_t)rlimit;
1041
1042 /*
1043 * for now we assume no append mode
1044 */
1045 /*
1046 * We're changing creds because VM may fault and we need
1047 * the cred of the current thread to be used if quota
1048 * checking is enabled.
1049 */
1050 savecred = curthread->t_cred;
1051 curthread->t_cred = cr;
1052 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1053 curthread->t_cred = savecred;
1054 } else {
1055 iovcnt = 0;
1056 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1057 iovcnt++;
1058 if (iovcnt <= MAX_IOVECS) {
1059 #ifdef DEBUG
1060 rfs_write_sync_hits++;
1061 #endif
1062 iovp = iov;
1063 } else {
1064 #ifdef DEBUG
1065 rfs_write_sync_misses++;
1066 #endif
1067 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1068 }
1069 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1070 uio.uio_iov = iovp;
1071 uio.uio_iovcnt = iovcnt;
1072 uio.uio_segflg = UIO_SYSSPACE;
1073 uio.uio_extflg = UIO_COPY_DEFAULT;
1074 uio.uio_loffset = (offset_t)wa->wa_offset;
1075 uio.uio_resid = wa->wa_count;
1076 /*
1077 * The limit is checked on the client. We
1078 * should allow any size writes here.
1079 */
1080 uio.uio_llimit = curproc->p_fsz_ctl;
1081 rlimit = uio.uio_llimit - wa->wa_offset;
1082 if (rlimit < (rlim64_t)uio.uio_resid)
1083 uio.uio_resid = (uint_t)rlimit;
1084
1085 /*
1086 * For now we assume no append mode.
1087 */
1088 /*
1089 * We're changing creds because VM may fault and we need
1090 * the cred of the current thread to be used if quota
1091 * checking is enabled.
1092 */
1093 savecred = curthread->t_cred;
1094 curthread->t_cred = cr;
1095 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1096 curthread->t_cred = savecred;
1097
1098 if (iovp != iov)
1099 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1100 }
1101
1102 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1103
1104 if (!error) {
1105 /*
1106 * Get attributes again so we send the latest mod
1107 * time to the client side for his cache.
1108 */
1109 va.va_mask = AT_ALL; /* now we want everything */
1110
1111 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1112
1113 /* check for overflows */
1114 if (!error) {
1115 acl_perm(vp, exi, &va, cr);
1116 error = vattr_to_nattr(&va, &ns->ns_attr);
1117 }
1118 }
1119
1120 out:
1121 if (in_crit)
1122 nbl_end_crit(vp);
1123 VN_RELE(vp);
1124
1125 /* check if a monitor detected a delegation conflict */
1126 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1127 /* mark as wouldblock so response is dropped */
1128 curthread->t_flag |= T_WOULDBLOCK;
1129 else
1130 ns->ns_status = puterrno(error);
1131
1132 }
1133
/*
 * One request in a write cluster: the per-request arguments plus the
 * thread sleeping on it, so it can be woken once ns->ns_status is set.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* request arguments */
	struct nfsattrstat *ns;		/* reply; ns_status signals completion */
	struct svc_req *req;		/* RPC request */
	cred_t *cr;			/* caller's credentials */
	bool_t ro;			/* read-only export for this caller */
	kthread_t *thread;		/* thread waiting for the result */
	struct rfs_async_write *list;	/* next request (sorted by offset) */
};
1143
/*
 * A write cluster: all pending WRITE requests against one file handle,
 * chained with the clusters for other files off rfs_async_write_head.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;				/* file all entries refer to */
	kcondvar_t cv;				/* broadcast when processed */
	struct rfs_async_write *list;		/* requests, by offset */
	struct rfs_async_write_list *next;	/* next file's cluster */
};
1150
/* Head of the list of active write clusters, one entry per file handle. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
/* Protects rfs_async_write_head and every cluster list hanging off it. */
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs gathered for one clustered write. */
#define	MAXCLIOVECS	42
/* "Status not yet set" sentinel; 0 would be mistaken for NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;		/* debug: write clustering stats */
static int rfs_write_misses = 0;	/* debug: write clustering stats */
#endif
1162
1163 /*
1164 * Write data to file.
1165 * Returns attributes of a file after writing some data to it.
1166 */
1167 void
1168 rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
1169 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1170 {
1171 int error;
1172 vnode_t *vp;
1173 rlim64_t rlimit;
1174 struct vattr va;
1175 struct uio uio;
1176 struct rfs_async_write_list *lp;
1177 struct rfs_async_write_list *nlp;
1178 struct rfs_async_write *rp;
1179 struct rfs_async_write *nrp;
1180 struct rfs_async_write *trp;
1181 struct rfs_async_write *lrp;
1182 int data_written;
1183 int iovcnt;
1184 mblk_t *m;
1185 struct iovec *iovp;
1186 struct iovec *niovp;
1187 struct iovec iov[MAXCLIOVECS];
1188 int count;
1189 int rcount;
1190 uint_t off;
1191 uint_t len;
1192 struct rfs_async_write nrpsp;
1193 struct rfs_async_write_list nlpsp;
1194 ushort_t t_flag;
1195 cred_t *savecred;
1196 int in_crit = 0;
1197 caller_context_t ct;
1198
1199 if (!rfs_write_async) {
1200 rfs_write_sync(wa, ns, exi, req, cr, ro);
1201 return;
1202 }
1203
1204 /*
1205 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
1206 * is considered an OK.
1207 */
1208 ns->ns_status = RFSWRITE_INITVAL;
1209
1210 nrp = &nrpsp;
1211 nrp->wa = wa;
1212 nrp->ns = ns;
1213 nrp->req = req;
1214 nrp->cr = cr;
1215 nrp->ro = ro;
1216 nrp->thread = curthread;
1217
1218 /*
1219 * Look to see if there is already a cluster started
1220 * for this file.
1221 */
1222 mutex_enter(&rfs_async_write_lock);
1223 for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
1224 if (bcmp(&wa->wa_fhandle, lp->fhp,
1225 sizeof (fhandle_t)) == 0)
1226 break;
1227 }
1228
1229 /*
1230 * If lp is non-NULL, then there is already a cluster
1231 * started. We need to place ourselves in the cluster
1232 * list in the right place as determined by starting
1233 * offset. Conflicts with non-blocking mandatory locked
1234 * regions will be checked when the cluster is processed.
1235 */
1236 if (lp != NULL) {
1237 rp = lp->list;
1238 trp = NULL;
1239 while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
1240 trp = rp;
1241 rp = rp->list;
1242 }
1243 nrp->list = rp;
1244 if (trp == NULL)
1245 lp->list = nrp;
1246 else
1247 trp->list = nrp;
1248 while (nrp->ns->ns_status == RFSWRITE_INITVAL)
1249 cv_wait(&lp->cv, &rfs_async_write_lock);
1250 mutex_exit(&rfs_async_write_lock);
1251
1252 return;
1253 }
1254
1255 /*
1256 * No cluster started yet, start one and add ourselves
1257 * to the list of clusters.
1258 */
1259 nrp->list = NULL;
1260
1261 nlp = &nlpsp;
1262 nlp->fhp = &wa->wa_fhandle;
1263 cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
1264 nlp->list = nrp;
1265 nlp->next = NULL;
1266
1267 if (rfs_async_write_head == NULL) {
1268 rfs_async_write_head = nlp;
1269 } else {
1270 lp = rfs_async_write_head;
1271 while (lp->next != NULL)
1272 lp = lp->next;
1273 lp->next = nlp;
1274 }
1275 mutex_exit(&rfs_async_write_lock);
1276
1277 /*
1278 * Convert the file handle common to all of the requests
1279 * in this cluster to a vnode.
1280 */
1281 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
1282 if (vp == NULL) {
1283 mutex_enter(&rfs_async_write_lock);
1284 if (rfs_async_write_head == nlp)
1285 rfs_async_write_head = nlp->next;
1286 else {
1287 lp = rfs_async_write_head;
1288 while (lp->next != nlp)
1289 lp = lp->next;
1290 lp->next = nlp->next;
1291 }
1292 t_flag = curthread->t_flag & T_WOULDBLOCK;
1293 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1294 rp->ns->ns_status = NFSERR_STALE;
1295 rp->thread->t_flag |= t_flag;
1296 }
1297 cv_broadcast(&nlp->cv);
1298 mutex_exit(&rfs_async_write_lock);
1299
1300 return;
1301 }
1302
1303 /*
1304 * Can only write regular files. Attempts to write any
1305 * other file types fail with EISDIR.
1306 */
1307 if (vp->v_type != VREG) {
1308 VN_RELE(vp);
1309 mutex_enter(&rfs_async_write_lock);
1310 if (rfs_async_write_head == nlp)
1311 rfs_async_write_head = nlp->next;
1312 else {
1313 lp = rfs_async_write_head;
1314 while (lp->next != nlp)
1315 lp = lp->next;
1316 lp->next = nlp->next;
1317 }
1318 t_flag = curthread->t_flag & T_WOULDBLOCK;
1319 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1320 rp->ns->ns_status = NFSERR_ISDIR;
1321 rp->thread->t_flag |= t_flag;
1322 }
1323 cv_broadcast(&nlp->cv);
1324 mutex_exit(&rfs_async_write_lock);
1325
1326 return;
1327 }
1328
1329 /*
1330 * Enter the critical region before calling VOP_RWLOCK, to avoid a
1331 * deadlock with ufs.
1332 */
1333 if (nbl_need_check(vp)) {
1334 nbl_start_crit(vp, RW_READER);
1335 in_crit = 1;
1336 }
1337
1338 ct.cc_sysid = 0;
1339 ct.cc_pid = 0;
1340 ct.cc_caller_id = nfs2_srv_caller_id;
1341 ct.cc_flags = CC_DONTBLOCK;
1342
1343 /*
1344 * Lock the file for writing. This operation provides
1345 * the delay which allows clusters to grow.
1346 */
1347 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1348
1349 /* check if a monitor detected a delegation conflict */
1350 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1351 if (in_crit)
1352 nbl_end_crit(vp);
1353 VN_RELE(vp);
1354 /* mark as wouldblock so response is dropped */
1355 curthread->t_flag |= T_WOULDBLOCK;
1356 mutex_enter(&rfs_async_write_lock);
1357 if (rfs_async_write_head == nlp)
1358 rfs_async_write_head = nlp->next;
1359 else {
1360 lp = rfs_async_write_head;
1361 while (lp->next != nlp)
1362 lp = lp->next;
1363 lp->next = nlp->next;
1364 }
1365 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1366 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1367 rp->ns->ns_status = puterrno(error);
1368 rp->thread->t_flag |= T_WOULDBLOCK;
1369 }
1370 }
1371 cv_broadcast(&nlp->cv);
1372 mutex_exit(&rfs_async_write_lock);
1373
1374 return;
1375 }
1376
1377 /*
1378 * Disconnect this cluster from the list of clusters.
1379 * The cluster that is being dealt with must be fixed
1380 * in size after this point, so there is no reason
1381 * to leave it on the list so that new requests can
1382 * find it.
1383 *
1384 * The algorithm is that the first write request will
1385 * create a cluster, convert the file handle to a
1386 * vnode pointer, and then lock the file for writing.
1387 * This request is not likely to be clustered with
1388 * any others. However, the next request will create
1389 * a new cluster and be blocked in VOP_RWLOCK while
1390 * the first request is being processed. This delay
1391 * will allow more requests to be clustered in this
1392 * second cluster.
1393 */
1394 mutex_enter(&rfs_async_write_lock);
1395 if (rfs_async_write_head == nlp)
1396 rfs_async_write_head = nlp->next;
1397 else {
1398 lp = rfs_async_write_head;
1399 while (lp->next != nlp)
1400 lp = lp->next;
1401 lp->next = nlp->next;
1402 }
1403 mutex_exit(&rfs_async_write_lock);
1404
1405 /*
1406 * Step through the list of requests in this cluster.
1407 * We need to check permissions to make sure that all
1408 * of the requests have sufficient permission to write
1409 * the file. A cluster can be composed of requests
1410 * from different clients and different users on each
1411 * client.
1412 *
1413 * As a side effect, we also calculate the size of the
1414 * byte range that this cluster encompasses.
1415 */
1416 rp = nlp->list;
1417 off = rp->wa->wa_offset;
1418 len = (uint_t)0;
1419 do {
1420 if (rdonly(rp->ro, vp)) {
1421 rp->ns->ns_status = NFSERR_ROFS;
1422 t_flag = curthread->t_flag & T_WOULDBLOCK;
1423 rp->thread->t_flag |= t_flag;
1424 continue;
1425 }
1426
1427 va.va_mask = AT_UID|AT_MODE;
1428
1429 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1430
1431 if (!error) {
1432 if (crgetuid(rp->cr) != va.va_uid) {
1433 /*
1434 * This is a kludge to allow writes of files
1435 * created with read only permission. The
1436 * owner of the file is always allowed to
1437 * write it.
1438 */
1439 error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
1440 }
1441 if (!error && MANDLOCK(vp, va.va_mode))
1442 error = EACCES;
1443 }
1444
1445 /*
1446 * Check for a conflict with a nbmand-locked region.
1447 */
1448 if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
1449 rp->wa->wa_count, 0, NULL)) {
1450 error = EACCES;
1451 }
1452
1453 if (error) {
1454 rp->ns->ns_status = puterrno(error);
1455 t_flag = curthread->t_flag & T_WOULDBLOCK;
1456 rp->thread->t_flag |= t_flag;
1457 continue;
1458 }
1459 if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
1460 len = rp->wa->wa_offset + rp->wa->wa_count - off;
1461 } while ((rp = rp->list) != NULL);
1462
1463 /*
1464 * Step through the cluster attempting to gather as many
1465 * requests which are contiguous as possible. These
1466 * contiguous requests are handled via one call to VOP_WRITE
1467 * instead of different calls to VOP_WRITE. We also keep
1468 * track of the fact that any data was written.
1469 */
1470 rp = nlp->list;
1471 data_written = 0;
1472 do {
1473 /*
1474 * Skip any requests which are already marked as having an
1475 * error.
1476 */
1477 if (rp->ns->ns_status != RFSWRITE_INITVAL) {
1478 rp = rp->list;
1479 continue;
1480 }
1481
1482 /*
1483 * Count the number of iovec's which are required
1484 * to handle this set of requests. One iovec is
1485 * needed for each data buffer, whether addressed
1486 * by wa_data or by the b_rptr pointers in the
1487 * mblk chains.
1488 */
1489 iovcnt = 0;
1490 lrp = rp;
1491 for (;;) {
1492 if (lrp->wa->wa_data || lrp->wa->wa_rlist)
1493 iovcnt++;
1494 else {
1495 m = lrp->wa->wa_mblk;
1496 while (m != NULL) {
1497 iovcnt++;
1498 m = m->b_cont;
1499 }
1500 }
1501 if (lrp->list == NULL ||
1502 lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
1503 lrp->wa->wa_offset + lrp->wa->wa_count !=
1504 lrp->list->wa->wa_offset) {
1505 lrp = lrp->list;
1506 break;
1507 }
1508 lrp = lrp->list;
1509 }
1510
1511 if (iovcnt <= MAXCLIOVECS) {
1512 #ifdef DEBUG
1513 rfs_write_hits++;
1514 #endif
1515 niovp = iov;
1516 } else {
1517 #ifdef DEBUG
1518 rfs_write_misses++;
1519 #endif
1520 niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
1521 }
1522 /*
1523 * Put together the scatter/gather iovecs.
1524 */
1525 iovp = niovp;
1526 trp = rp;
1527 count = 0;
1528 do {
1529 if (trp->wa->wa_data || trp->wa->wa_rlist) {
1530 if (trp->wa->wa_rlist) {
1531 iovp->iov_base =
1532 (char *)((trp->wa->wa_rlist)->
1533 u.c_daddr3);
1534 iovp->iov_len = trp->wa->wa_count;
1535 } else {
1536 iovp->iov_base = trp->wa->wa_data;
1537 iovp->iov_len = trp->wa->wa_count;
1538 }
1539 iovp++;
1540 } else {
1541 m = trp->wa->wa_mblk;
1542 rcount = trp->wa->wa_count;
1543 while (m != NULL) {
1544 iovp->iov_base = (caddr_t)m->b_rptr;
1545 iovp->iov_len = (m->b_wptr - m->b_rptr);
1546 rcount -= iovp->iov_len;
1547 if (rcount < 0)
1548 iovp->iov_len += rcount;
1549 iovp++;
1550 if (rcount <= 0)
1551 break;
1552 m = m->b_cont;
1553 }
1554 }
1555 count += trp->wa->wa_count;
1556 trp = trp->list;
1557 } while (trp != lrp);
1558
1559 uio.uio_iov = niovp;
1560 uio.uio_iovcnt = iovcnt;
1561 uio.uio_segflg = UIO_SYSSPACE;
1562 uio.uio_extflg = UIO_COPY_DEFAULT;
1563 uio.uio_loffset = (offset_t)rp->wa->wa_offset;
1564 uio.uio_resid = count;
1565 /*
1566 * The limit is checked on the client. We
1567 * should allow any size writes here.
1568 */
1569 uio.uio_llimit = curproc->p_fsz_ctl;
1570 rlimit = uio.uio_llimit - rp->wa->wa_offset;
1571 if (rlimit < (rlim64_t)uio.uio_resid)
1572 uio.uio_resid = (uint_t)rlimit;
1573
1574 /*
1575 * For now we assume no append mode.
1576 */
1577
1578 /*
1579 * We're changing creds because VM may fault
1580 * and we need the cred of the current
1581 * thread to be used if quota * checking is
1582 * enabled.
1583 */
1584 savecred = curthread->t_cred;
1585 curthread->t_cred = cr;
1586 error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
1587 curthread->t_cred = savecred;
1588
1589 /* check if a monitor detected a delegation conflict */
1590 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1591 /* mark as wouldblock so response is dropped */
1592 curthread->t_flag |= T_WOULDBLOCK;
1593
1594 if (niovp != iov)
1595 kmem_free(niovp, sizeof (*niovp) * iovcnt);
1596
1597 if (!error) {
1598 data_written = 1;
1599 /*
1600 * Get attributes again so we send the latest mod
1601 * time to the client side for his cache.
1602 */
1603 va.va_mask = AT_ALL; /* now we want everything */
1604
1605 error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);
1606
1607 if (!error)
1608 acl_perm(vp, exi, &va, rp->cr);
1609 }
1610
1611 /*
1612 * Fill in the status responses for each request
1613 * which was just handled. Also, copy the latest
1614 * attributes in to the attribute responses if
1615 * appropriate.
1616 */
1617 t_flag = curthread->t_flag & T_WOULDBLOCK;
1618 do {
1619 rp->thread->t_flag |= t_flag;
1620 /* check for overflows */
1621 if (!error) {
1622 error = vattr_to_nattr(&va, &rp->ns->ns_attr);
1623 }
1624 rp->ns->ns_status = puterrno(error);
1625 rp = rp->list;
1626 } while (rp != lrp);
1627 } while (rp != NULL);
1628
1629 /*
1630 * If any data was written at all, then we need to flush
1631 * the data and metadata to stable storage.
1632 */
1633 if (data_written) {
1634 error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);
1635
1636 if (!error) {
1637 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
1638 }
1639 }
1640
1641 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1642
1643 if (in_crit)
1644 nbl_end_crit(vp);
1645 VN_RELE(vp);
1646
1647 t_flag = curthread->t_flag & T_WOULDBLOCK;
1648 mutex_enter(&rfs_async_write_lock);
1649 for (rp = nlp->list; rp != NULL; rp = rp->list) {
1650 if (rp->ns->ns_status == RFSWRITE_INITVAL) {
1651 rp->ns->ns_status = puterrno(error);
1652 rp->thread->t_flag |= t_flag;
1653 }
1654 }
1655 cv_broadcast(&nlp->cv);
1656 mutex_exit(&rfs_async_write_lock);
1657
1658 }
1659
1660 void *
1661 rfs_write_getfh(struct nfswriteargs *wa)
1662 {
1663 return (&wa->wa_fhandle);
1664 }
1665
1666 /*
1667 * Create a file.
1668 * Creates a file with given attributes and returns those attributes
1669 * and an fhandle for the new file.
1670 */
1671 void
1672 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1673 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
1674 {
1675 int error;
1676 int lookuperr;
1677 int in_crit = 0;
1678 struct vattr va;
1679 vnode_t *vp;
1680 vnode_t *realvp;
1681 vnode_t *dvp;
1682 char *name = args->ca_da.da_name;
1683 vnode_t *tvp = NULL;
1684 int mode;
1685 int lookup_ok;
1686 bool_t trunc;
1687 struct sockaddr *ca;
1688
1689 /*
1690 * Disallow NULL paths
1691 */
1692 if (name == NULL || *name == '\0') {
1693 dr->dr_status = NFSERR_ACCES;
1694 return;
1695 }
1696
1697 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1698 if (dvp == NULL) {
1699 dr->dr_status = NFSERR_STALE;
1700 return;
1701 }
1702
1703 error = sattr_to_vattr(args->ca_sa, &va);
1704 if (error) {
1705 dr->dr_status = puterrno(error);
1706 return;
1707 }
1708
1709 /*
1710 * Must specify the mode.
1711 */
1712 if (!(va.va_mask & AT_MODE)) {
1713 VN_RELE(dvp);
1714 dr->dr_status = NFSERR_INVAL;
1715 return;
1716 }
1717
1718 /*
1719 * This is a completely gross hack to make mknod
1720 * work over the wire until we can wack the protocol
1721 */
1722 if ((va.va_mode & IFMT) == IFCHR) {
1723 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1724 va.va_type = VFIFO; /* xtra kludge for named pipe */
1725 else {
1726 va.va_type = VCHR;
1727 /*
1728 * uncompress the received dev_t
1729 * if the top half is zero indicating a request
1730 * from an `older style' OS.
1731 */
1732 if ((va.va_size & 0xffff0000) == 0)
1733 va.va_rdev = nfsv2_expdev(va.va_size);
1734 else
1735 va.va_rdev = (dev_t)va.va_size;
1736 }
1737 va.va_mask &= ~AT_SIZE;
1738 } else if ((va.va_mode & IFMT) == IFBLK) {
1739 va.va_type = VBLK;
1740 /*
1741 * uncompress the received dev_t
1742 * if the top half is zero indicating a request
1743 * from an `older style' OS.
1744 */
1745 if ((va.va_size & 0xffff0000) == 0)
1746 va.va_rdev = nfsv2_expdev(va.va_size);
1747 else
1748 va.va_rdev = (dev_t)va.va_size;
1749 va.va_mask &= ~AT_SIZE;
1750 } else if ((va.va_mode & IFMT) == IFSOCK) {
1751 va.va_type = VSOCK;
1752 } else {
1753 va.va_type = VREG;
1754 }
1755 va.va_mode &= ~IFMT;
1756 va.va_mask |= AT_TYPE;
1757
1758 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1759 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1760 MAXPATHLEN);
1761 if (name == NULL) {
1762 dr->dr_status = puterrno(EINVAL);
1763 return;
1764 }
1765
1766 /*
1767 * Why was the choice made to use VWRITE as the mode to the
1768 * call to VOP_CREATE ? This results in a bug. When a client
1769 * opens a file that already exists and is RDONLY, the second
1770 * open fails with an EACESS because of the mode.
1771 * bug ID 1054648.
1772 */
1773 lookup_ok = 0;
1774 mode = VWRITE;
1775 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1776 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1777 NULL, NULL, NULL);
1778 if (!error) {
1779 struct vattr at;
1780
1781 lookup_ok = 1;
1782 at.va_mask = AT_MODE;
1783 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1784 if (!error)
1785 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1786 VN_RELE(tvp);
1787 tvp = NULL;
1788 }
1789 }
1790
1791 if (!lookup_ok) {
1792 if (rdonly(ro, dvp)) {
1793 error = EROFS;
1794 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1795 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1796 error = EPERM;
1797 } else {
1798 error = 0;
1799 }
1800 }
1801
1802 /*
1803 * If file size is being modified on an already existing file
1804 * make sure that there are no conflicting non-blocking mandatory
1805 * locks in the region being manipulated. Return EACCES if there
1806 * are conflicting locks.
1807 */
1808 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1809 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1810 NULL, NULL, NULL);
1811
1812 if (!lookuperr &&
1813 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1814 VN_RELE(tvp);
1815 curthread->t_flag |= T_WOULDBLOCK;
1816 goto out;
1817 }
1818
1819 if (!lookuperr && nbl_need_check(tvp)) {
1820 /*
1821 * The file exists. Now check if it has any
1822 * conflicting non-blocking mandatory locks
1823 * in the region being changed.
1824 */
1825 struct vattr bva;
1826 u_offset_t offset;
1827 ssize_t length;
1828
1829 nbl_start_crit(tvp, RW_READER);
1830 in_crit = 1;
1831
1832 bva.va_mask = AT_SIZE;
1833 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1834 if (!error) {
1835 if (va.va_size < bva.va_size) {
1836 offset = va.va_size;
1837 length = bva.va_size - va.va_size;
1838 } else {
1839 offset = bva.va_size;
1840 length = va.va_size - bva.va_size;
1841 }
1842 if (length) {
1843 if (nbl_conflict(tvp, NBL_WRITE,
1844 offset, length, 0, NULL)) {
1845 error = EACCES;
1846 }
1847 }
1848 }
1849 if (error) {
1850 nbl_end_crit(tvp);
1851 VN_RELE(tvp);
1852 in_crit = 0;
1853 }
1854 } else if (tvp != NULL) {
1855 VN_RELE(tvp);
1856 }
1857 }
1858
1859 if (!error) {
1860 /*
1861 * If filesystem is shared with nosuid the remove any
1862 * setuid/setgid bits on create.
1863 */
1864 if (va.va_type == VREG &&
1865 exi->exi_export.ex_flags & EX_NOSUID)
1866 va.va_mode &= ~(VSUID | VSGID);
1867
1868 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1869 NULL, NULL);
1870
1871 if (!error) {
1872
1873 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1874 trunc = TRUE;
1875 else
1876 trunc = FALSE;
1877
1878 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1879 VN_RELE(vp);
1880 curthread->t_flag |= T_WOULDBLOCK;
1881 goto out;
1882 }
1883 va.va_mask = AT_ALL;
1884
1885 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1886
1887 /* check for overflows */
1888 if (!error) {
1889 acl_perm(vp, exi, &va, cr);
1890 error = vattr_to_nattr(&va, &dr->dr_attr);
1891 if (!error) {
1892 error = makefh(&dr->dr_fhandle, vp,
1893 exi);
1894 }
1895 }
1896 /*
1897 * Force modified metadata out to stable storage.
1898 *
1899 * if a underlying vp exists, pass it to VOP_FSYNC
1900 */
1901 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1902 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1903 else
1904 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1905 VN_RELE(vp);
1906 }
1907
1908 if (in_crit) {
1909 nbl_end_crit(tvp);
1910 VN_RELE(tvp);
1911 }
1912 }
1913
1914 /*
1915 * Force modified data and metadata out to stable storage.
1916 */
1917 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1918
1919 out:
1920
1921 VN_RELE(dvp);
1922
1923 dr->dr_status = puterrno(error);
1924
1925 if (name != args->ca_da.da_name)
1926 kmem_free(name, MAXPATHLEN);
1927 }
1928 void *
1929 rfs_create_getfh(struct nfscreatargs *args)
1930 {
1931 return (args->ca_da.da_fhandle);
1932 }
1933
1934 /*
1935 * Remove a file.
1936 * Remove named file from parent directory.
1937 */
1938 /* ARGSUSED */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the target */
	vnode_t *targvp;	/* the file being removed */
	int in_crit = 0;	/* nonzero while in targvp's nbmand crit region */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	/* Convert the directory file handle to a vnode. */
	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* Reject modifications to a read-only export up front. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * The lookup gives us a hold on the target so the checks below can
	 * examine it directly.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/*
	 * Enter the target's critical region so the remove cannot race
	 * with a conflicting non-blocking mandatory lock.
	 */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2019
2020 void *
2021 rfs_remove_getfh(struct nfsdiropargs *da)
2022 {
2023 return (da->da_fhandle);
2024 }
2025
2026 /*
2027 * rename a file
2028 * Give a file (from) a new name (to).
2029 */
2030 /* ARGSUSED */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* destination directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* file being renamed */
	vnode_t *targvp;	/* existing file at the destination name */
	int in_crit = 0;	/* nonzero while in srcvp's nbmand crit region */

	/* Resolve the source directory's file handle. */
	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Look up the export that the destination handle belongs to.
	 * Only the pointer identity is needed below, so the hold from
	 * checkexport can be released immediately.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Renames may not cross exported filesystems. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	/* Resolve the destination directory's file handle. */
	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	/* Reject modifications to a read-only export. */
	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * The lookup also gives us a hold on the source file for the
	 * delegation and nbmand checks below.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/* Drop the request; the client will retransmit. */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	/*
	 * Enter the source's critical region so the rename cannot race
	 * with a conflicting non-blocking mandatory lock.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* Record the new name for /proc-style path lookups. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2169 void *
2170 rfs_rename_getfh(struct nfsrnmargs *args)
2171 {
2172 return (args->rna_from.da_fhandle);
2173 }
2174
2175 /*
2176 * Link to a file.
2177 * Create a file (to) which is a hard link to the given file (from).
2178 */
2179 /* ARGSUSED */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *fromvp;	/* existing file to link to */
	vnode_t *tovp;		/* directory to contain the new name */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	/* Resolve the source file's handle. */
	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * Look up the export that the destination handle belongs to.
	 * Only the pointer identity is needed below, so the hold from
	 * checkexport can be released immediately.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Hard links may not cross exported filesystems. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	/* Resolve the destination directory's handle. */
	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* The destination handle must name a directory. */
	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	/* Reject modifications to a read-only export. */
	if (rdonly(ro, tovp)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2255 void *
2256 rfs_link_getfh(struct nfslinkargs *args)
2257 {
2258 return (args->la_from);
2259 }
2260
2261 /*
2262 * Symbolicly link to a file.
2263 * Create a file (to) with the given attributes which is a symbolic link
2264 * to the given path name (to).
2265 */
2266 void
2267 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2268 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2269 {
2270 int error;
2271 struct vattr va;
2272 vnode_t *vp;
2273 vnode_t *svp;
2274 int lerror;
2275 struct sockaddr *ca;
2276 char *name = NULL;
2277
2278 /*
2279 * Disallow NULL paths
2280 */
2281 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2282 *status = NFSERR_ACCES;
2283 return;
2284 }
2285
2286 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2287 if (vp == NULL) {
2288 *status = NFSERR_STALE;
2289 return;
2290 }
2291
2292 if (rdonly(ro, vp)) {
2293 VN_RELE(vp);
2294 *status = NFSERR_ROFS;
2295 return;
2296 }
2297
2298 error = sattr_to_vattr(args->sla_sa, &va);
2299 if (error) {
2300 VN_RELE(vp);
2301 *status = puterrno(error);
2302 return;
2303 }
2304
2305 if (!(va.va_mask & AT_MODE)) {
2306 VN_RELE(vp);
2307 *status = NFSERR_INVAL;
2308 return;
2309 }
2310
2311 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2312 name = nfscmd_convname(ca, exi, args->sla_tnm,
2313 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2314
2315 if (name == NULL) {
2316 *status = NFSERR_ACCES;
2317 return;
2318 }
2319
2320 va.va_type = VLNK;
2321 va.va_mask |= AT_TYPE;
2322
2323 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2324
2325 /*
2326 * Force new data and metadata out to stable storage.
2327 */
2328 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2329 NULL, cr, NULL, NULL, NULL);
2330
2331 if (!lerror) {
2332 (void) VOP_FSYNC(svp, 0, cr, NULL);
2333 VN_RELE(svp);
2334 }
2335
2336 /*
2337 * Force modified data and metadata out to stable storage.
2338 */
2339 (void) VOP_FSYNC(vp, 0, cr, NULL);
2340
2341 VN_RELE(vp);
2342
2343 *status = puterrno(error);
2344 if (name != args->sla_tnm)
2345 kmem_free(name, MAXPATHLEN);
2346
2347 }
2348 void *
2349 rfs_symlink_getfh(struct nfsslargs *args)
2350 {
2351 return (args->sla_from.da_fhandle);
2352 }
2353
2354 /*
2355 * Make a directory.
2356 * Create a directory with the given name, parent directory, and attributes.
2357 * Returns a file handle and attributes for the new directory.
2358 */
2359 /* ARGSUSED */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	/* Resolve the parent directory's file handle. */
	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	/* Reject modifications to a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply a mode. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attribtutes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is passed the parent
			 * (vp) while va describes the new directory (dvp)
			 * — confirm this asymmetry is intended.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2441 void *
2442 rfs_mkdir_getfh(struct nfscreatargs *args)
2443 {
2444 return (args->ca_da.da_fhandle);
2445 }
2446
2447 /*
2448 * Remove a directory.
2449 * Remove the given directory name from the given parent directory.
2450 */
2451 /* ARGSUSED */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;		/* parent directory */

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	/* Resolve the parent directory's file handle. */
	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/* Reject modifications to a read-only export. */
	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR takes a third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2509 void *
2510 rfs_rmdir_getfh(struct nfsdiropargs *da)
2511 {
2512 return (da->da_fhandle);
2513 }
2514
2515 /* ARGSUSED */
2516 void
2517 rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
2518 struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
2519 {
2520 int error;
2521 int iseof;
2522 struct iovec iov;
2523 struct uio uio;
2524 vnode_t *vp;
2525 char *ndata = NULL;
2526 struct sockaddr *ca;
2527 size_t nents;
2528 int ret;
2529
2530 vp = nfs_fhtovp(&rda->rda_fh, exi);
2531 if (vp == NULL) {
2532 rd->rd_entries = NULL;
2533 rd->rd_status = NFSERR_STALE;
2534 return;
2535 }
2536
2537 if (vp->v_type != VDIR) {
2538 VN_RELE(vp);
2539 rd->rd_entries = NULL;
2540 rd->rd_status = NFSERR_NOTDIR;
2541 return;
2542 }
2543
2544 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2545
2546 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
2547
2548 if (error) {
2549 rd->rd_entries = NULL;
2550 goto bad;
2551 }
2552
2553 if (rda->rda_count == 0) {
2554 rd->rd_entries = NULL;
2555 rd->rd_size = 0;
2556 rd->rd_eof = FALSE;
2557 goto bad;
2558 }
2559
2560 rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);
2561
2562 /*
2563 * Allocate data for entries. This will be freed by rfs_rddirfree.
2564 */
2565 rd->rd_bufsize = (uint_t)rda->rda_count;
2566 rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);
2567
2568 /*
2569 * Set up io vector to read directory data
2570 */
2571 iov.iov_base = (caddr_t)rd->rd_entries;
2572 iov.iov_len = rda->rda_count;
2573 uio.uio_iov = &iov;
2574 uio.uio_iovcnt = 1;
2575 uio.uio_segflg = UIO_SYSSPACE;
2576 uio.uio_extflg = UIO_COPY_CACHED;
2577 uio.uio_loffset = (offset_t)rda->rda_offset;
2578 uio.uio_resid = rda->rda_count;
2579
2580 /*
2581 * read directory
2582 */
2583 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
2584
2585 /*
2586 * Clean up
2587 */
2588 if (!error) {
2589 /*
2590 * set size and eof
2591 */
2592 if (uio.uio_resid == rda->rda_count) {
2593 rd->rd_size = 0;
2594 rd->rd_eof = TRUE;
2595 } else {
2596 rd->rd_size = (uint32_t)(rda->rda_count -
2597 uio.uio_resid);
2598 rd->rd_eof = iseof ? TRUE : FALSE;
2599 }
2600 }
2601
2602 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2603 nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
2604 ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
2605 rda->rda_count, &ndata);
2606
2607 if (ret != 0) {
2608 size_t dropbytes;
2609 /*
2610 * We had to drop one or more entries in order to fit
2611 * during the character conversion. We need to patch
2612 * up the size and eof info.
2613 */
2614 if (rd->rd_eof)
2615 rd->rd_eof = FALSE;
2616 dropbytes = nfscmd_dropped_entrysize(
2617 (struct dirent64 *)rd->rd_entries, nents, ret);
2618 rd->rd_size -= dropbytes;
2619 }
2620 if (ndata == NULL) {
2621 ndata = (char *)rd->rd_entries;
2622 } else if (ndata != (char *)rd->rd_entries) {
2623 kmem_free(rd->rd_entries, rd->rd_bufsize);
2624 rd->rd_entries = (void *)ndata;
2625 rd->rd_bufsize = rda->rda_count;
2626 }
2627
2628 bad:
2629 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2630
2631 #if 0 /* notyet */
2632 /*
2633 * Don't do this. It causes local disk writes when just
2634 * reading the file and the overhead is deemed larger
2635 * than the benefit.
2636 */
2637 /*
2638 * Force modified metadata out to stable storage.
2639 */
2640 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2641 #endif
2642
2643 VN_RELE(vp);
2644
2645 rd->rd_status = puterrno(error);
2646
2647 }
2648 void *
2649 rfs_readdir_getfh(struct nfsrddirargs *rda)
2650 {
2651 return (&rda->rda_fh);
2652 }
2653 void
2654 rfs_rddirfree(struct nfsrddirres *rd)
2655 {
2656 if (rd->rd_entries != NULL)
2657 kmem_free(rd->rd_entries, rd->rd_bufsize);
2658 }
2659
2660 /* ARGSUSED */
2661 void
2662 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2663 struct svc_req *req, cred_t *cr, bool_t ro)
2664 {
2665 int error;
2666 struct statvfs64 sb;
2667 vnode_t *vp;
2668
2669 vp = nfs_fhtovp(fh, exi);
2670 if (vp == NULL) {
2671 fs->fs_status = NFSERR_STALE;
2672 return;
2673 }
2674
2675 error = VFS_STATVFS(vp->v_vfsp, &sb);
2676
2677 if (!error) {
2678 fs->fs_tsize = nfstsize();
2679 fs->fs_bsize = sb.f_frsize;
2680 fs->fs_blocks = sb.f_blocks;
2681 fs->fs_bfree = sb.f_bfree;
2682 fs->fs_bavail = sb.f_bavail;
2683 }
2684
2685 VN_RELE(vp);
2686
2687 fs->fs_status = puterrno(error);
2688
2689 }
2690 void *
2691 rfs_statfs_getfh(fhandle_t *fh)
2692 {
2693 return (fh);
2694 }
2695
/*
 * Convert NFSv2 wire settable attributes (struct nfssattr) into a vattr
 * suitable for VOP_SETATTR.  A wire value of all-ones means "don't set
 * this attribute"; only fields that carry a real value get their AT_*
 * bit added to va_mask.
 *
 * Returns 0 on success or EOVERFLOW (32-bit kernels only) if a wire
 * time does not fit in time_t.  Note that on the EOVERFLOW paths any
 * attributes converted before the failing one remain set in *vap; the
 * caller is expected to discard *vap when an error is returned.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/*
	 * Times are only "set" when both seconds and microseconds are
	 * present; a -1 in either half means leave the time alone.
	 */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2762
/*
 * Map vnode types (vtype_t, used as the array index) to NFSv2
 * over-the-wire file types.  Types with no NFSv2 representation
 * (VNON, VDOOR, VPROC, etc.) map to 0; VFIFO is handled separately
 * via NA_SETFIFO in vattr_to_nattr().
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2766
2767 /*
2768 * check the following fields for overflow: nodeid, size, and time.
2769 * There could be a problem when converting 64-bit LP64 fields
2770 * into 32-bit ones. Return an error if there is an overflow.
2771 */
2772 int
2773 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2774 {
2775 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2776 na->na_type = vt_to_nf[vap->va_type];
2777
2778 if (vap->va_mode == (unsigned short) -1)
2779 na->na_mode = (uint32_t)-1;
2780 else
2781 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2782
2783 if (vap->va_uid == (unsigned short)(-1))
2784 na->na_uid = (uint32_t)(-1);
2785 else if (vap->va_uid == UID_NOBODY)
2786 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2787 else
2788 na->na_uid = vap->va_uid;
2789
2790 if (vap->va_gid == (unsigned short)(-1))
2791 na->na_gid = (uint32_t)-1;
2792 else if (vap->va_gid == GID_NOBODY)
2793 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2794 else
2795 na->na_gid = vap->va_gid;
2796
2797 /*
2798 * Do we need to check fsid for overflow? It is 64-bit in the
2799 * vattr, but are bigger than 32 bit values supported?
2800 */
2801 na->na_fsid = vap->va_fsid;
2802
2803 na->na_nodeid = vap->va_nodeid;
2804
2805 /*
2806 * Check to make sure that the nodeid is representable over the
2807 * wire without losing bits.
2808 */
2809 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2810 return (EFBIG);
2811 na->na_nlink = vap->va_nlink;
2812
2813 /*
2814 * Check for big files here, instead of at the caller. See
2815 * comments in cstat for large special file explanation.
2816 */
2817 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2818 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2819 return (EFBIG);
2820 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2821 /* UNKNOWN_SIZE | OVERFLOW */
2822 na->na_size = MAXOFF32_T;
2823 } else
2824 na->na_size = vap->va_size;
2825 } else
2826 na->na_size = vap->va_size;
2827
2828 /*
2829 * If the vnode times overflow the 32-bit times that NFS2
2830 * uses on the wire then return an error.
2831 */
2832 if (!NFS_VAP_TIME_OK(vap)) {
2833 return (EOVERFLOW);
2834 }
2835 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2836 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2837
2838 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2839 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2840
2841 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2842 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2843
2844 /*
2845 * If the dev_t will fit into 16 bits then compress
2846 * it, otherwise leave it alone. See comments in
2847 * nfs_client.c.
2848 */
2849 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2850 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2851 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2852 else
2853 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2854
2855 na->na_blocks = vap->va_nblocks;
2856 na->na_blocksize = vap->va_blksize;
2857
2858 /*
2859 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2860 * over-the-wire protocols for named-pipe vnodes. It remaps the
2861 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2862 *
2863 * BUYER BEWARE:
2864 * If you are porting the NFS to a non-Sun server, you probably
2865 * don't want to include the following block of code. The
2866 * over-the-wire special file types will be changing with the
2867 * NFS Protocol Revision.
2868 */
2869 if (vap->va_type == VFIFO)
2870 NA_SETFIFO(na);
2871 return (0);
2872 }
2873
2874 /*
2875 * acl v2 support: returns approximate permission.
2876 * default: returns minimal permission (more restrictive)
2877 * aclok: returns maximal permission (less restrictive)
2878 * This routine changes the permissions that are alaredy in *va.
2879 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2880 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2881 */
2882 static void
2883 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2884 {
2885 vsecattr_t vsa;
2886 int aclcnt;
2887 aclent_t *aclentp;
2888 mode_t mask_perm;
2889 mode_t grp_perm;
2890 mode_t other_perm;
2891 mode_t other_orig;
2892 int error;
2893
2894 /* dont care default acl */
2895 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2896 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2897
2898 if (!error) {
2899 aclcnt = vsa.vsa_aclcnt;
2900 if (aclcnt > MIN_ACL_ENTRIES) {
2901 /* non-trivial ACL */
2902 aclentp = vsa.vsa_aclentp;
2903 if (exi->exi_export.ex_flags & EX_ACLOK) {
2904 /* maximal permissions */
2905 grp_perm = 0;
2906 other_perm = 0;
2907 for (; aclcnt > 0; aclcnt--, aclentp++) {
2908 switch (aclentp->a_type) {
2909 case USER_OBJ:
2910 break;
2911 case USER:
2912 grp_perm |=
2913 aclentp->a_perm << 3;
2914 other_perm |= aclentp->a_perm;
2915 break;
2916 case GROUP_OBJ:
2917 grp_perm |=
2918 aclentp->a_perm << 3;
2919 break;
2920 case GROUP:
2921 other_perm |= aclentp->a_perm;
2922 break;
2923 case OTHER_OBJ:
2924 other_orig = aclentp->a_perm;
2925 break;
2926 case CLASS_OBJ:
2927 mask_perm = aclentp->a_perm;
2928 break;
2929 default:
2930 break;
2931 }
2932 }
2933 grp_perm &= mask_perm << 3;
2934 other_perm &= mask_perm;
2935 other_perm |= other_orig;
2936
2937 } else {
2938 /* minimal permissions */
2939 grp_perm = 070;
2940 other_perm = 07;
2941 for (; aclcnt > 0; aclcnt--, aclentp++) {
2942 switch (aclentp->a_type) {
2943 case USER_OBJ:
2944 break;
2945 case USER:
2946 case CLASS_OBJ:
2947 grp_perm &=
2948 aclentp->a_perm << 3;
2949 other_perm &=
2950 aclentp->a_perm;
2951 break;
2952 case GROUP_OBJ:
2953 grp_perm &=
2954 aclentp->a_perm << 3;
2955 break;
2956 case GROUP:
2957 other_perm &=
2958 aclentp->a_perm;
2959 break;
2960 case OTHER_OBJ:
2961 other_perm &=
2962 aclentp->a_perm;
2963 break;
2964 default:
2965 break;
2966 }
2967 }
2968 }
2969 /* copy to va */
2970 va->va_mode &= ~077;
2971 va->va_mode |= grp_perm | other_perm;
2972 }
2973 if (vsa.vsa_aclcnt)
2974 kmem_free(vsa.vsa_aclentp,
2975 vsa.vsa_aclcnt * sizeof (aclent_t));
2976 }
2977 }
2978
2979 void
2980 rfs_srvrinit(void)
2981 {
2982 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2983 nfs2_srv_caller_id = fs_new_caller_id();
2984 }
2985
2986 void
2987 rfs_srvrfini(void)
2988 {
2989 mutex_destroy(&rfs_async_write_lock);
2990 }
2991
2992 static int
2993 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2994 {
2995 struct clist *wcl;
2996 int wlist_len;
2997 uint32_t count = rr->rr_count;
2998
2999 wcl = ra->ra_wlist;
3000
3001 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
3002 return (FALSE);
3003 }
3004
3005 wcl = ra->ra_wlist;
3006 rr->rr_ok.rrok_wlist_len = wlist_len;
3007 rr->rr_ok.rrok_wlist = wcl;
3008
3009 return (TRUE);
3010 }
--- EOF ---