Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ new/usr/src/uts/common/fs/nfs/nfs_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 *
24 24 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
25 25 * All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 30 */
31 31
32 32 #include <sys/param.h>
33 33 #include <sys/types.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/time.h>
37 37 #include <sys/vnode.h>
38 38 #include <sys/vfs.h>
39 39 #include <sys/vfs_opreg.h>
40 40 #include <sys/file.h>
41 41 #include <sys/filio.h>
42 42 #include <sys/uio.h>
43 43 #include <sys/buf.h>
44 44 #include <sys/mman.h>
45 45 #include <sys/pathname.h>
46 46 #include <sys/dirent.h>
47 47 #include <sys/debug.h>
48 48 #include <sys/vmsystm.h>
49 49 #include <sys/fcntl.h>
50 50 #include <sys/flock.h>
51 51 #include <sys/swap.h>
52 52 #include <sys/errno.h>
53 53 #include <sys/strsubr.h>
54 54 #include <sys/sysmacros.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/cmn_err.h>
57 57 #include <sys/pathconf.h>
58 58 #include <sys/utsname.h>
59 59 #include <sys/dnlc.h>
60 60 #include <sys/acl.h>
61 61 #include <sys/atomic.h>
62 62 #include <sys/policy.h>
63 63 #include <sys/sdt.h>
64 64
65 65 #include <rpc/types.h>
66 66 #include <rpc/auth.h>
67 67 #include <rpc/clnt.h>
68 68
69 69 #include <nfs/nfs.h>
70 70 #include <nfs/nfs_clnt.h>
71 71 #include <nfs/rnode.h>
72 72 #include <nfs/nfs_acl.h>
73 73 #include <nfs/lm.h>
74 74
75 75 #include <vm/hat.h>
76 76 #include <vm/as.h>
77 77 #include <vm/page.h>
78 78 #include <vm/pvn.h>
79 79 #include <vm/seg.h>
80 80 #include <vm/seg_map.h>
81 81 #include <vm/seg_kpm.h>
82 82 #include <vm/seg_vn.h>
83 83
84 84 #include <fs/fs_subr.h>
85 85
86 86 #include <sys/ddi.h>
87 87
/*
 * Forward declarations for the file-local (static) helper routines.
 */
88 88 static int nfs_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int,
89 89 cred_t *);
90 90 static int nfswrite(vnode_t *, caddr_t, uint_t, int, cred_t *);
91 91 static int nfsread(vnode_t *, caddr_t, uint_t, int, size_t *, cred_t *);
92 92 static int nfssetattr(vnode_t *, struct vattr *, int, cred_t *);
93 93 static int nfslookup_dnlc(vnode_t *, char *, vnode_t **, cred_t *);
94 94 static int nfslookup_otw(vnode_t *, char *, vnode_t **, cred_t *, int);
95 95 static int nfsrename(vnode_t *, char *, vnode_t *, char *, cred_t *,
96 96 caller_context_t *);
97 97 static int nfsreaddir(vnode_t *, rddir_cache *, cred_t *);
98 98 static int nfs_bio(struct buf *, cred_t *);
99 99 static int nfs_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
100 100 page_t *[], size_t, struct seg *, caddr_t,
101 101 enum seg_rw, cred_t *);
102 102 static void nfs_readahead(vnode_t *, u_offset_t, caddr_t, struct seg *,
103 103 cred_t *);
104 104 static int nfs_sync_putapage(vnode_t *, page_t *, u_offset_t, size_t,
105 105 int, cred_t *);
106 106 static int nfs_sync_pageio(vnode_t *, page_t *, u_offset_t, size_t,
107 107 int, cred_t *);
108 108 static void nfs_delmap_callback(struct as *, void *, uint_t);
109 109
110 110 /*
111 111 * Error flags used to pass information about certain special errors
112 112 * which need to be handled specially.
113 113 */
/*
 * NOTE(review): the value is negative, presumably so that it can never be
 * confused with a real (positive) errno value -- confirm against the
 * users of NFS_EOF.
 */
114 114 #define NFS_EOF -98
115 115
116 116 /*
117 117 * These are the vnode ops routines which implement the vnode interface to
118 118 * the networked file system. These routines just take their parameters,
119 119 * make them look networkish by putting the right info into interface structs,
120 120 * and then calling the appropriate remote routine(s) to do the work.
121 121 *
122 122 * Note on directory name lookup caching: If we detect a stale fhandle,
123 123 * we purge the directory cache relative to that vnode. This way, the
124 124 * user won't get burned by the cache repeatedly. See <nfs/rnode.h> for
125 125 * more details on rnode locking.
126 126 */
127 127
/*
 * Forward declarations of the vnode operation entry points.  All of them
 * except nfs_accessx() are installed in nfs_vnodeops_template below.
 */
128 128 static int nfs_open(vnode_t **, int, cred_t *, caller_context_t *);
129 129 static int nfs_close(vnode_t *, int, int, offset_t, cred_t *,
130 130 caller_context_t *);
131 131 static int nfs_read(vnode_t *, struct uio *, int, cred_t *,
132 132 caller_context_t *);
133 133 static int nfs_write(vnode_t *, struct uio *, int, cred_t *,
134 134 caller_context_t *);
135 135 static int nfs_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
136 136 caller_context_t *);
137 137 static int nfs_getattr(vnode_t *, struct vattr *, int, cred_t *,
138 138 caller_context_t *);
139 139 static int nfs_setattr(vnode_t *, struct vattr *, int, cred_t *,
140 140 caller_context_t *);
141 141 static int nfs_access(vnode_t *, int, int, cred_t *, caller_context_t *);
142 142 static int nfs_accessx(void *, int, cred_t *);
143 143 static int nfs_readlink(vnode_t *, struct uio *, cred_t *,
144 144 caller_context_t *);
145 145 static int nfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
146 146 static void nfs_inactive(vnode_t *, cred_t *, caller_context_t *);
147 147 static int nfs_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
148 148 int, vnode_t *, cred_t *, caller_context_t *,
149 149 int *, pathname_t *);
150 150 static int nfs_create(vnode_t *, char *, struct vattr *, enum vcexcl,
151 151 int, vnode_t **, cred_t *, int, caller_context_t *,
152 152 vsecattr_t *);
153 153 static int nfs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
154 154 int);
155 155 static int nfs_link(vnode_t *, vnode_t *, char *, cred_t *,
156 156 caller_context_t *, int);
157 157 static int nfs_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
158 158 caller_context_t *, int);
159 159 static int nfs_mkdir(vnode_t *, char *, struct vattr *, vnode_t **,
160 160 cred_t *, caller_context_t *, int, vsecattr_t *);
161 161 static int nfs_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
162 162 caller_context_t *, int);
163 163 static int nfs_symlink(vnode_t *, char *, struct vattr *, char *,
164 164 cred_t *, caller_context_t *, int);
165 165 static int nfs_readdir(vnode_t *, struct uio *, cred_t *, int *,
166 166 caller_context_t *, int);
167 167 static int nfs_fid(vnode_t *, fid_t *, caller_context_t *);
168 168 static int nfs_rwlock(vnode_t *, int, caller_context_t *);
169 169 static void nfs_rwunlock(vnode_t *, int, caller_context_t *);
170 170 static int nfs_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
171 171 static int nfs_getpage(vnode_t *, offset_t, size_t, uint_t *,
172 172 page_t *[], size_t, struct seg *, caddr_t,
173 173 enum seg_rw, cred_t *, caller_context_t *);
174 174 static int nfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
175 175 caller_context_t *);
176 176 static int nfs_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t,
177 177 uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *);
178 178 static int nfs_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
179 179 uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *);
180 180 static int nfs_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
181 181 struct flk_callback *, cred_t *, caller_context_t *);
182 182 static int nfs_space(vnode_t *, int, struct flock64 *, int, offset_t,
183 183 cred_t *, caller_context_t *);
184 184 static int nfs_realvp(vnode_t *, vnode_t **, caller_context_t *);
185 185 static int nfs_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
186 186 uint_t, uint_t, uint_t, cred_t *, caller_context_t *);
187 187 static int nfs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
188 188 caller_context_t *);
189 189 static int nfs_pageio(vnode_t *, page_t *, u_offset_t, size_t, int,
190 190 cred_t *, caller_context_t *);
191 191 static int nfs_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
192 192 caller_context_t *);
193 193 static int nfs_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
194 194 caller_context_t *);
195 195 static int nfs_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
196 196 caller_context_t *);
197 197
/*
 * The NFS vnode operations vector handed out by nfs_getvnodeops().
 * NOTE(review): presumably populated from nfs_vnodeops_template during
 * file system initialization; that code is not in this part of the file.
 */
198 198 struct vnodeops *nfs_vnodeops;
199 199
/*
 * Table pairing each VOPNAME_* operation name with the routine that
 * implements it for NFS.  The list is terminated by a NULL, NULL entry.
 * nfs_dump and fs_vnevent_support are not declared in this file.
 */
200 200 const fs_operation_def_t nfs_vnodeops_template[] = {
201 201 VOPNAME_OPEN, { .vop_open = nfs_open },
202 202 VOPNAME_CLOSE, { .vop_close = nfs_close },
203 203 VOPNAME_READ, { .vop_read = nfs_read },
204 204 VOPNAME_WRITE, { .vop_write = nfs_write },
205 205 VOPNAME_IOCTL, { .vop_ioctl = nfs_ioctl },
206 206 VOPNAME_GETATTR, { .vop_getattr = nfs_getattr },
207 207 VOPNAME_SETATTR, { .vop_setattr = nfs_setattr },
208 208 VOPNAME_ACCESS, { .vop_access = nfs_access },
209 209 VOPNAME_LOOKUP, { .vop_lookup = nfs_lookup },
210 210 VOPNAME_CREATE, { .vop_create = nfs_create },
211 211 VOPNAME_REMOVE, { .vop_remove = nfs_remove },
212 212 VOPNAME_LINK, { .vop_link = nfs_link },
213 213 VOPNAME_RENAME, { .vop_rename = nfs_rename },
214 214 VOPNAME_MKDIR, { .vop_mkdir = nfs_mkdir },
215 215 VOPNAME_RMDIR, { .vop_rmdir = nfs_rmdir },
216 216 VOPNAME_READDIR, { .vop_readdir = nfs_readdir },
217 217 VOPNAME_SYMLINK, { .vop_symlink = nfs_symlink },
218 218 VOPNAME_READLINK, { .vop_readlink = nfs_readlink },
219 219 VOPNAME_FSYNC, { .vop_fsync = nfs_fsync },
220 220 VOPNAME_INACTIVE, { .vop_inactive = nfs_inactive },
221 221 VOPNAME_FID, { .vop_fid = nfs_fid },
222 222 VOPNAME_RWLOCK, { .vop_rwlock = nfs_rwlock },
223 223 VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs_rwunlock },
224 224 VOPNAME_SEEK, { .vop_seek = nfs_seek },
225 225 VOPNAME_FRLOCK, { .vop_frlock = nfs_frlock },
226 226 VOPNAME_SPACE, { .vop_space = nfs_space },
227 227 VOPNAME_REALVP, { .vop_realvp = nfs_realvp },
228 228 VOPNAME_GETPAGE, { .vop_getpage = nfs_getpage },
229 229 VOPNAME_PUTPAGE, { .vop_putpage = nfs_putpage },
230 230 VOPNAME_MAP, { .vop_map = nfs_map },
231 231 VOPNAME_ADDMAP, { .vop_addmap = nfs_addmap },
232 232 VOPNAME_DELMAP, { .vop_delmap = nfs_delmap },
233 233 VOPNAME_DUMP, { .vop_dump = nfs_dump },
234 234 VOPNAME_PATHCONF, { .vop_pathconf = nfs_pathconf },
235 235 VOPNAME_PAGEIO, { .vop_pageio = nfs_pageio },
236 236 VOPNAME_SETSECATTR, { .vop_setsecattr = nfs_setsecattr },
237 237 VOPNAME_GETSECATTR, { .vop_getsecattr = nfs_getsecattr },
238 238 VOPNAME_SHRLOCK, { .vop_shrlock = nfs_shrlock },
239 239 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
240 240 NULL, NULL
241 241 };
242 242
243 243 /*
244 244 * XXX: This is referenced in modstubs.s
245 245 */
246 246 struct vnodeops *
247 247 nfs_getvnodeops(void)
248 248 {
249 249 return (nfs_vnodeops);
250 250 }
251 251
252 252 /* ARGSUSED */
253 253 static int
254 254 nfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
255 255 {
256 256 int error;
257 257 struct vattr va;
258 258 rnode_t *rp;
259 259 vnode_t *vp;
260 260
261 261 vp = *vpp;
262 262 rp = VTOR(vp);
263 263 if (nfs_zone() != VTOMI(vp)->mi_zone)
264 264 return (EIO);
265 265 mutex_enter(&rp->r_statelock);
266 266 if (rp->r_cred == NULL) {
267 267 crhold(cr);
268 268 rp->r_cred = cr;
269 269 }
270 270 mutex_exit(&rp->r_statelock);
271 271
272 272 /*
273 273 * If there is no cached data or if close-to-open
274 274 * consistency checking is turned off, we can avoid
275 275 * the over the wire getattr. Otherwise, if the
276 276 * file system is mounted readonly, then just verify
277 277 * the caches are up to date using the normal mechanism.
278 278 * Else, if the file is not mmap'd, then just mark
279 279 * the attributes as timed out. They will be refreshed
280 280 * and the caches validated prior to being used.
281 281 * Else, the file system is mounted writeable so
282 282 * force an over the wire GETATTR in order to ensure
283 283 * that all cached data is valid.
284 284 */
285 285 if (vp->v_count > 1 ||
286 286 ((vn_has_cached_data(vp) || HAVE_RDDIR_CACHE(rp)) &&
287 287 !(VTOMI(vp)->mi_flags & MI_NOCTO))) {
288 288 if (vn_is_readonly(vp))
289 289 error = nfs_validate_caches(vp, cr);
290 290 else if (rp->r_mapcnt == 0 && vp->v_count == 1) {
291 291 PURGE_ATTRCACHE(vp);
292 292 error = 0;
293 293 } else {
294 294 va.va_mask = AT_ALL;
295 295 error = nfs_getattr_otw(vp, &va, cr);
296 296 }
297 297 } else
298 298 error = 0;
299 299
300 300 return (error);
301 301 }
302 302
303 303 /* ARGSUSED */
304 304 static int
305 305 nfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
306 306 caller_context_t *ct)
307 307 {
308 308 rnode_t *rp;
309 309 int error;
310 310 struct vattr va;
311 311
312 312 /*
313 313 * zone_enter(2) prevents processes from changing zones with NFS files
314 314 * open; if we happen to get here from the wrong zone we can't do
315 315 * anything over the wire.
316 316 */
317 317 if (VTOMI(vp)->mi_zone != nfs_zone()) {
318 318 /*
319 319 * We could attempt to clean up locks, except we're sure
320 320 * that the current process didn't acquire any locks on
321 321 * the file: any attempt to lock a file belong to another zone
322 322 * will fail, and one can't lock an NFS file and then change
323 323 * zones, as that fails too.
324 324 *
325 325 * Returning an error here is the sane thing to do. A
326 326 * subsequent call to VN_RELE() which translates to a
327 327 * nfs_inactive() will clean up state: if the zone of the
328 328 * vnode's origin is still alive and kicking, an async worker
329 329 * thread will handle the request (from the correct zone), and
330 330 * everything (minus the final nfs_getattr_otw() call) should
331 331 * be OK. If the zone is going away nfs_async_inactive() will
332 332 * throw away cached pages inline.
333 333 */
334 334 return (EIO);
335 335 }
336 336
337 337 /*
338 338 * If we are using local locking for this filesystem, then
339 339 * release all of the SYSV style record locks. Otherwise,
340 340 * we are doing network locking and we need to release all
341 341 * of the network locks. All of the locks held by this
342 342 * process on this file are released no matter what the
343 343 * incoming reference count is.
344 344 */
345 345 if (VTOMI(vp)->mi_flags & MI_LLOCK) {
346 346 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
347 347 cleanshares(vp, ttoproc(curthread)->p_pid);
348 348 } else
349 349 nfs_lockrelease(vp, flag, offset, cr);
350 350
351 351 if (count > 1)
352 352 return (0);
353 353
354 354 /*
355 355 * If the file has been `unlinked', then purge the
356 356 * DNLC so that this vnode will get reycled quicker
357 357 * and the .nfs* file on the server will get removed.
358 358 */
359 359 rp = VTOR(vp);
360 360 if (rp->r_unldvp != NULL)
361 361 dnlc_purge_vp(vp);
362 362
363 363 /*
364 364 * If the file was open for write and there are pages,
365 365 * then if the file system was mounted using the "no-close-
366 366 * to-open" semantics, then start an asynchronous flush
367 367 * of the all of the pages in the file.
368 368 * else the file system was not mounted using the "no-close-
369 369 * to-open" semantics, then do a synchronous flush and
370 370 * commit of all of the dirty and uncommitted pages.
371 371 *
372 372 * The asynchronous flush of the pages in the "nocto" path
373 373 * mostly just associates a cred pointer with the rnode so
374 374 * writes which happen later will have a better chance of
375 375 * working. It also starts the data being written to the
376 376 * server, but without unnecessarily delaying the application.
377 377 */
378 378 if ((flag & FWRITE) && vn_has_cached_data(vp)) {
379 379 if ((VTOMI(vp)->mi_flags & MI_NOCTO)) {
380 380 error = nfs_putpage(vp, (offset_t)0, 0, B_ASYNC,
381 381 cr, ct);
382 382 if (error == EAGAIN)
383 383 error = 0;
384 384 } else
385 385 error = nfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
386 386 if (!error) {
387 387 mutex_enter(&rp->r_statelock);
388 388 error = rp->r_error;
389 389 rp->r_error = 0;
390 390 mutex_exit(&rp->r_statelock);
391 391 }
392 392 } else {
393 393 mutex_enter(&rp->r_statelock);
394 394 error = rp->r_error;
395 395 rp->r_error = 0;
396 396 mutex_exit(&rp->r_statelock);
397 397 }
398 398
399 399 /*
400 400 * If RWRITEATTR is set, then issue an over the wire GETATTR to
401 401 * refresh the attribute cache with a set of attributes which
402 402 * weren't returned from a WRITE. This will enable the close-
403 403 * to-open processing to work.
404 404 */
405 405 if (rp->r_flags & RWRITEATTR)
406 406 (void) nfs_getattr_otw(vp, &va, cr);
407 407
408 408 return (error);
409 409 }
410 410
/*
 * Read from a regular NFS file into the caller's uio.
 *
 * The caller must already hold rp->r_rwlock as reader (asserted below).
 * When caching is bypassed (VNOCACHE, or direct I/O with no mappings and
 * no cached pages) the data is fetched with nfsread() into a temporary
 * kernel buffer and copied out with uiomove().  Otherwise it is copied
 * from the cached pages one MAXBSIZE-aligned window at a time, via vpm
 * when vpm_enable is set and via segmap otherwise.
 *
 * Returns 0 on success (a read at or beyond r_size transfers nothing and
 * still returns 0), EIO on a zone mismatch, EISDIR for a non-VREG vnode,
 * EFBIG / EINVAL for offsets outside the 32-bit range, EINTR if a signal
 * arrives while waiting for a cache purge to finish, or the error from
 * the lower layers.
 */
411 411 /* ARGSUSED */
412 412 static int
413 413 nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
414 414 caller_context_t *ct)
415 415 {
416 416 rnode_t *rp;
417 417 u_offset_t off;
418 418 offset_t diff;
419 419 int on;
420 420 size_t n;
421 421 caddr_t base;
422 422 uint_t flags;
423 423 int error;
424 424 mntinfo_t *mi;
425 425
426 426 rp = VTOR(vp);
427 427 mi = VTOMI(vp);
428 428
429 429 if (nfs_zone() != mi->mi_zone)
430 430 return (EIO);
431 431
432 432 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
433 433
434 434 if (vp->v_type != VREG)
435 435 return (EISDIR);
436 436
437 437 if (uiop->uio_resid == 0)
438 438 return (0);
439 439
/* Offsets are limited to what fits in 32 bits (MAXOFF32_T). */
440 440 if (uiop->uio_loffset > MAXOFF32_T)
441 441 return (EFBIG);
442 442
443 443 if (uiop->uio_loffset < 0 ||
444 444 uiop->uio_loffset + uiop->uio_resid > MAXOFF32_T)
445 445 return (EINVAL);
446 446
447 447 /*
448 448 * Bypass VM if caching has been disabled (e.g., locking) or if
449 449 * using client-side direct I/O and the file is not mmap'd and
450 450 * there are no cached pages.
451 451 */
452 452 if ((vp->v_flag & VNOCACHE) ||
453 453 (((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO)) &&
454 454 rp->r_mapcnt == 0 && rp->r_inmap == 0 &&
455 455 !vn_has_cached_data(vp))) {
456 456 size_t bufsize;
457 457 size_t resid = 0;
458 458
459 459 /*
460 460 * Let's try to do read in as large a chunk as we can
461 461 * (Filesystem (NFS client) bsize if possible/needed).
462 462 * For V3, this is 32K and for V2, this is 8K.
463 463 */
464 464 bufsize = MIN(uiop->uio_resid, VTOMI(vp)->mi_curread);
465 465 base = kmem_alloc(bufsize, KM_SLEEP);
466 466 do {
467 467 n = MIN(uiop->uio_resid, bufsize);
468 468 error = nfsread(vp, base, uiop->uio_offset, n,
469 469 &resid, cr);
470 470 if (!error) {
/*
 * NOTE(review): resid is presumably the part of the request that
 * nfsread() did not fill; after the subtraction n is the byte count
 * to hand to the caller.
 */
471 471 n -= resid;
472 472 error = uiomove(base, n, UIO_READ, uiop);
473 473 }
/* n == 0 means nothing was transferred this pass, so stop. */
474 474 } while (!error && uiop->uio_resid > 0 && n > 0);
475 475 kmem_free(base, bufsize);
476 476 return (error);
477 477 }
478 478
479 479 error = 0;
480 480
481 481 do {
482 482 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
483 483 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
484 484 n = MIN(MAXBSIZE - on, uiop->uio_resid);
485 485
486 486 error = nfs_validate_caches(vp, cr);
487 487 if (error)
488 488 break;
489 489
/* Wait, interruptibly, until RINCACHEPURGE is no longer set. */
490 490 mutex_enter(&rp->r_statelock);
491 491 while (rp->r_flags & RINCACHEPURGE) {
492 492 if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
493 493 mutex_exit(&rp->r_statelock);
494 494 return (EINTR);
495 495 }
496 496 }
/* Clip the transfer to the bytes remaining before r_size. */
497 497 diff = rp->r_size - uiop->uio_loffset;
498 498 mutex_exit(&rp->r_statelock);
499 499 if (diff <= 0)
500 500 break;
501 501 if (diff < n)
502 502 n = (size_t)diff;
503 503
504 504 if (vpm_enable) {
505 505 /*
506 506 * Copy data.
507 507 */
508 508 error = vpm_data_copy(vp, off + on, n, uiop,
509 509 1, NULL, 0, S_READ);
510 510 } else {
511 511 base = segmap_getmapflt(segkmap, vp, off + on, n,
512 512 1, S_READ);
513 513 error = uiomove(base + on, n, UIO_READ, uiop);
514 514 }
515 515
516 516 if (!error) {
517 517 /*
518 518 * If read a whole block or read to eof,
519 519 * won't need this buffer again soon.
520 520 */
521 521 mutex_enter(&rp->r_statelock);
522 522 if (n + on == MAXBSIZE ||
523 523 uiop->uio_loffset == rp->r_size)
524 524 flags = SM_DONTNEED;
525 525 else
526 526 flags = 0;
527 527 mutex_exit(&rp->r_statelock);
528 528 if (vpm_enable) {
529 529 error = vpm_sync_pages(vp, off, n, flags);
530 530 } else {
531 531 error = segmap_release(segkmap, base, flags);
532 532 }
533 533 } else {
/* The copy failed: release with no flags and keep the copy error. */
534 534 if (vpm_enable) {
535 535 (void) vpm_sync_pages(vp, off, n, 0);
536 536 } else {
537 537 (void) segmap_release(segkmap, base, 0);
538 538 }
539 539 }
540 540 } while (!error && uiop->uio_resid > 0);
541 541
542 542 return (error);
543 543 }
544 544
/*
 * Write from the caller's uio to a regular NFS file.
 *
 * With FAPPEND, an r_rwlock held as reader is dropped and re-acquired as
 * writer, and the starting offset is set to the file size reported by
 * nfsgetattr().  The request is then checked against the 32-bit offset
 * range and clipped to the file size limit in uio_llimit; a write that
 * starts at or beyond that limit triggers the RLIMIT_FSIZE rctl action
 * and fails with EFBIG.
 *
 * r_lkserlock is held as reader while data is transferred.  When caching
 * is bypassed the data goes through a temporary kernel buffer to
 * nfswrite(); otherwise it is copied into the page cache with writerp()
 * one MAXBSIZE-aligned window at a time.
 *
 * On failure uio_resid and uio_loffset are restored to the values they
 * had at the start of the chunk that failed (plus any clipped remainder).
 *
 * Returns 0 on success, EIO on a zone mismatch, EISDIR for a non-VREG
 * vnode, EFBIG / EINVAL for out-of-range offsets, EINTR if interrupted
 * while waiting, ESTALE (or the saved r_error) when the rnode is marked
 * RSTALE, or the error from the lower layers.
 */
545 545 /* ARGSUSED */
546 546 static int
547 547 nfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
548 548 caller_context_t *ct)
549 549 {
550 550 rnode_t *rp;
551 551 u_offset_t off;
552 552 caddr_t base;
553 553 uint_t flags;
554 554 int remainder;
555 555 size_t n;
556 556 int on;
557 557 int error;
558 558 int resid;
559 559 offset_t offset;
560 560 rlim_t limit;
561 561 mntinfo_t *mi;
562 562
563 563 rp = VTOR(vp);
564 564
565 565 mi = VTOMI(vp);
566 566 if (nfs_zone() != mi->mi_zone)
567 567 return (EIO);
568 568 if (vp->v_type != VREG)
569 569 return (EISDIR);
570 570
571 571 if (uiop->uio_resid == 0)
572 572 return (0);
573 573
574 574 if (ioflag & FAPPEND) {
575 575 struct vattr va;
576 576
577 577 /*
578 578 * Must serialize if appending.
579 579 */
580 580 if (nfs_rw_lock_held(&rp->r_rwlock, RW_READER)) {
581 581 nfs_rw_exit(&rp->r_rwlock);
582 582 if (nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER,
583 583 INTR(vp)))
584 584 return (EINTR);
585 585 }
586 586
/* Appends start at the current file size. */
587 587 va.va_mask = AT_SIZE;
588 588 error = nfsgetattr(vp, &va, cr);
589 589 if (error)
590 590 return (error);
591 591 uiop->uio_loffset = va.va_size;
592 592 }
593 593
594 594 if (uiop->uio_loffset > MAXOFF32_T)
595 595 return (EFBIG);
596 596
/* offset is, for now, the end offset of the requested write. */
597 597 offset = uiop->uio_loffset + uiop->uio_resid;
598 598
599 599 if (uiop->uio_loffset < 0 || offset > MAXOFF32_T)
600 600 return (EINVAL);
601 601
602 602 if (uiop->uio_llimit > (rlim64_t)MAXOFF32_T) {
603 603 limit = MAXOFF32_T;
604 604 } else {
605 605 limit = (rlim_t)uiop->uio_llimit;
606 606 }
607 607
608 608 /*
609 609 * Check to make sure that the process will not exceed
610 610 * its limit on file size. It is okay to write up to
611 611 * the limit, but not beyond. Thus, the write which
612 612 * reaches the limit will be short and the next write
613 613 * will return an error.
614 614 */
615 615 remainder = 0;
616 616 if (offset > limit) {
/* remainder is the clipped tail; it is added back to uio_resid on exit. */
617 617 remainder = offset - limit;
618 618 uiop->uio_resid = limit - uiop->uio_offset;
619 619 if (uiop->uio_resid <= 0) {
620 620 proc_t *p = ttoproc(curthread);
621 621
622 622 uiop->uio_resid += remainder;
623 623 mutex_enter(&p->p_lock);
624 624 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
625 625 p->p_rctls, p, RCA_UNSAFE_SIGINFO);
626 626 mutex_exit(&p->p_lock);
627 627 return (EFBIG);
628 628 }
629 629 }
630 630
/* Held until the "bottom" label; every path past here exits through it. */
631 631 if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_READER, INTR(vp)))
632 632 return (EINTR);
633 633
634 634 /*
635 635 * Bypass VM if caching has been disabled (e.g., locking) or if
636 636 * using client-side direct I/O and the file is not mmap'd and
637 637 * there are no cached pages.
638 638 */
639 639 if ((vp->v_flag & VNOCACHE) ||
640 640 (((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO)) &&
641 641 rp->r_mapcnt == 0 && rp->r_inmap == 0 &&
642 642 !vn_has_cached_data(vp))) {
643 643 size_t bufsize;
644 644 int count;
645 645 uint_t org_offset;
646 646
/*
 * Also entered by goto from the cached path below when faulting in a
 * page fails with EACCES.
 */
647 647 nfs_fwrite:
648 648 if (rp->r_flags & RSTALE) {
649 649 resid = uiop->uio_resid;
650 650 offset = uiop->uio_loffset;
651 651 error = rp->r_error;
652 652 /*
653 653 * A close may have cleared r_error, if so,
654 654 * propagate ESTALE error return properly
655 655 */
656 656 if (error == 0)
657 657 error = ESTALE;
658 658 goto bottom;
659 659 }
660 660 bufsize = MIN(uiop->uio_resid, mi->mi_curwrite);
661 661 base = kmem_alloc(bufsize, KM_SLEEP);
662 662 do {
/* Snapshot the uio position so a failed chunk can be rewound. */
663 663 resid = uiop->uio_resid;
664 664 offset = uiop->uio_loffset;
665 665 count = MIN(uiop->uio_resid, bufsize);
666 666 org_offset = uiop->uio_offset;
667 667 error = uiomove(base, count, UIO_WRITE, uiop);
668 668 if (!error) {
669 669 error = nfswrite(vp, base, org_offset,
670 670 count, cr);
671 671 }
672 672 } while (!error && uiop->uio_resid > 0);
673 673 kmem_free(base, bufsize);
674 674 goto bottom;
675 675 }
676 676
677 677 do {
678 678 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
679 679 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
680 680 n = MIN(MAXBSIZE - on, uiop->uio_resid);
681 681
/* Snapshot the uio position so a failed chunk can be rewound. */
682 682 resid = uiop->uio_resid;
683 683 offset = uiop->uio_loffset;
684 684
685 685 if (rp->r_flags & RSTALE) {
686 686 error = rp->r_error;
687 687 /*
688 688 * A close may have cleared r_error, if so,
689 689 * propagate ESTALE error return properly
690 690 */
691 691 if (error == 0)
692 692 error = ESTALE;
693 693 break;
694 694 }
695 695
696 696 /*
697 697 * Don't create dirty pages faster than they
698 698 * can be cleaned so that the system doesn't
699 699 * get imbalanced. If the async queue is
700 700 * maxed out, then wait for it to drain before
701 701 * creating more dirty pages. Also, wait for
702 702 * any threads doing pagewalks in the vop_getattr
703 703 * entry points so that they don't block for
704 704 * long periods.
705 705 */
706 706 mutex_enter(&rp->r_statelock);
707 707 while ((mi->mi_max_threads != 0 &&
708 708 rp->r_awcount > 2 * mi->mi_max_threads) ||
709 709 rp->r_gcount > 0) {
710 710 if (INTR(vp)) {
711 711 klwp_t *lwp = ttolwp(curthread);
712 712
/*
 * NOTE(review): lwp_nostop is raised around the interruptible wait,
 * presumably to keep the lwp from being stopped while it sleeps here --
 * confirm.
 */
713 713 if (lwp != NULL)
714 714 lwp->lwp_nostop++;
715 715 if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
716 716 mutex_exit(&rp->r_statelock);
717 717 if (lwp != NULL)
718 718 lwp->lwp_nostop--;
719 719 error = EINTR;
720 720 goto bottom;
721 721 }
722 722 if (lwp != NULL)
723 723 lwp->lwp_nostop--;
724 724 } else
725 725 cv_wait(&rp->r_cv, &rp->r_statelock);
726 726 }
727 727 mutex_exit(&rp->r_statelock);
728 728
729 729 /*
730 730 * Touch the page and fault it in if it is not in core
731 731 * before segmap_getmapflt or vpm_data_copy can lock it.
732 732 * This is to avoid the deadlock if the buffer is mapped
733 733 * to the same file through mmap which we want to write.
734 734 */
735 735 uio_prefaultpages((long)n, uiop);
736 736
737 737 if (vpm_enable) {
738 738 /*
739 739 * It will use kpm mappings, so no need to
740 740 * pass an address.
741 741 */
742 742 error = writerp(rp, NULL, n, uiop, 0);
743 743 } else {
744 744 if (segmap_kpm) {
745 745 int pon = uiop->uio_loffset & PAGEOFFSET;
746 746 size_t pn = MIN(PAGESIZE - pon,
747 747 uiop->uio_resid);
748 748 int pagecreate;
749 749
/*
 * pagecreate is set when the write starts on a page boundary and
 * either covers the whole page or reaches to or past r_size.
 */
750 750 mutex_enter(&rp->r_statelock);
751 751 pagecreate = (pon == 0) && (pn == PAGESIZE ||
752 752 uiop->uio_loffset + pn >= rp->r_size);
753 753 mutex_exit(&rp->r_statelock);
754 754
755 755 base = segmap_getmapflt(segkmap, vp, off + on,
756 756 pn, !pagecreate, S_WRITE);
757 757
758 758 error = writerp(rp, base + pon, n, uiop,
759 759 pagecreate);
760 760
761 761 } else {
762 762 base = segmap_getmapflt(segkmap, vp, off + on,
763 763 n, 0, S_READ);
764 764 error = writerp(rp, base + on, n, uiop, 0);
765 765 }
766 766 }
767 767
768 768 if (!error) {
769 769 if (mi->mi_flags & MI_NOAC)
770 770 flags = SM_WRITE;
771 771 else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) {
772 772 /*
773 773 * Have written a whole block.
774 774 * Start an asynchronous write
775 775 * and mark the buffer to
776 776 * indicate that it won't be
777 777 * needed again soon.
778 778 */
779 779 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
780 780 } else
781 781 flags = 0;
/* Synchronous I/O flags or ROUTOFSPACE force a non-async write. */
782 782 if ((ioflag & (FSYNC|FDSYNC)) ||
783 783 (rp->r_flags & ROUTOFSPACE)) {
784 784 flags &= ~SM_ASYNC;
785 785 flags |= SM_WRITE;
786 786 }
787 787 if (vpm_enable) {
788 788 error = vpm_sync_pages(vp, off, n, flags);
789 789 } else {
790 790 error = segmap_release(segkmap, base, flags);
791 791 }
792 792 } else {
793 793 if (vpm_enable) {
794 794 (void) vpm_sync_pages(vp, off, n, 0);
795 795 } else {
796 796 (void) segmap_release(segkmap, base, 0);
797 797 }
798 798 /*
799 799 * In the event that we got an access error while
800 800 * faulting in a page for a write-only file just
801 801 * force a write.
802 802 */
803 803 if (error == EACCES)
804 804 goto nfs_fwrite;
805 805 }
806 806 } while (!error && uiop->uio_resid > 0);
807 807
/*
 * Common exit: rewind the uio to the start of the failed chunk on error,
 * give back the part clipped by the file size limit, and drop
 * r_lkserlock.
 */
808 808 bottom:
809 809 if (error) {
810 810 uiop->uio_resid = resid + remainder;
811 811 uiop->uio_loffset = offset;
812 812 } else
813 813 uiop->uio_resid += remainder;
814 814
815 815 nfs_rw_exit(&rp->r_lkserlock);
816 816
817 817 return (error);
818 818 }
819 819
820 820 /*
821 821 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
822 822 */
823 823 static int
824 824 nfs_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
825 825 int flags, cred_t *cr)
826 826 {
827 827 struct buf *bp;
828 828 int error;
829 829
830 830 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
831 831 bp = pageio_setup(pp, len, vp, flags);
832 832 ASSERT(bp != NULL);
833 833
834 834 /*
835 835 * pageio_setup should have set b_addr to 0. This
836 836 * is correct since we want to do I/O on a page
837 837 * boundary. bp_mapin will use this addr to calculate
838 838 * an offset, and then set b_addr to the kernel virtual
839 839 * address it allocated for us.
840 840 */
841 841 ASSERT(bp->b_un.b_addr == 0);
842 842
843 843 bp->b_edev = 0;
844 844 bp->b_dev = 0;
845 845 bp->b_lblkno = lbtodb(off);
846 846 bp->b_file = vp;
847 847 bp->b_offset = (offset_t)off;
848 848 bp_mapin(bp);
849 849
850 850 error = nfs_bio(bp, cr);
851 851
852 852 bp_mapout(bp);
853 853 pageio_done(bp);
854 854
855 855 return (error);
856 856 }
857 857
858 858 /*
859 859 * Write to file. Writes to remote server in largest size
860 860 * chunks that the server can handle. Write is synchronous.
861 861 */
862 862 static int
863 863 nfswrite(vnode_t *vp, caddr_t base, uint_t offset, int count, cred_t *cr)
864 864 {
865 865 rnode_t *rp;
866 866 mntinfo_t *mi;
867 867 struct nfswriteargs wa;
868 868 struct nfsattrstat ns;
869 869 int error;
870 870 int tsize;
871 871 int douprintf;
872 872
873 873 douprintf = 1;
874 874
875 875 rp = VTOR(vp);
876 876 mi = VTOMI(vp);
877 877
878 878 ASSERT(nfs_zone() == mi->mi_zone);
879 879
880 880 wa.wa_args = &wa.wa_args_buf;
881 881 wa.wa_fhandle = *VTOFH(vp);
882 882
883 883 do {
884 884 tsize = MIN(mi->mi_curwrite, count);
885 885 wa.wa_data = base;
886 886 wa.wa_begoff = offset;
887 887 wa.wa_totcount = tsize;
888 888 wa.wa_count = tsize;
889 889 wa.wa_offset = offset;
890 890
891 891 if (mi->mi_io_kstats) {
892 892 mutex_enter(&mi->mi_lock);
893 893 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
894 894 mutex_exit(&mi->mi_lock);
895 895 }
896 896 wa.wa_mblk = NULL;
897 897 do {
898 898 error = rfs2call(mi, RFS_WRITE,
899 899 xdr_writeargs, (caddr_t)&wa,
900 900 xdr_attrstat, (caddr_t)&ns, cr,
901 901 &douprintf, &ns.ns_status, 0, NULL);
902 902 } while (error == ENFS_TRYAGAIN);
903 903 if (mi->mi_io_kstats) {
904 904 mutex_enter(&mi->mi_lock);
905 905 kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats));
906 906 mutex_exit(&mi->mi_lock);
907 907 }
908 908
909 909 if (!error) {
910 910 error = geterrno(ns.ns_status);
911 911 /*
912 912 * Can't check for stale fhandle and purge caches
913 913 * here because pages are held by nfs_getpage.
914 914 * Just mark the attribute cache as timed out
915 915 * and set RWRITEATTR to indicate that the file
916 916 * was modified with a WRITE operation.
917 917 */
918 918 if (!error) {
919 919 count -= tsize;
920 920 base += tsize;
921 921 offset += tsize;
922 922 if (mi->mi_io_kstats) {
923 923 mutex_enter(&mi->mi_lock);
924 924 KSTAT_IO_PTR(mi->mi_io_kstats)->
925 925 writes++;
926 926 KSTAT_IO_PTR(mi->mi_io_kstats)->
927 927 nwritten += tsize;
928 928 mutex_exit(&mi->mi_lock);
929 929 }
930 930 lwp_stat_update(LWP_STAT_OUBLK, 1);
931 931 mutex_enter(&rp->r_statelock);
932 932 PURGE_ATTRCACHE_LOCKED(rp);
933 933 rp->r_flags |= RWRITEATTR;
934 934 mutex_exit(&rp->r_statelock);
935 935 }
936 936 }
937 937 } while (!error && count);
938 938
939 939 return (error);
940 940 }
941 941
942 942 /*
943 943 * Read from a file. Reads data in largest chunks our interface can handle.
944 944 */
945 945 static int
946 946 nfsread(vnode_t *vp, caddr_t base, uint_t offset,
947 947 int count, size_t *residp, cred_t *cr)
948 948 {
949 949 mntinfo_t *mi;
950 950 struct nfsreadargs ra;
951 951 struct nfsrdresult rr;
952 952 int tsize;
953 953 int error;
954 954 int douprintf;
955 955 failinfo_t fi;
956 956 rnode_t *rp;
957 957 struct vattr va;
958 958 hrtime_t t;
959 959
960 960 rp = VTOR(vp);
961 961 mi = VTOMI(vp);
962 962
963 963 ASSERT(nfs_zone() == mi->mi_zone);
964 964
965 965 douprintf = 1;
966 966
967 967 ra.ra_fhandle = *VTOFH(vp);
968 968
969 969 fi.vp = vp;
970 970 fi.fhp = (caddr_t)&ra.ra_fhandle;
971 971 fi.copyproc = nfscopyfh;
972 972 fi.lookupproc = nfslookup;
973 973 fi.xattrdirproc = acl_getxattrdir2;
974 974
975 975 do {
976 976 if (mi->mi_io_kstats) {
977 977 mutex_enter(&mi->mi_lock);
978 978 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
979 979 mutex_exit(&mi->mi_lock);
980 980 }
981 981
982 982 do {
983 983 tsize = MIN(mi->mi_curread, count);
984 984 rr.rr_data = base;
985 985 ra.ra_offset = offset;
986 986 ra.ra_totcount = tsize;
987 987 ra.ra_count = tsize;
988 988 ra.ra_data = base;
989 989 t = gethrtime();
990 990 error = rfs2call(mi, RFS_READ,
991 991 xdr_readargs, (caddr_t)&ra,
992 992 xdr_rdresult, (caddr_t)&rr, cr,
993 993 &douprintf, &rr.rr_status, 0, &fi);
994 994 } while (error == ENFS_TRYAGAIN);
995 995
996 996 if (mi->mi_io_kstats) {
997 997 mutex_enter(&mi->mi_lock);
998 998 kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats));
999 999 mutex_exit(&mi->mi_lock);
1000 1000 }
1001 1001
1002 1002 if (!error) {
1003 1003 error = geterrno(rr.rr_status);
1004 1004 if (!error) {
1005 1005 count -= rr.rr_count;
1006 1006 base += rr.rr_count;
1007 1007 offset += rr.rr_count;
1008 1008 if (mi->mi_io_kstats) {
1009 1009 mutex_enter(&mi->mi_lock);
1010 1010 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
1011 1011 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
1012 1012 rr.rr_count;
1013 1013 mutex_exit(&mi->mi_lock);
1014 1014 }
1015 1015 lwp_stat_update(LWP_STAT_INBLK, 1);
1016 1016 }
1017 1017 }
1018 1018 } while (!error && count && rr.rr_count == tsize);
1019 1019
1020 1020 *residp = count;
1021 1021
1022 1022 if (!error) {
1023 1023 /*
1024 1024 * Since no error occurred, we have the current
1025 1025 * attributes and we need to do a cache check and then
1026 1026 * potentially update the cached attributes. We can't
1027 1027 * use the normal attribute check and cache mechanisms
1028 1028 * because they might cause a cache flush which would
1029 1029 * deadlock. Instead, we just check the cache to see
1030 1030 * if the attributes have changed. If it is, then we
1031 1031 * just mark the attributes as out of date. The next
1032 1032 * time that the attributes are checked, they will be
1033 1033 * out of date, new attributes will be fetched, and
1034 1034 * the page cache will be flushed. If the attributes
1035 1035 * weren't changed, then we just update the cached
1036 1036 * attributes with these attributes.
1037 1037 */
1038 1038 /*
1039 1039 * If NFS_ACL is supported on the server, then the
1040 1040 * attributes returned by server may have minimal
1041 1041 * permissions sometimes denying access to users having
1042 1042 * proper access. To get the proper attributes, mark
1043 1043 * the attributes as expired so that they will be
1044 1044 * regotten via the NFS_ACL GETATTR2 procedure.
1045 1045 */
1046 1046 error = nattr_to_vattr(vp, &rr.rr_attr, &va);
1047 1047 mutex_enter(&rp->r_statelock);
1048 1048 if (error || !CACHE_VALID(rp, va.va_mtime, va.va_size) ||
1049 1049 (mi->mi_flags & MI_ACL)) {
1050 1050 mutex_exit(&rp->r_statelock);
1051 1051 PURGE_ATTRCACHE(vp);
1052 1052 } else {
1053 1053 if (rp->r_mtime <= t) {
1054 1054 nfs_attrcache_va(vp, &va);
1055 1055 }
1056 1056 mutex_exit(&rp->r_statelock);
1057 1057 }
1058 1058 }
1059 1059
1060 1060 return (error);
1061 1061 }
1062 1062
1063 1063 /* ARGSUSED */
1064 1064 static int
1065 1065 nfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp,
1066 1066 caller_context_t *ct)
1067 1067 {
1068 1068
1069 1069 if (nfs_zone() != VTOMI(vp)->mi_zone)
1070 1070 return (EIO);
1071 1071 switch (cmd) {
1072 1072 case _FIODIRECTIO:
1073 1073 return (nfs_directio(vp, (int)arg, cr));
1074 1074 default:
1075 1075 return (ENOTTY);
1076 1076 }
1077 1077 }
1078 1078
1079 1079 /* ARGSUSED */
1080 1080 static int
1081 1081 nfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1082 1082 caller_context_t *ct)
1083 1083 {
1084 1084 int error;
1085 1085 rnode_t *rp;
1086 1086
1087 1087 if (nfs_zone() != VTOMI(vp)->mi_zone)
1088 1088 return (EIO);
1089 1089 /*
1090 1090 * If it has been specified that the return value will
1091 1091 * just be used as a hint, and we are only being asked
1092 1092 * for size, fsid or rdevid, then return the client's
1093 1093 * notion of these values without checking to make sure
1094 1094 * that the attribute cache is up to date.
1095 1095 * The whole point is to avoid an over the wire GETATTR
1096 1096 * call.
1097 1097 */
1098 1098 rp = VTOR(vp);
1099 1099 if (flags & ATTR_HINT) {
1100 1100 if (vap->va_mask ==
1101 1101 (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
1102 1102 mutex_enter(&rp->r_statelock);
1103 1103 if (vap->va_mask | AT_SIZE)
1104 1104 vap->va_size = rp->r_size;
1105 1105 if (vap->va_mask | AT_FSID)
1106 1106 vap->va_fsid = rp->r_attr.va_fsid;
1107 1107 if (vap->va_mask | AT_RDEV)
1108 1108 vap->va_rdev = rp->r_attr.va_rdev;
1109 1109 mutex_exit(&rp->r_statelock);
1110 1110 return (0);
1111 1111 }
1112 1112 }
1113 1113
1114 1114 /*
1115 1115 * Only need to flush pages if asking for the mtime
1116 1116 * and if there any dirty pages or any outstanding
1117 1117 * asynchronous (write) requests for this file.
1118 1118 */
1119 1119 if (vap->va_mask & AT_MTIME) {
1120 1120 if (vn_has_cached_data(vp) &&
1121 1121 ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) {
1122 1122 mutex_enter(&rp->r_statelock);
1123 1123 rp->r_gcount++;
1124 1124 mutex_exit(&rp->r_statelock);
1125 1125 error = nfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1126 1126 mutex_enter(&rp->r_statelock);
1127 1127 if (error && (error == ENOSPC || error == EDQUOT)) {
1128 1128 if (!rp->r_error)
1129 1129 rp->r_error = error;
1130 1130 }
1131 1131 if (--rp->r_gcount == 0)
1132 1132 cv_broadcast(&rp->r_cv);
1133 1133 mutex_exit(&rp->r_statelock);
1134 1134 }
1135 1135 }
1136 1136
1137 1137 return (nfsgetattr(vp, vap, cr));
1138 1138 }
1139 1139
1140 1140 /*ARGSUSED4*/
1141 1141 static int
1142 1142 nfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1143 1143 caller_context_t *ct)
1144 1144 {
1145 1145 int error;
1146 1146 uint_t mask;
1147 1147 struct vattr va;
1148 1148
1149 1149 mask = vap->va_mask;
1150 1150
1151 1151 if (mask & AT_NOSET)
1152 1152 return (EINVAL);
1153 1153
1154 1154 if ((mask & AT_SIZE) &&
1155 1155 vap->va_type == VREG &&
1156 1156 vap->va_size > MAXOFF32_T)
1157 1157 return (EFBIG);
1158 1158
1159 1159 if (nfs_zone() != VTOMI(vp)->mi_zone)
1160 1160 return (EIO);
1161 1161
1162 1162 va.va_mask = AT_UID | AT_MODE;
1163 1163
1164 1164 error = nfsgetattr(vp, &va, cr);
1165 1165 if (error)
1166 1166 return (error);
1167 1167
1168 1168 error = secpolicy_vnode_setattr(cr, vp, vap, &va, flags, nfs_accessx,
1169 1169 vp);
1170 1170
1171 1171 if (error)
1172 1172 return (error);
1173 1173
1174 1174 error = nfssetattr(vp, vap, flags, cr);
1175 1175
1176 1176 if (error == 0 && (mask & AT_SIZE) && vap->va_size == 0)
1177 1177 vnevent_truncate(vp, ct);
1178 1178
1179 1179 return (error);
1180 1180 }
1181 1181
1182 1182 static int
1183 1183 nfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
1184 1184 {
1185 1185 int error;
1186 1186 uint_t mask;
1187 1187 struct nfssaargs args;
1188 1188 struct nfsattrstat ns;
1189 1189 int douprintf;
1190 1190 rnode_t *rp;
1191 1191 struct vattr va;
1192 1192 mode_t omode;
1193 1193 mntinfo_t *mi;
1194 1194 vsecattr_t *vsp;
1195 1195 hrtime_t t;
1196 1196
1197 1197 mask = vap->va_mask;
1198 1198
1199 1199 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
1200 1200
1201 1201 rp = VTOR(vp);
1202 1202
1203 1203 /*
1204 1204 * Only need to flush pages if there are any pages and
1205 1205 * if the file is marked as dirty in some fashion. The
1206 1206 * file must be flushed so that we can accurately
1207 1207 * determine the size of the file and the cached data
1208 1208 * after the SETATTR returns. A file is considered to
1209 1209 * be dirty if it is either marked with RDIRTY, has
1210 1210 * outstanding i/o's active, or is mmap'd. In this
1211 1211 * last case, we can't tell whether there are dirty
1212 1212 * pages, so we flush just to be sure.
1213 1213 */
1214 1214 if (vn_has_cached_data(vp) &&
1215 1215 ((rp->r_flags & RDIRTY) ||
1216 1216 rp->r_count > 0 ||
1217 1217 rp->r_mapcnt > 0)) {
1218 1218 ASSERT(vp->v_type != VCHR);
1219 1219 error = nfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
1220 1220 if (error && (error == ENOSPC || error == EDQUOT)) {
1221 1221 mutex_enter(&rp->r_statelock);
1222 1222 if (!rp->r_error)
1223 1223 rp->r_error = error;
1224 1224 mutex_exit(&rp->r_statelock);
1225 1225 }
1226 1226 }
1227 1227
1228 1228 /*
1229 1229 * If the system call was utime(2) or utimes(2) and the
1230 1230 * application did not specify the times, then set the
1231 1231 * mtime nanosecond field to 1 billion. This will get
1232 1232 * translated from 1 billion nanoseconds to 1 million
1233 1233 * microseconds in the over the wire request. The
1234 1234 * server will use 1 million in the microsecond field
1235 1235 * to tell whether both the mtime and atime should be
1236 1236 * set to the server's current time.
1237 1237 *
1238 1238 * This is an overload of the protocol and should be
1239 1239 * documented in the NFS Version 2 protocol specification.
1240 1240 */
1241 1241 if ((mask & AT_MTIME) && !(flags & ATTR_UTIME)) {
1242 1242 vap->va_mtime.tv_nsec = 1000000000;
1243 1243 if (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
1244 1244 NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
1245 1245 error = vattr_to_sattr(vap, &args.saa_sa);
1246 1246 } else {
1247 1247 /*
1248 1248 * Use server times. vap time values will not be used.
1249 1249 * To ensure no time overflow, make sure vap has
1250 1250 * valid values, but retain the original values.
1251 1251 */
1252 1252 timestruc_t mtime = vap->va_mtime;
1253 1253 timestruc_t atime = vap->va_atime;
1254 1254 time_t now;
1255 1255
1256 1256 now = gethrestime_sec();
1257 1257 if (NFS_TIME_T_OK(now)) {
1258 1258 /* Just in case server does not know of this */
1259 1259 vap->va_mtime.tv_sec = now;
1260 1260 vap->va_atime.tv_sec = now;
1261 1261 } else {
1262 1262 vap->va_mtime.tv_sec = 0;
1263 1263 vap->va_atime.tv_sec = 0;
1264 1264 }
1265 1265 error = vattr_to_sattr(vap, &args.saa_sa);
1266 1266 /* set vap times back on */
1267 1267 vap->va_mtime = mtime;
1268 1268 vap->va_atime = atime;
1269 1269 }
1270 1270 } else {
1271 1271 /* Either do not set times or use the client specified times */
1272 1272 error = vattr_to_sattr(vap, &args.saa_sa);
1273 1273 }
1274 1274 if (error) {
1275 1275 /* req time field(s) overflow - return immediately */
1276 1276 return (error);
1277 1277 }
1278 1278 args.saa_fh = *VTOFH(vp);
1279 1279
1280 1280 va.va_mask = AT_MODE;
1281 1281 error = nfsgetattr(vp, &va, cr);
1282 1282 if (error)
1283 1283 return (error);
1284 1284 omode = va.va_mode;
1285 1285
1286 1286 mi = VTOMI(vp);
1287 1287
1288 1288 douprintf = 1;
1289 1289
1290 1290 t = gethrtime();
1291 1291
1292 1292 error = rfs2call(mi, RFS_SETATTR,
1293 1293 xdr_saargs, (caddr_t)&args,
1294 1294 xdr_attrstat, (caddr_t)&ns, cr,
1295 1295 &douprintf, &ns.ns_status, 0, NULL);
1296 1296
1297 1297 /*
1298 1298 * Purge the access cache and ACL cache if changing either the
1299 1299 * owner of the file, the group owner, or the mode. These may
1300 1300 * change the access permissions of the file, so purge old
1301 1301 * information and start over again.
1302 1302 */
1303 1303 if ((mask & (AT_UID | AT_GID | AT_MODE)) && (mi->mi_flags & MI_ACL)) {
1304 1304 (void) nfs_access_purge_rp(rp);
1305 1305 if (rp->r_secattr != NULL) {
1306 1306 mutex_enter(&rp->r_statelock);
1307 1307 vsp = rp->r_secattr;
1308 1308 rp->r_secattr = NULL;
1309 1309 mutex_exit(&rp->r_statelock);
1310 1310 if (vsp != NULL)
1311 1311 nfs_acl_free(vsp);
1312 1312 }
1313 1313 }
1314 1314
1315 1315 if (!error) {
1316 1316 error = geterrno(ns.ns_status);
1317 1317 if (!error) {
1318 1318 /*
1319 1319 * If changing the size of the file, invalidate
1320 1320 * any local cached data which is no longer part
1321 1321 * of the file. We also possibly invalidate the
1322 1322 * last page in the file. We could use
1323 1323 * pvn_vpzero(), but this would mark the page as
1324 1324 * modified and require it to be written back to
1325 1325 * the server for no particularly good reason.
1326 1326 * This way, if we access it, then we bring it
1327 1327 * back in. A read should be cheaper than a
1328 1328 * write.
1329 1329 */
1330 1330 if (mask & AT_SIZE) {
1331 1331 nfs_invalidate_pages(vp,
1332 1332 (vap->va_size & PAGEMASK), cr);
1333 1333 }
1334 1334 (void) nfs_cache_fattr(vp, &ns.ns_attr, &va, t, cr);
1335 1335 /*
1336 1336 * If NFS_ACL is supported on the server, then the
1337 1337 * attributes returned by server may have minimal
1338 1338 * permissions sometimes denying access to users having
1339 1339 * proper access. To get the proper attributes, mark
1340 1340 * the attributes as expired so that they will be
1341 1341 * regotten via the NFS_ACL GETATTR2 procedure.
1342 1342 */
1343 1343 if (mi->mi_flags & MI_ACL) {
1344 1344 PURGE_ATTRCACHE(vp);
1345 1345 }
1346 1346 /*
1347 1347 * This next check attempts to deal with NFS
1348 1348 * servers which can not handle increasing
1349 1349 * the size of the file via setattr. Most
1350 1350 * of these servers do not return an error,
1351 1351 * but do not change the size of the file.
1352 1352 * Hence, this check and then attempt to set
1353 1353 * the file size by writing 1 byte at the
1354 1354 * offset of the end of the file that we need.
1355 1355 */
1356 1356 if ((mask & AT_SIZE) &&
1357 1357 ns.ns_attr.na_size < (uint32_t)vap->va_size) {
1358 1358 char zb = '\0';
1359 1359
1360 1360 error = nfswrite(vp, &zb,
1361 1361 vap->va_size - sizeof (zb),
1362 1362 sizeof (zb), cr);
1363 1363 }
1364 1364 /*
1365 1365 * Some servers will change the mode to clear the setuid
1366 1366 * and setgid bits when changing the uid or gid. The
1367 1367 * client needs to compensate appropriately.
1368 1368 */
1369 1369 if (mask & (AT_UID | AT_GID)) {
1370 1370 int terror;
1371 1371
1372 1372 va.va_mask = AT_MODE;
1373 1373 terror = nfsgetattr(vp, &va, cr);
1374 1374 if (!terror &&
1375 1375 (((mask & AT_MODE) &&
1376 1376 va.va_mode != vap->va_mode) ||
1377 1377 (!(mask & AT_MODE) &&
1378 1378 va.va_mode != omode))) {
1379 1379 va.va_mask = AT_MODE;
1380 1380 if (mask & AT_MODE)
1381 1381 va.va_mode = vap->va_mode;
1382 1382 else
1383 1383 va.va_mode = omode;
1384 1384 (void) nfssetattr(vp, &va, 0, cr);
1385 1385 }
1386 1386 }
1387 1387 } else {
1388 1388 PURGE_ATTRCACHE(vp);
1389 1389 PURGE_STALE_FH(error, vp, cr);
1390 1390 }
1391 1391 } else {
1392 1392 PURGE_ATTRCACHE(vp);
1393 1393 }
1394 1394
1395 1395 return (error);
1396 1396 }
1397 1397
1398 1398 static int
1399 1399 nfs_accessx(void *vp, int mode, cred_t *cr)
1400 1400 {
1401 1401 ASSERT(nfs_zone() == VTOMI((vnode_t *)vp)->mi_zone);
1402 1402 return (nfs_access(vp, mode, 0, cr, NULL));
1403 1403 }
1404 1404
1405 1405 /* ARGSUSED */
1406 1406 static int
1407 1407 nfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1408 1408 {
1409 1409 struct vattr va;
1410 1410 int error;
1411 1411 mntinfo_t *mi;
1412 1412 int shift = 0;
1413 1413
1414 1414 mi = VTOMI(vp);
1415 1415
1416 1416 if (nfs_zone() != mi->mi_zone)
1417 1417 return (EIO);
1418 1418 if (mi->mi_flags & MI_ACL) {
1419 1419 error = acl_access2(vp, mode, flags, cr);
1420 1420 if (mi->mi_flags & MI_ACL)
1421 1421 return (error);
1422 1422 }
1423 1423
1424 1424 va.va_mask = AT_MODE | AT_UID | AT_GID;
1425 1425 error = nfsgetattr(vp, &va, cr);
1426 1426 if (error)
1427 1427 return (error);
1428 1428
1429 1429 /*
1430 1430 * Disallow write attempts on read-only
1431 1431 * file systems, unless the file is a
1432 1432 * device node.
1433 1433 */
1434 1434 if ((mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1435 1435 return (EROFS);
1436 1436
1437 1437 /*
1438 1438 * Disallow attempts to access mandatory lock files.
1439 1439 */
1440 1440 if ((mode & (VWRITE | VREAD | VEXEC)) &&
1441 1441 MANDLOCK(vp, va.va_mode))
1442 1442 return (EACCES);
1443 1443
1444 1444 /*
1445 1445 * Access check is based on only
1446 1446 * one of owner, group, public.
1447 1447 * If not owner, then check group.
1448 1448 * If not a member of the group,
1449 1449 * then check public access.
1450 1450 */
1451 1451 if (crgetuid(cr) != va.va_uid) {
1452 1452 shift += 3;
1453 1453 if (!groupmember(va.va_gid, cr))
1454 1454 shift += 3;
1455 1455 }
1456 1456
1457 1457 return (secpolicy_vnode_access2(cr, vp, va.va_uid,
1458 1458 va.va_mode << shift, mode));
1459 1459 }
1460 1460
1461 1461 static int nfs_do_symlink_cache = 1;
1462 1462
1463 1463 /* ARGSUSED */
1464 1464 static int
1465 1465 nfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1466 1466 {
1467 1467 int error;
1468 1468 struct nfsrdlnres rl;
1469 1469 rnode_t *rp;
1470 1470 int douprintf;
1471 1471 failinfo_t fi;
1472 1472
1473 1473 /*
1474 1474 * We want to be consistent with UFS semantics so we will return
1475 1475 * EINVAL instead of ENXIO. This violates the XNFS spec and
1476 1476 * the RFC 1094, which are wrong any way. BUGID 1138002.
1477 1477 */
1478 1478 if (vp->v_type != VLNK)
1479 1479 return (EINVAL);
1480 1480
1481 1481 if (nfs_zone() != VTOMI(vp)->mi_zone)
1482 1482 return (EIO);
1483 1483
1484 1484 rp = VTOR(vp);
1485 1485 if (nfs_do_symlink_cache && rp->r_symlink.contents != NULL) {
1486 1486 error = nfs_validate_caches(vp, cr);
1487 1487 if (error)
1488 1488 return (error);
1489 1489 mutex_enter(&rp->r_statelock);
1490 1490 if (rp->r_symlink.contents != NULL) {
1491 1491 error = uiomove(rp->r_symlink.contents,
1492 1492 rp->r_symlink.len, UIO_READ, uiop);
1493 1493 mutex_exit(&rp->r_statelock);
1494 1494 return (error);
1495 1495 }
1496 1496 mutex_exit(&rp->r_statelock);
1497 1497 }
1498 1498
1499 1499
1500 1500 rl.rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
1501 1501
1502 1502 fi.vp = vp;
1503 1503 fi.fhp = NULL; /* no need to update, filehandle not copied */
1504 1504 fi.copyproc = nfscopyfh;
1505 1505 fi.lookupproc = nfslookup;
1506 1506 fi.xattrdirproc = acl_getxattrdir2;
1507 1507
1508 1508 douprintf = 1;
1509 1509
1510 1510 error = rfs2call(VTOMI(vp), RFS_READLINK,
1511 1511 xdr_readlink, (caddr_t)VTOFH(vp),
1512 1512 xdr_rdlnres, (caddr_t)&rl, cr,
1513 1513 &douprintf, &rl.rl_status, 0, &fi);
1514 1514
1515 1515 if (error) {
1516 1516
1517 1517 kmem_free((void *)rl.rl_data, NFS_MAXPATHLEN);
1518 1518 return (error);
1519 1519 }
1520 1520
1521 1521 error = geterrno(rl.rl_status);
1522 1522 if (!error) {
1523 1523 error = uiomove(rl.rl_data, (int)rl.rl_count, UIO_READ, uiop);
1524 1524 if (nfs_do_symlink_cache && rp->r_symlink.contents == NULL) {
1525 1525 mutex_enter(&rp->r_statelock);
1526 1526 if (rp->r_symlink.contents == NULL) {
1527 1527 rp->r_symlink.contents = rl.rl_data;
1528 1528 rp->r_symlink.len = (int)rl.rl_count;
1529 1529 rp->r_symlink.size = NFS_MAXPATHLEN;
1530 1530 mutex_exit(&rp->r_statelock);
1531 1531 } else {
1532 1532 mutex_exit(&rp->r_statelock);
1533 1533
1534 1534 kmem_free((void *)rl.rl_data,
1535 1535 NFS_MAXPATHLEN);
1536 1536 }
1537 1537 } else {
1538 1538
1539 1539 kmem_free((void *)rl.rl_data, NFS_MAXPATHLEN);
1540 1540 }
1541 1541 } else {
1542 1542 PURGE_STALE_FH(error, vp, cr);
1543 1543
1544 1544 kmem_free((void *)rl.rl_data, NFS_MAXPATHLEN);
1545 1545 }
1546 1546
1547 1547 /*
1548 1548 * Conform to UFS semantics (see comment above)
1549 1549 */
1550 1550 return (error == ENXIO ? EINVAL : error);
1551 1551 }
1552 1552
1553 1553 /*
1554 1554 * Flush local dirty pages to stable storage on the server.
1555 1555 *
1556 1556 * If FNODSYNC is specified, then there is nothing to do because
1557 1557 * metadata changes are not cached on the client before being
1558 1558 * sent to the server.
1559 1559 */
1560 1560 /* ARGSUSED */
1561 1561 static int
1562 1562 nfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1563 1563 {
1564 1564 int error;
1565 1565
1566 1566 if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
1567 1567 return (0);
1568 1568
1569 1569 if (nfs_zone() != VTOMI(vp)->mi_zone)
1570 1570 return (EIO);
1571 1571
1572 1572 error = nfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1573 1573 if (!error)
1574 1574 error = VTOR(vp)->r_error;
1575 1575 return (error);
1576 1576 }
1577 1577
1578 1578
1579 1579 /*
1580 1580 * Weirdness: if the file was removed or the target of a rename
1581 1581 * operation while it was open, it got renamed instead. Here we
1582 1582 * remove the renamed file.
1583 1583 */
1584 1584 /* ARGSUSED */
1585 1585 static void
1586 1586 nfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1587 1587 {
1588 1588 rnode_t *rp;
1589 1589
1590 1590 ASSERT(vp != DNLC_NO_VNODE);
1591 1591
1592 1592 /*
1593 1593 * If this is coming from the wrong zone, we let someone in the right
1594 1594 * zone take care of it asynchronously. We can get here due to
1595 1595 * VN_RELE() being called from pageout() or fsflush(). This call may
1596 1596 * potentially turn into an expensive no-op if, for instance, v_count
1597 1597 * gets incremented in the meantime, but it's still correct.
1598 1598 */
1599 1599 if (nfs_zone() != VTOMI(vp)->mi_zone) {
1600 1600 nfs_async_inactive(vp, cr, nfs_inactive);
1601 1601 return;
1602 1602 }
1603 1603
1604 1604 rp = VTOR(vp);
1605 1605 redo:
1606 1606 if (rp->r_unldvp != NULL) {
1607 1607 /*
1608 1608 * Save the vnode pointer for the directory where the
1609 1609 * unlinked-open file got renamed, then set it to NULL
1610 1610 * to prevent another thread from getting here before
1611 1611 * we're done with the remove. While we have the
1612 1612 * statelock, make local copies of the pertinent rnode
1613 1613 * fields. If we weren't to do this in an atomic way, the
1614 1614 * the unl* fields could become inconsistent with respect
1615 1615 * to each other due to a race condition between this
1616 1616 * code and nfs_remove(). See bug report 1034328.
1617 1617 */
1618 1618 mutex_enter(&rp->r_statelock);
1619 1619 if (rp->r_unldvp != NULL) {
1620 1620 vnode_t *unldvp;
1621 1621 char *unlname;
1622 1622 cred_t *unlcred;
1623 1623 struct nfsdiropargs da;
1624 1624 enum nfsstat status;
1625 1625 int douprintf;
1626 1626 int error;
1627 1627
1628 1628 unldvp = rp->r_unldvp;
1629 1629 rp->r_unldvp = NULL;
1630 1630 unlname = rp->r_unlname;
1631 1631 rp->r_unlname = NULL;
1632 1632 unlcred = rp->r_unlcred;
1633 1633 rp->r_unlcred = NULL;
1634 1634 mutex_exit(&rp->r_statelock);
1635 1635
1636 1636 /*
1637 1637 * If there are any dirty pages left, then flush
1638 1638 * them. This is unfortunate because they just
1639 1639 * may get thrown away during the remove operation,
1640 1640 * but we have to do this for correctness.
1641 1641 */
1642 1642 if (vn_has_cached_data(vp) &&
1643 1643 ((rp->r_flags & RDIRTY) || rp->r_count > 0)) {
1644 1644 ASSERT(vp->v_type != VCHR);
1645 1645 error = nfs_putpage(vp, (offset_t)0, 0, 0,
1646 1646 cr, ct);
1647 1647 if (error) {
1648 1648 mutex_enter(&rp->r_statelock);
1649 1649 if (!rp->r_error)
1650 1650 rp->r_error = error;
1651 1651 mutex_exit(&rp->r_statelock);
1652 1652 }
1653 1653 }
1654 1654
1655 1655 /*
1656 1656 * Do the remove operation on the renamed file
1657 1657 */
1658 1658 setdiropargs(&da, unlname, unldvp);
1659 1659
1660 1660 douprintf = 1;
1661 1661
1662 1662 (void) rfs2call(VTOMI(unldvp), RFS_REMOVE,
1663 1663 xdr_diropargs, (caddr_t)&da,
1664 1664 xdr_enum, (caddr_t)&status, unlcred,
1665 1665 &douprintf, &status, 0, NULL);
1666 1666
1667 1667 if (HAVE_RDDIR_CACHE(VTOR(unldvp)))
1668 1668 nfs_purge_rddir_cache(unldvp);
1669 1669 PURGE_ATTRCACHE(unldvp);
1670 1670
1671 1671 /*
1672 1672 * Release stuff held for the remove
1673 1673 */
1674 1674 VN_RELE(unldvp);
1675 1675 kmem_free(unlname, MAXNAMELEN);
1676 1676 crfree(unlcred);
1677 1677 goto redo;
1678 1678 }
1679 1679 mutex_exit(&rp->r_statelock);
1680 1680 }
1681 1681
1682 1682 rp_addfree(rp, cr);
1683 1683 }
1684 1684
1685 1685 /*
1686 1686 * Remote file system operations having to do with directory manipulation.
1687 1687 */
1688 1688
1689 1689 /* ARGSUSED */
1690 1690 static int
1691 1691 nfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1692 1692 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
1693 1693 int *direntflags, pathname_t *realpnp)
1694 1694 {
1695 1695 int error;
1696 1696 vnode_t *vp;
1697 1697 vnode_t *avp = NULL;
1698 1698 rnode_t *drp;
1699 1699
1700 1700 if (nfs_zone() != VTOMI(dvp)->mi_zone)
1701 1701 return (EPERM);
1702 1702
1703 1703 drp = VTOR(dvp);
1704 1704
1705 1705 /*
1706 1706 * Are we looking up extended attributes? If so, "dvp" is
1707 1707 * the file or directory for which we want attributes, and
1708 1708 * we need a lookup of the hidden attribute directory
1709 1709 * before we lookup the rest of the path.
1710 1710 */
1711 1711 if (flags & LOOKUP_XATTR) {
1712 1712 bool_t cflag = ((flags & CREATE_XATTR_DIR) != 0);
1713 1713 mntinfo_t *mi;
1714 1714
1715 1715 mi = VTOMI(dvp);
1716 1716 if (!(mi->mi_flags & MI_EXTATTR))
1717 1717 return (EINVAL);
1718 1718
1719 1719 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR(dvp)))
1720 1720 return (EINTR);
1721 1721
1722 1722 (void) nfslookup_dnlc(dvp, XATTR_DIR_NAME, &avp, cr);
1723 1723 if (avp == NULL)
1724 1724 error = acl_getxattrdir2(dvp, &avp, cflag, cr, 0);
1725 1725 else
1726 1726 error = 0;
1727 1727
1728 1728 nfs_rw_exit(&drp->r_rwlock);
1729 1729
1730 1730 if (error) {
1731 1731 if (mi->mi_flags & MI_EXTATTR)
1732 1732 return (error);
1733 1733 return (EINVAL);
1734 1734 }
1735 1735 dvp = avp;
1736 1736 drp = VTOR(dvp);
1737 1737 }
1738 1738
1739 1739 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR(dvp))) {
1740 1740 error = EINTR;
1741 1741 goto out;
1742 1742 }
1743 1743
1744 1744 error = nfslookup(dvp, nm, vpp, pnp, flags, rdir, cr, 0);
1745 1745
1746 1746 nfs_rw_exit(&drp->r_rwlock);
1747 1747
1748 1748 /*
1749 1749 * If vnode is a device, create special vnode.
1750 1750 */
1751 1751 if (!error && IS_DEVVP(*vpp)) {
1752 1752 vp = *vpp;
1753 1753 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
1754 1754 VN_RELE(vp);
1755 1755 }
1756 1756
1757 1757 out:
1758 1758 if (avp != NULL)
1759 1759 VN_RELE(avp);
1760 1760
1761 1761 return (error);
1762 1762 }
1763 1763
1764 1764 static int nfs_lookup_neg_cache = 1;
1765 1765
1766 1766 #ifdef DEBUG
1767 1767 static int nfs_lookup_dnlc_hits = 0;
1768 1768 static int nfs_lookup_dnlc_misses = 0;
1769 1769 static int nfs_lookup_dnlc_neg_hits = 0;
1770 1770 static int nfs_lookup_dnlc_disappears = 0;
1771 1771 static int nfs_lookup_dnlc_lookups = 0;
1772 1772 #endif
1773 1773
1774 1774 /* ARGSUSED */
1775 1775 int
1776 1776 nfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1777 1777 int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags)
1778 1778 {
1779 1779 int error;
1780 1780
1781 1781 ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1782 1782
1783 1783 /*
1784 1784 * If lookup is for "", just return dvp. Don't need
1785 1785 * to send it over the wire, look it up in the dnlc,
1786 1786 * or perform any access checks.
1787 1787 */
1788 1788 if (*nm == '\0') {
1789 1789 VN_HOLD(dvp);
1790 1790 *vpp = dvp;
1791 1791 return (0);
1792 1792 }
1793 1793
1794 1794 /*
1795 1795 * Can't do lookups in non-directories.
1796 1796 */
1797 1797 if (dvp->v_type != VDIR)
1798 1798 return (ENOTDIR);
1799 1799
1800 1800 /*
1801 1801 * If we're called with RFSCALL_SOFT, it's important that
1802 1802 * the only rfscall is one we make directly; if we permit
1803 1803 * an access call because we're looking up "." or validating
1804 1804 * a dnlc hit, we'll deadlock because that rfscall will not
1805 1805 * have the RFSCALL_SOFT set.
1806 1806 */
1807 1807 if (rfscall_flags & RFSCALL_SOFT)
1808 1808 goto callit;
1809 1809
1810 1810 /*
1811 1811 * If lookup is for ".", just return dvp. Don't need
1812 1812 * to send it over the wire or look it up in the dnlc,
1813 1813 * just need to check access.
1814 1814 */
1815 1815 if (strcmp(nm, ".") == 0) {
1816 1816 error = nfs_access(dvp, VEXEC, 0, cr, NULL);
1817 1817 if (error)
1818 1818 return (error);
1819 1819 VN_HOLD(dvp);
1820 1820 *vpp = dvp;
1821 1821 return (0);
1822 1822 }
1823 1823
1824 1824 /*
1825 1825 * Lookup this name in the DNLC. If there was a valid entry,
1826 1826 * then return the results of the lookup.
1827 1827 */
1828 1828 error = nfslookup_dnlc(dvp, nm, vpp, cr);
1829 1829 if (error || *vpp != NULL)
1830 1830 return (error);
1831 1831
1832 1832 callit:
1833 1833 error = nfslookup_otw(dvp, nm, vpp, cr, rfscall_flags);
1834 1834
1835 1835 return (error);
1836 1836 }
1837 1837
1838 1838 static int
1839 1839 nfslookup_dnlc(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr)
1840 1840 {
1841 1841 int error;
1842 1842 vnode_t *vp;
1843 1843
1844 1844 ASSERT(*nm != '\0');
1845 1845 ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1846 1846
1847 1847 /*
1848 1848 * Lookup this name in the DNLC. If successful, then validate
1849 1849 * the caches and then recheck the DNLC. The DNLC is rechecked
1850 1850 * just in case this entry got invalidated during the call
1851 1851 * to nfs_validate_caches.
1852 1852 *
1853 1853 * An assumption is being made that it is safe to say that a
1854 1854 * file exists which may not on the server. Any operations to
1855 1855 * the server will fail with ESTALE.
1856 1856 */
1857 1857 #ifdef DEBUG
1858 1858 nfs_lookup_dnlc_lookups++;
1859 1859 #endif
1860 1860 vp = dnlc_lookup(dvp, nm);
1861 1861 if (vp != NULL) {
1862 1862 VN_RELE(vp);
1863 1863 if (vp == DNLC_NO_VNODE && !vn_is_readonly(dvp)) {
1864 1864 PURGE_ATTRCACHE(dvp);
1865 1865 }
1866 1866 error = nfs_validate_caches(dvp, cr);
1867 1867 if (error)
1868 1868 return (error);
1869 1869 vp = dnlc_lookup(dvp, nm);
1870 1870 if (vp != NULL) {
1871 1871 error = nfs_access(dvp, VEXEC, 0, cr, NULL);
1872 1872 if (error) {
1873 1873 VN_RELE(vp);
1874 1874 return (error);
1875 1875 }
1876 1876 if (vp == DNLC_NO_VNODE) {
1877 1877 VN_RELE(vp);
1878 1878 #ifdef DEBUG
1879 1879 nfs_lookup_dnlc_neg_hits++;
1880 1880 #endif
1881 1881 return (ENOENT);
1882 1882 }
1883 1883 *vpp = vp;
1884 1884 #ifdef DEBUG
1885 1885 nfs_lookup_dnlc_hits++;
1886 1886 #endif
1887 1887 return (0);
1888 1888 }
1889 1889 #ifdef DEBUG
1890 1890 nfs_lookup_dnlc_disappears++;
1891 1891 #endif
1892 1892 }
1893 1893 #ifdef DEBUG
1894 1894 else
1895 1895 nfs_lookup_dnlc_misses++;
1896 1896 #endif
1897 1897
1898 1898 *vpp = NULL;
1899 1899
1900 1900 return (0);
1901 1901 }
1902 1902
1903 1903 static int
1904 1904 nfslookup_otw(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
1905 1905 int rfscall_flags)
1906 1906 {
1907 1907 int error;
1908 1908 struct nfsdiropargs da;
1909 1909 struct nfsdiropres dr;
1910 1910 int douprintf;
1911 1911 failinfo_t fi;
1912 1912 hrtime_t t;
1913 1913
1914 1914 ASSERT(*nm != '\0');
1915 1915 ASSERT(dvp->v_type == VDIR);
1916 1916 ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1917 1917
1918 1918 setdiropargs(&da, nm, dvp);
1919 1919
1920 1920 fi.vp = dvp;
1921 1921 fi.fhp = NULL; /* no need to update, filehandle not copied */
1922 1922 fi.copyproc = nfscopyfh;
1923 1923 fi.lookupproc = nfslookup;
1924 1924 fi.xattrdirproc = acl_getxattrdir2;
1925 1925
1926 1926 douprintf = 1;
1927 1927
1928 1928 t = gethrtime();
1929 1929
1930 1930 error = rfs2call(VTOMI(dvp), RFS_LOOKUP,
1931 1931 xdr_diropargs, (caddr_t)&da,
1932 1932 xdr_diropres, (caddr_t)&dr, cr,
1933 1933 &douprintf, &dr.dr_status, rfscall_flags, &fi);
1934 1934
1935 1935 if (!error) {
1936 1936 error = geterrno(dr.dr_status);
1937 1937 if (!error) {
1938 1938 *vpp = makenfsnode(&dr.dr_fhandle, &dr.dr_attr,
1939 1939 dvp->v_vfsp, t, cr, VTOR(dvp)->r_path, nm);
1940 1940 /*
1941 1941 * If NFS_ACL is supported on the server, then the
1942 1942 * attributes returned by server may have minimal
1943 1943 * permissions sometimes denying access to users having
1944 1944 * proper access. To get the proper attributes, mark
1945 1945 * the attributes as expired so that they will be
1946 1946 * regotten via the NFS_ACL GETATTR2 procedure.
1947 1947 */
1948 1948 if (VTOMI(*vpp)->mi_flags & MI_ACL) {
1949 1949 PURGE_ATTRCACHE(*vpp);
1950 1950 }
1951 1951 if (!(rfscall_flags & RFSCALL_SOFT))
1952 1952 dnlc_update(dvp, nm, *vpp);
1953 1953 } else {
1954 1954 PURGE_STALE_FH(error, dvp, cr);
1955 1955 if (error == ENOENT && nfs_lookup_neg_cache)
1956 1956 dnlc_enter(dvp, nm, DNLC_NO_VNODE);
1957 1957 }
1958 1958 }
1959 1959
1960 1960 return (error);
1961 1961 }
1962 1962
/*
 * nfs_create: create the file nm in directory dvp, or open it if it
 * already exists.
 *
 * The directory's r_rwlock is held as writer for the whole operation.
 *
 * If the name resolves (nm is "", nm is ".", or the over-the-wire
 * lookup succeeds) nothing is created: the call fails with EEXIST when
 * exclusive == EXCL, with EISDIR when the object is a directory and
 * VWRITE was requested; otherwise access is checked with VOP_ACCESS and
 * a regular file is truncated if the caller set AT_SIZE.
 *
 * Otherwise an RFS_CREATE request is sent.  Devices, FIFOs and sockets
 * are encoded into the mode and size attributes before sending (see the
 * comments in the body).
 *
 * On success *vpp receives a held vnode (a specvp() vnode for devices).
 * Returns 0 or an errno value; EPERM if the caller is in a different
 * zone than the mount, EINTR if waiting for the lock was interrupted.
 * The lfaware and vsecp arguments are not referenced.
 */
1963 1963 /* ARGSUSED */
1964 1964 static int
1965 1965 nfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
1966 1966 int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
1967 1967 vsecattr_t *vsecp)
1968 1968 {
1969 1969 int error;
1970 1970 struct nfscreatargs args;
1971 1971 struct nfsdiropres dr;
1972 1972 int douprintf;
1973 1973 vnode_t *vp;
1974 1974 rnode_t *rp;
1975 1975 struct vattr vattr;
1976 1976 rnode_t *drp;
1977 1977 vnode_t *tempvp;
1978 1978 hrtime_t t;
1979 1979
1980 1980 drp = VTOR(dvp);
1981 1981
1982 1982 if (nfs_zone() != VTOMI(dvp)->mi_zone)
1983 1983 return (EPERM);
1984 1984 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp)))
1985 1985 return (EINTR);
1986 1986
1987 1987 /*
1988 1988 * We make a copy of the attributes because the caller does not
1989 1989 * expect us to change what va points to.
1990 1990 */
1991 1991 vattr = *va;
1992 1992
1993 1993 /*
1994 1994 * If the pathname is "", just use dvp. Don't need
1995 1995 * to send it over the wire, look it up in the dnlc,
1996 1996 * or perform any access checks.
1997 1997 */
1998 1998 if (*nm == '\0') {
1999 1999 error = 0;
2000 2000 VN_HOLD(dvp);
2001 2001 vp = dvp;
2002 2002 /*
2003 2003 * If the pathname is ".", just use dvp. Don't need
2004 2004 * to send it over the wire or look it up in the dnlc,
2005 2005 * just need to check access.
2006 2006 */
2007 2007 } else if (strcmp(nm, ".") == 0) {
2008 2008 error = nfs_access(dvp, VEXEC, 0, cr, ct);
2009 2009 if (error) {
2010 2010 nfs_rw_exit(&drp->r_rwlock);
2011 2011 return (error);
2012 2012 }
2013 2013 VN_HOLD(dvp);
2014 2014 vp = dvp;
2015 2015 /*
2016 2016 * We need to go over the wire, just to be sure whether the
2017 2017 * file exists or not. Using the DNLC can be dangerous in
2018 2018 * this case when making a decision regarding existence.
2019 2019 */
2020 2020 } else {
2021 2021 error = nfslookup_otw(dvp, nm, &vp, cr, 0);
2022 2022 }
/*
 * error == 0 means the name already exists and vp is held: treat the
 * call as an open of the existing object.  Every path through this
 * block returns.
 */
2023 2023 if (!error) {
2024 2024 if (exclusive == EXCL)
2025 2025 error = EEXIST;
2026 2026 else if (vp->v_type == VDIR && (mode & VWRITE))
2027 2027 error = EISDIR;
2028 2028 else {
2029 2029 /*
2030 2030 * If vnode is a device, create special vnode.
2031 2031 */
2032 2032 if (IS_DEVVP(vp)) {
2033 2033 tempvp = vp;
2034 2034 vp = specvp(vp, vp->v_rdev, vp->v_type, cr);
2035 2035 VN_RELE(tempvp);
2036 2036 }
2037 2037 if (!(error = VOP_ACCESS(vp, mode, 0, cr, ct))) {
2038 2038 if ((vattr.va_mask & AT_SIZE) &&
2039 2039 vp->v_type == VREG) {
/* Only the size is sent; all other requested attributes are dropped. */
2040 2040 vattr.va_mask = AT_SIZE;
2041 2041 error = nfssetattr(vp, &vattr, 0, cr);
2042 2042
2043 2043 if (!error) {
2044 2044 /*
2045 2045 * Existing file was truncated;
2046 2046 * emit a create event.
2047 2047 */
2048 2048 vnevent_create(vp, ct);
2049 2049 }
2050 2050 }
2051 2051 }
2052 2052 }
2053 2053 nfs_rw_exit(&drp->r_rwlock);
2054 2054 if (error) {
2055 2055 VN_RELE(vp);
2056 2056 } else {
2057 2057 *vpp = vp;
2058 2058 }
2059 2059 return (error);
2060 2060 }
2061 2061
/*
 * Reached only when the lookup above failed.  Note that any lookup
 * error, not just ENOENT, falls through to the create attempt.
 */
2062 2062 ASSERT(vattr.va_mask & AT_TYPE);
2063 2063 if (vattr.va_type == VREG) {
2064 2064 ASSERT(vattr.va_mask & AT_MODE);
2065 2065 if (MANDMODE(vattr.va_mode)) {
2066 2066 nfs_rw_exit(&drp->r_rwlock);
2067 2067 return (EACCES);
2068 2068 }
2069 2069 }
2070 2070
2071 2071 dnlc_remove(dvp, nm);
2072 2072
2073 2073 setdiropargs(&args.ca_da, nm, dvp);
2074 2074
2075 2075 /*
2076 2076 * Decide what the group-id of the created file should be.
2077 2077 * Set it in attribute list as advisory...then do a setattr
2078 2078 * if the server didn't get it right the first time.
2079 2079 */
2080 2080 error = setdirgid(dvp, &vattr.va_gid, cr);
2081 2081 if (error) {
2082 2082 nfs_rw_exit(&drp->r_rwlock);
2083 2083 return (error);
2084 2084 }
2085 2085 vattr.va_mask |= AT_GID;
2086 2086
2087 2087 /*
2088 2088 * This is a completely gross hack to make mknod
2089 2089 * work over the wire until we can whack the protocol
2090 2090 */
2091 2091 #define IFCHR 0020000 /* character special */
2092 2092 #define IFBLK 0060000 /* block special */
2093 2093 #define IFSOCK 0140000 /* socket */
2094 2094
2095 2095 /*
2096 2096 * dev_t is uint_t in 5.x and short in 4.x. 4.x
2097 2097 * supports 8 bit majors. 5.x supports 14 bit majors. 5.x supports 18
2098 2098 * bits in the minor number where 4.x supports 8 bits. If the 5.x
2099 2099 * minor/major numbers <= 8 bits long, compress the device
2100 2100 * number before sending it. Otherwise, the 4.x server will not
2101 2101 * create the device with the correct device number and nothing can be
2102 2102 * done about this.
2103 2103 */
/*
 * For the special file types below, the file type is folded into
 * va_mode and va_size is reused to carry the device number (or a
 * marker value) instead of a length.
 */
2104 2104 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2105 2105 dev_t d = vattr.va_rdev;
2106 2106 dev32_t dev32;
2107 2107
2108 2108 if (vattr.va_type == VCHR)
2109 2109 vattr.va_mode |= IFCHR;
2110 2110 else
2111 2111 vattr.va_mode |= IFBLK;
2112 2112
2113 2113 (void) cmpldev(&dev32, d);
/* Bits outside the 4.x major/minor ranges: send the 32-bit form as is. */
2114 2114 if (dev32 & ~((SO4_MAXMAJ << L_BITSMINOR32) | SO4_MAXMIN))
2115 2115 vattr.va_size = (u_offset_t)dev32;
2116 2116 else
2117 2117 vattr.va_size = (u_offset_t)nfsv2_cmpdev(d);
2118 2118
2119 2119 vattr.va_mask |= AT_MODE|AT_SIZE;
2120 2120 } else if (vattr.va_type == VFIFO) {
2121 2121 vattr.va_mode |= IFCHR; /* xtra kludge for namedpipe */
2122 2122 vattr.va_size = (u_offset_t)NFS_FIFO_DEV; /* blech */
2123 2123 vattr.va_mask |= AT_MODE|AT_SIZE;
2124 2124 } else if (vattr.va_type == VSOCK) {
2125 2125 vattr.va_mode |= IFSOCK;
2126 2126 /*
2127 2127 * To avoid triggering bugs in the servers set AT_SIZE
2128 2128 * (all other RFS_CREATE calls set this).
2129 2129 */
2130 2130 vattr.va_size = 0;
2131 2131 vattr.va_mask |= AT_MODE|AT_SIZE;
2132 2132 }
2133 2133
2134 2134 args.ca_sa = &args.ca_sa_buf;
2135 2135 error = vattr_to_sattr(&vattr, args.ca_sa);
2136 2136 if (error) {
2137 2137 /* req time field(s) overflow - return immediately */
2138 2138 nfs_rw_exit(&drp->r_rwlock);
2139 2139 return (error);
2140 2140 }
2141 2141
2142 2142 douprintf = 1;
2143 2143
/* Timestamp taken before the call; passed on to makenfsnode() below. */
2144 2144 t = gethrtime();
2145 2145
2146 2146 error = rfs2call(VTOMI(dvp), RFS_CREATE,
2147 2147 xdr_creatargs, (caddr_t)&args,
2148 2148 xdr_diropres, (caddr_t)&dr, cr,
2149 2149 &douprintf, &dr.dr_status, 0, NULL);
2150 2150
/* Done whether or not the call succeeded. */
2151 2151 PURGE_ATTRCACHE(dvp); /* mod time changed */
2152 2152
2153 2153 if (!error) {
2154 2154 error = geterrno(dr.dr_status);
2155 2155 if (!error) {
2156 2156 if (HAVE_RDDIR_CACHE(drp))
2157 2157 nfs_purge_rddir_cache(dvp);
2158 2158 vp = makenfsnode(&dr.dr_fhandle, &dr.dr_attr,
2159 2159 dvp->v_vfsp, t, cr, NULL, NULL);
2160 2160 /*
2161 2161 * If NFS_ACL is supported on the server, then the
2162 2162 * attributes returned by server may have minimal
2163 2163 * permissions sometimes denying access to users having
2164 2164 * proper access. To get the proper attributes, mark
2165 2165 * the attributes as expired so that they will be
2166 2166 * regotten via the NFS_ACL GETATTR2 procedure.
2167 2167 */
2168 2168 if (VTOMI(vp)->mi_flags & MI_ACL) {
2169 2169 PURGE_ATTRCACHE(vp);
2170 2170 }
2171 2171 dnlc_update(dvp, nm, vp);
2172 2172 rp = VTOR(vp);
/*
 * NOTE(review): va_size is tested here without checking that AT_SIZE
 * is set in va_mask -- confirm callers always initialise it.
 */
2173 2173 if (vattr.va_size == 0) {
2174 2174 mutex_enter(&rp->r_statelock);
2175 2175 rp->r_size = 0;
2176 2176 mutex_exit(&rp->r_statelock);
2177 2177 if (vn_has_cached_data(vp)) {
2178 2178 ASSERT(vp->v_type != VCHR);
2179 2179 nfs_invalidate_pages(vp,
2180 2180 (u_offset_t)0, cr);
2181 2181 }
2182 2182 }
2183 2183
2184 2184 /*
2185 2185 * Make sure the gid was set correctly.
2186 2186 * If not, try to set it (but don't lose
2187 2187 * any sleep over it).
2188 2188 */
2189 2189 if (vattr.va_gid != rp->r_attr.va_gid) {
2190 2190 vattr.va_mask = AT_GID;
2191 2191 (void) nfssetattr(vp, &vattr, 0, cr);
2192 2192 }
2193 2193
2194 2194 /*
2195 2195 * If vnode is a device create special vnode
2196 2196 */
2197 2197 if (IS_DEVVP(vp)) {
2198 2198 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
2199 2199 VN_RELE(vp);
2200 2200 } else
2201 2201 *vpp = vp;
2202 2202 } else {
2203 2203 PURGE_STALE_FH(error, dvp, cr);
2204 2204 }
2205 2205 }
2206 2206
2207 2207 nfs_rw_exit(&drp->r_rwlock);
2208 2208
2209 2209 return (error);
2210 2210 }
2211 2211
2212 2212 /*
2213 2213 * Weirdness: if the vnode to be removed is open
2214 2214 * we rename it instead of removing it and nfs_inactive
2215 2215 * will remove the new name.
2216 2216 */
2217 2217 /* ARGSUSED */
2218 2218 static int
2219 2219 nfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, int flags)
2220 2220 {
2221 2221 int error;
2222 2222 struct nfsdiropargs da;
2223 2223 enum nfsstat status;
2224 2224 vnode_t *vp;
2225 2225 char *tmpname;
2226 2226 int douprintf;
2227 2227 rnode_t *rp;
2228 2228 rnode_t *drp;
2229 2229
2230 2230 if (nfs_zone() != VTOMI(dvp)->mi_zone)
2231 2231 return (EPERM);
2232 2232 drp = VTOR(dvp);
2233 2233 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp)))
2234 2234 return (EINTR);
2235 2235
2236 2236 error = nfslookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0);
2237 2237 if (error) {
2238 2238 nfs_rw_exit(&drp->r_rwlock);
2239 2239 return (error);
2240 2240 }
2241 2241
2242 2242 if (vp->v_type == VDIR && secpolicy_fs_linkdir(cr, dvp->v_vfsp)) {
2243 2243 VN_RELE(vp);
2244 2244 nfs_rw_exit(&drp->r_rwlock);
2245 2245 return (EPERM);
2246 2246 }
2247 2247
2248 2248 /*
2249 2249 * First just remove the entry from the name cache, as it
2250 2250 * is most likely the only entry for this vp.
2251 2251 */
2252 2252 dnlc_remove(dvp, nm);
2253 2253
2254 2254 /*
2255 2255 * If the file has a v_count > 1 then there may be more than one
2256 2256 * entry in the name cache due multiple links or an open file,
2257 2257 * but we don't have the real reference count so flush all
2258 2258 * possible entries.
2259 2259 */
2260 2260 if (vp->v_count > 1)
2261 2261 dnlc_purge_vp(vp);
2262 2262
2263 2263 /*
2264 2264 * Now we have the real reference count on the vnode
2265 2265 */
2266 2266 rp = VTOR(vp);
2267 2267 mutex_enter(&rp->r_statelock);
2268 2268 if (vp->v_count > 1 &&
2269 2269 (rp->r_unldvp == NULL || strcmp(nm, rp->r_unlname) == 0)) {
2270 2270 mutex_exit(&rp->r_statelock);
2271 2271 tmpname = newname();
2272 2272 error = nfsrename(dvp, nm, dvp, tmpname, cr, ct);
2273 2273 if (error)
2274 2274 kmem_free(tmpname, MAXNAMELEN);
2275 2275 else {
2276 2276 mutex_enter(&rp->r_statelock);
2277 2277 if (rp->r_unldvp == NULL) {
2278 2278 VN_HOLD(dvp);
2279 2279 rp->r_unldvp = dvp;
2280 2280 if (rp->r_unlcred != NULL)
2281 2281 crfree(rp->r_unlcred);
2282 2282 crhold(cr);
2283 2283 rp->r_unlcred = cr;
2284 2284 rp->r_unlname = tmpname;
2285 2285 } else {
2286 2286 kmem_free(rp->r_unlname, MAXNAMELEN);
2287 2287 rp->r_unlname = tmpname;
2288 2288 }
2289 2289 mutex_exit(&rp->r_statelock);
2290 2290 }
2291 2291 } else {
2292 2292 mutex_exit(&rp->r_statelock);
2293 2293 /*
2294 2294 * We need to flush any dirty pages which happen to
2295 2295 * be hanging around before removing the file. This
2296 2296 * shouldn't happen very often and mostly on file
2297 2297 * systems mounted "nocto".
2298 2298 */
2299 2299 if (vn_has_cached_data(vp) &&
2300 2300 ((rp->r_flags & RDIRTY) || rp->r_count > 0)) {
2301 2301 error = nfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
2302 2302 if (error && (error == ENOSPC || error == EDQUOT)) {
2303 2303 mutex_enter(&rp->r_statelock);
2304 2304 if (!rp->r_error)
2305 2305 rp->r_error = error;
2306 2306 mutex_exit(&rp->r_statelock);
2307 2307 }
2308 2308 }
2309 2309
2310 2310 setdiropargs(&da, nm, dvp);
2311 2311
2312 2312 douprintf = 1;
2313 2313
2314 2314 error = rfs2call(VTOMI(dvp), RFS_REMOVE,
2315 2315 xdr_diropargs, (caddr_t)&da,
2316 2316 xdr_enum, (caddr_t)&status, cr,
2317 2317 &douprintf, &status, 0, NULL);
2318 2318
2319 2319 /*
2320 2320 * The xattr dir may be gone after last attr is removed,
2321 2321 * so flush it from dnlc.
2322 2322 */
2323 2323 if (dvp->v_flag & V_XATTRDIR)
2324 2324 dnlc_purge_vp(dvp);
2325 2325
2326 2326 PURGE_ATTRCACHE(dvp); /* mod time changed */
2327 2327 PURGE_ATTRCACHE(vp); /* link count changed */
2328 2328
2329 2329 if (!error) {
2330 2330 error = geterrno(status);
2331 2331 if (!error) {
2332 2332 if (HAVE_RDDIR_CACHE(drp))
2333 2333 nfs_purge_rddir_cache(dvp);
2334 2334 } else {
2335 2335 PURGE_STALE_FH(error, dvp, cr);
2336 2336 }
2337 2337 }
2338 2338 }
2339 2339
2340 2340 if (error == 0) {
2341 2341 vnevent_remove(vp, dvp, nm, ct);
2342 2342 }
2343 2343 VN_RELE(vp);
2344 2344
2345 2345 nfs_rw_exit(&drp->r_rwlock);
2346 2346
2347 2347 return (error);
2348 2348 }
2349 2349
2350 2350 /* ARGSUSED */
2351 2351 static int
2352 2352 nfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
2353 2353 caller_context_t *ct, int flags)
2354 2354 {
2355 2355 int error;
2356 2356 struct nfslinkargs args;
2357 2357 enum nfsstat status;
2358 2358 vnode_t *realvp;
2359 2359 int douprintf;
2360 2360 rnode_t *tdrp;
2361 2361
2362 2362 if (nfs_zone() != VTOMI(tdvp)->mi_zone)
2363 2363 return (EPERM);
2364 2364 if (VOP_REALVP(svp, &realvp, ct) == 0)
2365 2365 svp = realvp;
2366 2366
2367 2367 args.la_from = VTOFH(svp);
2368 2368 setdiropargs(&args.la_to, tnm, tdvp);
2369 2369
2370 2370 tdrp = VTOR(tdvp);
2371 2371 if (nfs_rw_enter_sig(&tdrp->r_rwlock, RW_WRITER, INTR(tdvp)))
2372 2372 return (EINTR);
2373 2373
2374 2374 dnlc_remove(tdvp, tnm);
2375 2375
2376 2376 douprintf = 1;
2377 2377
2378 2378 error = rfs2call(VTOMI(svp), RFS_LINK,
2379 2379 xdr_linkargs, (caddr_t)&args,
2380 2380 xdr_enum, (caddr_t)&status, cr,
2381 2381 &douprintf, &status, 0, NULL);
2382 2382
2383 2383 PURGE_ATTRCACHE(tdvp); /* mod time changed */
2384 2384 PURGE_ATTRCACHE(svp); /* link count changed */
2385 2385
2386 2386 if (!error) {
2387 2387 error = geterrno(status);
2388 2388 if (!error) {
2389 2389 if (HAVE_RDDIR_CACHE(tdrp))
2390 2390 nfs_purge_rddir_cache(tdvp);
2391 2391 }
2392 2392 }
2393 2393
2394 2394 nfs_rw_exit(&tdrp->r_rwlock);
2395 2395
2396 2396 if (!error) {
2397 2397 /*
2398 2398 * Notify the source file of this link operation.
2399 2399 */
2400 2400 vnevent_link(svp, ct);
2401 2401 }
2402 2402 return (error);
2403 2403 }
2404 2404
2405 2405 /* ARGSUSED */
2406 2406 static int
2407 2407 nfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
2408 2408 caller_context_t *ct, int flags)
2409 2409 {
2410 2410 vnode_t *realvp;
2411 2411
2412 2412 if (nfs_zone() != VTOMI(odvp)->mi_zone)
2413 2413 return (EPERM);
2414 2414 if (VOP_REALVP(ndvp, &realvp, ct) == 0)
2415 2415 ndvp = realvp;
2416 2416
2417 2417 return (nfsrename(odvp, onm, ndvp, nnm, cr, ct));
2418 2418 }
2419 2419
2420 2420 /*
2421 2421 * nfsrename does the real work of renaming in NFS Version 2.
2422 2422 */
/*
 * Rename onm in directory odvp to nnm in directory ndvp.
 *
 * Both directories' r_rwlocks are taken as writer, in address order of
 * their rnodes, and are held until return.
 *
 * If the target name exists, is not a directory and is still referenced
 * after the name cache has been purged, it is first linked (or, if the
 * link attempt returns EOPNOTSUPP, renamed) to a temporary name that is
 * recorded in its rnode, so that the file stays accessible after the
 * rename replaces it.
 *
 * Returns 0 or an errno value: EINVAL if either name is "." or "..",
 * EINTR if waiting for a lock was interrupted, EBUSY if the target is a
 * mount point, ENOTDIR if a directory would replace an in-use
 * non-directory, and EEXIST in place of ENOTEMPTY from the server.
 */
2423 2423 static int
2424 2424 nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
2425 2425 caller_context_t *ct)
2426 2426 {
2427 2427 int error;
2428 2428 enum nfsstat status;
2429 2429 struct nfsrnmargs args;
2430 2430 int douprintf;
2431 2431 vnode_t *nvp = NULL;
2432 2432 vnode_t *ovp = NULL;
2433 2433 char *tmpname;
2434 2434 rnode_t *rp;
2435 2435 rnode_t *odrp;
2436 2436 rnode_t *ndrp;
2437 2437
2438 2438 ASSERT(nfs_zone() == VTOMI(odvp)->mi_zone);
2439 2439 if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
2440 2440 strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
2441 2441 return (EINVAL);
2442 2442
2443 2443 odrp = VTOR(odvp);
2444 2444 ndrp = VTOR(ndvp);
/*
 * Take the two directory locks in a fixed order (lower rnode address
 * first) regardless of which one is the source.  When odvp == ndvp the
 * else branch enters the same lock twice.
 * NOTE(review): that relies on nfs_rw_enter_sig() allowing the owner to
 * re-enter as writer -- confirm against its implementation.
 */
2445 2445 if ((intptr_t)odrp < (intptr_t)ndrp) {
2446 2446 if (nfs_rw_enter_sig(&odrp->r_rwlock, RW_WRITER, INTR(odvp)))
2447 2447 return (EINTR);
2448 2448 if (nfs_rw_enter_sig(&ndrp->r_rwlock, RW_WRITER, INTR(ndvp))) {
2449 2449 nfs_rw_exit(&odrp->r_rwlock);
2450 2450 return (EINTR);
2451 2451 }
2452 2452 } else {
2453 2453 if (nfs_rw_enter_sig(&ndrp->r_rwlock, RW_WRITER, INTR(ndvp)))
2454 2454 return (EINTR);
2455 2455 if (nfs_rw_enter_sig(&odrp->r_rwlock, RW_WRITER, INTR(odvp))) {
2456 2456 nfs_rw_exit(&ndrp->r_rwlock);
2457 2457 return (EINTR);
2458 2458 }
2459 2459 }
2460 2460
2461 2461 /*
2462 2462 * Lookup the target file. If it exists, it needs to be
2463 2463 * checked to see whether it is a mount point and whether
2464 2464 * it is active (open).
2465 2465 */
/* A failed lookup is not an error here; nvp simply stays NULL. */
2466 2466 error = nfslookup(ndvp, nnm, &nvp, NULL, 0, NULL, cr, 0);
2467 2467 if (!error) {
2468 2468 /*
2469 2469 * If this file has been mounted on, then just
2470 2470 * return busy because renaming to it would remove
2471 2471 * the mounted file system from the name space.
2472 2472 */
2473 2473 if (vn_mountedvfs(nvp) != NULL) {
2474 2474 VN_RELE(nvp);
2475 2475 nfs_rw_exit(&odrp->r_rwlock);
2476 2476 nfs_rw_exit(&ndrp->r_rwlock);
2477 2477 return (EBUSY);
2478 2478 }
2479 2479
2480 2480 /*
2481 2481 * Purge the name cache of all references to this vnode
2482 2482 * so that we can check the reference count to infer
2483 2483 * whether it is active or not.
2484 2484 */
2485 2485 /*
2486 2486 * First just remove the entry from the name cache, as it
2487 2487 * is most likely the only entry for this vp.
2488 2488 */
2489 2489 dnlc_remove(ndvp, nnm);
2490 2490 /*
2491 2491 * If the file has a v_count > 1 then there may be more
2492 2492 * than one entry in the name cache due to multiple links
2493 2493 * or an open file, but we don't have the real reference
2494 2494 * count so flush all possible entries.
2495 2495 */
2496 2496 if (nvp->v_count > 1)
2497 2497 dnlc_purge_vp(nvp);
2498 2498
2499 2499 /*
2500 2500 * If the vnode is active and is not a directory,
2501 2501 * arrange to rename it to a
2502 2502 * temporary file so that it will continue to be
2503 2503 * accessible. This implements the "unlink-open-file"
2504 2504 * semantics for the target of a rename operation.
2505 2505 * Before doing this though, make sure that the
2506 2506 * source and target files are not already the same.
2507 2507 */
2508 2508 if (nvp->v_count > 1 && nvp->v_type != VDIR) {
2509 2509 /*
2510 2510 * Lookup the source name.
2511 2511 */
2512 2512 error = nfslookup(odvp, onm, &ovp, NULL, 0, NULL,
2513 2513 cr, 0);
2514 2514
2515 2515 /*
2516 2516 * The source name *should* already exist.
2517 2517 */
2518 2518 if (error) {
2519 2519 VN_RELE(nvp);
2520 2520 nfs_rw_exit(&odrp->r_rwlock);
2521 2521 nfs_rw_exit(&ndrp->r_rwlock);
2522 2522 return (error);
2523 2523 }
2524 2524
2525 2525 /*
2526 2526 * Compare the two vnodes. If they are the same,
2527 2527 * just release all held vnodes and return success.
2528 2528 */
2529 2529 if (ovp == nvp) {
2530 2530 VN_RELE(ovp);
2531 2531 VN_RELE(nvp);
2532 2532 nfs_rw_exit(&odrp->r_rwlock);
2533 2533 nfs_rw_exit(&ndrp->r_rwlock);
2534 2534 return (0);
2535 2535 }
2536 2536
2537 2537 /*
2538 2538 * Can't mix and match directories and non-
2539 2539 * directories in rename operations. We already
2540 2540 * know that the target is not a directory. If
2541 2541 * the source is a directory, return an error.
2542 2542 */
2543 2543 if (ovp->v_type == VDIR) {
2544 2544 VN_RELE(ovp);
2545 2545 VN_RELE(nvp);
2546 2546 nfs_rw_exit(&odrp->r_rwlock);
2547 2547 nfs_rw_exit(&ndrp->r_rwlock);
2548 2548 return (ENOTDIR);
2549 2549 }
2550 2550
2551 2551 /*
2552 2552 * The target file exists, is not the same as
2553 2553 * the source file, and is active. Link it
2554 2554 * to a temporary filename to avoid having
2555 2555 * the server removing the file completely.
2556 2556 */
/*
 * Both calls below operate on ndvp, whose r_rwlock this thread already
 * holds; nfs_rename() ends up back in nfsrename().
 */
2557 2557 tmpname = newname();
2558 2558 error = nfs_link(ndvp, nvp, tmpname, cr, NULL, 0);
2559 2559 if (error == EOPNOTSUPP) {
2560 2560 error = nfs_rename(ndvp, nnm, ndvp, tmpname,
2561 2561 cr, NULL, 0);
2562 2562 }
2563 2563 if (error) {
2564 2564 kmem_free(tmpname, MAXNAMELEN);
2565 2565 VN_RELE(ovp);
2566 2566 VN_RELE(nvp);
2567 2567 nfs_rw_exit(&odrp->r_rwlock);
2568 2568 nfs_rw_exit(&ndrp->r_rwlock);
2569 2569 return (error);
2570 2570 }
/*
 * Record the temporary name in the target's rnode.  On the first record
 * the directory is held and the credentials are saved too; otherwise
 * only the previously stored name is replaced.
 */
2571 2571 rp = VTOR(nvp);
2572 2572 mutex_enter(&rp->r_statelock);
2573 2573 if (rp->r_unldvp == NULL) {
2574 2574 VN_HOLD(ndvp);
2575 2575 rp->r_unldvp = ndvp;
2576 2576 if (rp->r_unlcred != NULL)
2577 2577 crfree(rp->r_unlcred);
2578 2578 crhold(cr);
2579 2579 rp->r_unlcred = cr;
2580 2580 rp->r_unlname = tmpname;
2581 2581 } else {
2582 2582 kmem_free(rp->r_unlname, MAXNAMELEN);
2583 2583 rp->r_unlname = tmpname;
2584 2584 }
2585 2585 mutex_exit(&rp->r_statelock);
2586 2586 }
2587 2587 }
2588 2588
2589 2589 if (ovp == NULL) {
2590 2590 /*
2591 2591 * When renaming directories to be a subdirectory of a
2592 2592 * different parent, the dnlc entry for ".." will no
2593 2593 * longer be valid, so it must be removed.
2594 2594 *
2595 2595 * We do a lookup here to determine whether we are renaming
2596 2596 * a directory and we need to check if we are renaming
2597 2597 * an unlinked file. This might have already been done
2598 2598 * in previous code, so we check ovp == NULL to avoid
2599 2599 * doing it twice.
2600 2600 */
2601 2601
2602 2602 error = nfslookup(odvp, onm, &ovp, NULL, 0, NULL, cr, 0);
2603 2603
2604 2604 /*
2605 2605 * The source name *should* already exist.
2606 2606 */
2607 2607 if (error) {
2608 2608 nfs_rw_exit(&odrp->r_rwlock);
2609 2609 nfs_rw_exit(&ndrp->r_rwlock);
2610 2610 if (nvp) {
2611 2611 VN_RELE(nvp);
2612 2612 }
2613 2613 return (error);
2614 2614 }
2615 2615 ASSERT(ovp != NULL);
2616 2616 }
2617 2617
/* From here on ovp is held; nvp is held if the target existed. */
2618 2618 dnlc_remove(odvp, onm);
2619 2619 dnlc_remove(ndvp, nnm);
2620 2620
2621 2621 setdiropargs(&args.rna_from, onm, odvp);
2622 2622 setdiropargs(&args.rna_to, nnm, ndvp);
2623 2623
2624 2624 douprintf = 1;
2625 2625
2626 2626 error = rfs2call(VTOMI(odvp), RFS_RENAME,
2627 2627 xdr_rnmargs, (caddr_t)&args,
2628 2628 xdr_enum, (caddr_t)&status, cr,
2629 2629 &douprintf, &status, 0, NULL);
2630 2630
2631 2631 PURGE_ATTRCACHE(odvp); /* mod time changed */
2632 2632 PURGE_ATTRCACHE(ndvp); /* mod time changed */
2633 2633
2634 2634 if (!error) {
2635 2635 error = geterrno(status);
2636 2636 if (!error) {
2637 2637 if (HAVE_RDDIR_CACHE(odrp))
2638 2638 nfs_purge_rddir_cache(odvp);
2639 2639 if (HAVE_RDDIR_CACHE(ndrp))
2640 2640 nfs_purge_rddir_cache(ndvp);
2641 2641 /*
2642 2642 * when renaming directories to be a subdirectory of a
2643 2643 * different parent, the dnlc entry for ".." will no
2644 2644 * longer be valid, so it must be removed
2645 2645 */
2646 2646 rp = VTOR(ovp);
2647 2647 if (ndvp != odvp) {
2648 2648 if (ovp->v_type == VDIR) {
2649 2649 dnlc_remove(ovp, "..");
2650 2650 if (HAVE_RDDIR_CACHE(rp))
2651 2651 nfs_purge_rddir_cache(ovp);
2652 2652 }
2653 2653 }
2654 2654
2655 2655 /*
2656 2656 * If we are renaming the unlinked file, update the
2657 2657 * r_unldvp and r_unlname as needed.
2658 2658 */
2659 2659 mutex_enter(&rp->r_statelock);
2660 2660 if (rp->r_unldvp != NULL) {
2661 2661 if (strcmp(rp->r_unlname, onm) == 0) {
/* strncpy() may leave the buffer unterminated; the next line fixes that. */
2662 2662 (void) strncpy(rp->r_unlname,
2663 2663 nnm, MAXNAMELEN);
2664 2664 rp->r_unlname[MAXNAMELEN - 1] = '\0';
2665 2665
2666 2666 if (ndvp != rp->r_unldvp) {
2667 2667 VN_RELE(rp->r_unldvp);
2668 2668 rp->r_unldvp = ndvp;
2669 2669 VN_HOLD(ndvp);
2670 2670 }
2671 2671 }
2672 2672 }
2673 2673 mutex_exit(&rp->r_statelock);
2674 2674 } else {
2675 2675 /*
2676 2676 * System V defines rename to return EEXIST, not
2677 2677 * ENOTEMPTY if the target directory is not empty.
2678 2678 * Over the wire, the error is NFSERR_ENOTEMPTY
2679 2679 * which geterrno maps to ENOTEMPTY.
2680 2680 */
2681 2681 if (error == ENOTEMPTY)
2682 2682 error = EEXIST;
2683 2683 }
2684 2684 }
2685 2685
/* Post vnode events only for a rename that actually happened. */
2686 2686 if (error == 0) {
2687 2687 if (nvp)
2688 2688 vnevent_rename_dest(nvp, ndvp, nnm, ct);
2689 2689
2690 2690 if (odvp != ndvp)
2691 2691 vnevent_rename_dest_dir(ndvp, ct);
2692 2692
2693 2693 ASSERT(ovp != NULL);
2694 2694 vnevent_rename_src(ovp, odvp, onm, ct);
2695 2695 }
2696 2696
2697 2697 if (nvp) {
2698 2698 VN_RELE(nvp);
2699 2699 }
2700 2700 VN_RELE(ovp);
2701 2701
2702 2702 nfs_rw_exit(&odrp->r_rwlock);
2703 2703 nfs_rw_exit(&ndrp->r_rwlock);
2704 2704
2705 2705 return (error);
2706 2706 }
2707 2707
2708 2708 /* ARGSUSED */
2709 2709 static int
2710 2710 nfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, cred_t *cr,
2711 2711 caller_context_t *ct, int flags, vsecattr_t *vsecp)
2712 2712 {
2713 2713 int error;
2714 2714 struct nfscreatargs args;
2715 2715 struct nfsdiropres dr;
2716 2716 int douprintf;
2717 2717 rnode_t *drp;
2718 2718 hrtime_t t;
2719 2719
2720 2720 if (nfs_zone() != VTOMI(dvp)->mi_zone)
2721 2721 return (EPERM);
2722 2722
2723 2723 setdiropargs(&args.ca_da, nm, dvp);
2724 2724
2725 2725 /*
2726 2726 * Decide what the group-id and set-gid bit of the created directory
2727 2727 * should be. May have to do a setattr to get the gid right.
2728 2728 */
2729 2729 error = setdirgid(dvp, &va->va_gid, cr);
2730 2730 if (error)
2731 2731 return (error);
2732 2732 error = setdirmode(dvp, &va->va_mode, cr);
2733 2733 if (error)
2734 2734 return (error);
2735 2735 va->va_mask |= AT_MODE|AT_GID;
2736 2736
2737 2737 args.ca_sa = &args.ca_sa_buf;
2738 2738 error = vattr_to_sattr(va, args.ca_sa);
2739 2739 if (error) {
2740 2740 /* req time field(s) overflow - return immediately */
2741 2741 return (error);
2742 2742 }
2743 2743
2744 2744 drp = VTOR(dvp);
2745 2745 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp)))
2746 2746 return (EINTR);
2747 2747
2748 2748 dnlc_remove(dvp, nm);
2749 2749
2750 2750 douprintf = 1;
2751 2751
2752 2752 t = gethrtime();
2753 2753
2754 2754 error = rfs2call(VTOMI(dvp), RFS_MKDIR,
2755 2755 xdr_creatargs, (caddr_t)&args,
2756 2756 xdr_diropres, (caddr_t)&dr, cr,
2757 2757 &douprintf, &dr.dr_status, 0, NULL);
2758 2758
2759 2759 PURGE_ATTRCACHE(dvp); /* mod time changed */
2760 2760
2761 2761 if (!error) {
2762 2762 error = geterrno(dr.dr_status);
2763 2763 if (!error) {
2764 2764 if (HAVE_RDDIR_CACHE(drp))
2765 2765 nfs_purge_rddir_cache(dvp);
2766 2766 /*
2767 2767 * The attributes returned by RFS_MKDIR can not
2768 2768 * be depended upon, so mark the attribute cache
2769 2769 * as purged. A subsequent GETATTR will get the
2770 2770 * correct attributes from the server.
2771 2771 */
2772 2772 *vpp = makenfsnode(&dr.dr_fhandle, &dr.dr_attr,
2773 2773 dvp->v_vfsp, t, cr, NULL, NULL);
2774 2774 PURGE_ATTRCACHE(*vpp);
2775 2775 dnlc_update(dvp, nm, *vpp);
2776 2776
2777 2777 /*
2778 2778 * Make sure the gid was set correctly.
2779 2779 * If not, try to set it (but don't lose
2780 2780 * any sleep over it).
2781 2781 */
2782 2782 if (va->va_gid != VTOR(*vpp)->r_attr.va_gid) {
2783 2783 va->va_mask = AT_GID;
2784 2784 (void) nfssetattr(*vpp, va, 0, cr);
2785 2785 }
2786 2786 } else {
2787 2787 PURGE_STALE_FH(error, dvp, cr);
2788 2788 }
2789 2789 }
2790 2790
2791 2791 nfs_rw_exit(&drp->r_rwlock);
2792 2792
2793 2793 return (error);
2794 2794 }
2795 2795
2796 2796 /* ARGSUSED */
2797 2797 static int
2798 2798 nfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
2799 2799 caller_context_t *ct, int flags)
2800 2800 {
2801 2801 int error;
2802 2802 enum nfsstat status;
2803 2803 struct nfsdiropargs da;
2804 2804 vnode_t *vp;
2805 2805 int douprintf;
2806 2806 rnode_t *drp;
2807 2807
2808 2808 if (nfs_zone() != VTOMI(dvp)->mi_zone)
2809 2809 return (EPERM);
2810 2810 drp = VTOR(dvp);
2811 2811 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp)))
2812 2812 return (EINTR);
2813 2813
2814 2814 /*
2815 2815 * Attempt to prevent a rmdir(".") from succeeding.
2816 2816 */
2817 2817 error = nfslookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0);
2818 2818 if (error) {
2819 2819 nfs_rw_exit(&drp->r_rwlock);
2820 2820 return (error);
2821 2821 }
2822 2822
2823 2823 if (vp == cdir) {
2824 2824 VN_RELE(vp);
2825 2825 nfs_rw_exit(&drp->r_rwlock);
2826 2826 return (EINVAL);
2827 2827 }
2828 2828
2829 2829 setdiropargs(&da, nm, dvp);
2830 2830
2831 2831 /*
2832 2832 * First just remove the entry from the name cache, as it
2833 2833 * is most likely an entry for this vp.
2834 2834 */
2835 2835 dnlc_remove(dvp, nm);
2836 2836
2837 2837 /*
2838 2838 * If there vnode reference count is greater than one, then
2839 2839 * there may be additional references in the DNLC which will
2840 2840 * need to be purged. First, trying removing the entry for
2841 2841 * the parent directory and see if that removes the additional
2842 2842 * reference(s). If that doesn't do it, then use dnlc_purge_vp
2843 2843 * to completely remove any references to the directory which
2844 2844 * might still exist in the DNLC.
2845 2845 */
2846 2846 if (vp->v_count > 1) {
2847 2847 dnlc_remove(vp, "..");
2848 2848 if (vp->v_count > 1)
2849 2849 dnlc_purge_vp(vp);
2850 2850 }
2851 2851
2852 2852 douprintf = 1;
2853 2853
2854 2854 error = rfs2call(VTOMI(dvp), RFS_RMDIR,
2855 2855 xdr_diropargs, (caddr_t)&da,
2856 2856 xdr_enum, (caddr_t)&status, cr,
2857 2857 &douprintf, &status, 0, NULL);
2858 2858
2859 2859 PURGE_ATTRCACHE(dvp); /* mod time changed */
2860 2860
2861 2861 if (error) {
2862 2862 VN_RELE(vp);
2863 2863 nfs_rw_exit(&drp->r_rwlock);
2864 2864 return (error);
2865 2865 }
2866 2866
2867 2867 error = geterrno(status);
2868 2868 if (!error) {
2869 2869 if (HAVE_RDDIR_CACHE(drp))
2870 2870 nfs_purge_rddir_cache(dvp);
2871 2871 if (HAVE_RDDIR_CACHE(VTOR(vp)))
2872 2872 nfs_purge_rddir_cache(vp);
2873 2873 } else {
2874 2874 PURGE_STALE_FH(error, dvp, cr);
2875 2875 /*
2876 2876 * System V defines rmdir to return EEXIST, not
2877 2877 * ENOTEMPTY if the directory is not empty. Over
2878 2878 * the wire, the error is NFSERR_ENOTEMPTY which
2879 2879 * geterrno maps to ENOTEMPTY.
2880 2880 */
2881 2881 if (error == ENOTEMPTY)
2882 2882 error = EEXIST;
2883 2883 }
2884 2884
2885 2885 if (error == 0) {
2886 2886 vnevent_rmdir(vp, dvp, nm, ct);
2887 2887 }
2888 2888 VN_RELE(vp);
2889 2889
2890 2890 nfs_rw_exit(&drp->r_rwlock);
2891 2891
2892 2892 return (error);
2893 2893 }
2894 2894
2895 2895 /* ARGSUSED */
2896 2896 static int
2897 2897 nfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
2898 2898 caller_context_t *ct, int flags)
2899 2899 {
2900 2900 int error;
2901 2901 struct nfsslargs args;
2902 2902 enum nfsstat status;
2903 2903 int douprintf;
2904 2904 rnode_t *drp;
2905 2905
2906 2906 if (nfs_zone() != VTOMI(dvp)->mi_zone)
2907 2907 return (EPERM);
2908 2908 setdiropargs(&args.sla_from, lnm, dvp);
2909 2909 args.sla_sa = &args.sla_sa_buf;
2910 2910 error = vattr_to_sattr(tva, args.sla_sa);
2911 2911 if (error) {
2912 2912 /* req time field(s) overflow - return immediately */
2913 2913 return (error);
2914 2914 }
2915 2915 args.sla_tnm = tnm;
2916 2916
2917 2917 drp = VTOR(dvp);
2918 2918 if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp)))
2919 2919 return (EINTR);
2920 2920
2921 2921 dnlc_remove(dvp, lnm);
2922 2922
2923 2923 douprintf = 1;
2924 2924
2925 2925 error = rfs2call(VTOMI(dvp), RFS_SYMLINK,
2926 2926 xdr_slargs, (caddr_t)&args,
2927 2927 xdr_enum, (caddr_t)&status, cr,
2928 2928 &douprintf, &status, 0, NULL);
2929 2929
2930 2930 PURGE_ATTRCACHE(dvp); /* mod time changed */
2931 2931
2932 2932 if (!error) {
2933 2933 error = geterrno(status);
2934 2934 if (!error) {
2935 2935 if (HAVE_RDDIR_CACHE(drp))
2936 2936 nfs_purge_rddir_cache(dvp);
2937 2937 } else {
2938 2938 PURGE_STALE_FH(error, dvp, cr);
2939 2939 }
2940 2940 }
2941 2941
2942 2942 nfs_rw_exit(&drp->r_rwlock);
2943 2943
2944 2944 return (error);
2945 2945 }
2946 2946
/*
 * Readdir cache statistics, present only in DEBUG kernels. They are
 * bumped with plain (non-atomic) increments from nfs_readdir().
 */
2947 2947 #ifdef DEBUG
2948 2948 static int nfs_readdir_cache_hits = 0;
2949 2949 static int nfs_readdir_cache_shorts = 0;
2950 2950 static int nfs_readdir_cache_waits = 0;
2951 2951 static int nfs_readdir_cache_misses = 0;
2952 2952 static int nfs_readdir_readahead = 0;
2953 2953 #endif
2954 2954
/*
 * When non-zero, nfs_readdir() limits the request size to 0x400 bytes
 * instead of NFS_MAXDATA.
 */
2955 2955 static int nfs_shrinkreaddir = 0;
2956 2956
/*
 * nfs_readdir: return directory entries for vp through the per-rnode
 * readdir cache (the rp->r_dir AVL tree).
 *
 * vp    - directory vnode; the caller must already hold r_rwlock as a
 *         reader (asserted below).
 * uiop  - uio_offset is the cookie to start from and uio_iov->iov_len
 *         bounds the request size; after a successful copyout
 *         uio_offset is set to the next cookie.
 * cr    - credentials used for cache validation and the READDIR call.
 * eofp  - if non-NULL, set to 1 on the end-of-directory short circuit,
 *         otherwise to the cache entry's eof flag after a successful
 *         copyout.
 * ct, flags - not referenced in this function.
 *
 * Returns 0 on success, EIO when called from a zone other than the
 * mount's, EINTR when a wait for an in-progress fill is interrupted,
 * or the error from nfs_validate_caches(), a cached entry's error,
 * uiomove() or nfsreaddir().
 */
2957 2957 /*
2958 2958 * Read directory entries.
2959 2959 * There are some weird things to look out for here. The uio_offset
2960 2960 * field is either 0 or it is the offset returned from a previous
2961 2961 * readdir. It is an opaque value used by the server to find the
2962 2962 * correct directory block to read. The count field is the number
2963 2963 * of blocks to read on the server. This is advisory only, the server
2964 2964 * may return only one block's worth of entries. Entries may be compressed
2965 2965 * on the server.
2966 2966 */
2967 2967 /* ARGSUSED */
2968 2968 static int
2969 2969 nfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2970 2970 caller_context_t *ct, int flags)
2971 2971 {
2972 2972 int error;
2973 2973 size_t count;
2974 2974 rnode_t *rp;
2975 2975 rddir_cache *rdc;
2976 2976 rddir_cache *nrdc;
2977 2977 rddir_cache *rrdc;
2978 2978 #ifdef DEBUG
2979 2979 int missed;
2980 2980 #endif
2981 2981 rddir_cache srdc;
2982 2982 avl_index_t where;
2983 2983
2984 2984 rp = VTOR(vp);
2985 2985
2986 2986 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2987 2987 if (nfs_zone() != VTOMI(vp)->mi_zone)
2988 2988 return (EIO);
2989 2989 /*
2990 2990 * Make sure that the directory cache is valid.
2991 2991 */
2992 2992 if (HAVE_RDDIR_CACHE(rp)) {
2993 2993 if (nfs_disable_rddir_cache) {
2994 2994 /*
2995 2995 * Setting nfs_disable_rddir_cache in /etc/system
2996 2996 * allows interoperability with servers that do not
2997 2997 * properly update the attributes of directories.
2998 2998 * Any cached information gets purged before an
2999 2999 * access is made to it.
3000 3000 */
3001 3001 nfs_purge_rddir_cache(vp);
3002 3002 } else {
3003 3003 error = nfs_validate_caches(vp, cr);
3004 3004 if (error)
3005 3005 return (error);
3006 3006 }
3007 3007 }
3008 3008
3009 3009 /*
3010 3010 * UGLINESS: SunOS 3.2 servers apparently cannot always handle an
3011 3011 * RFS_READDIR request with rda_count set to more than 0x400. So
3012 3012 * we reduce the request size here purely for compatibility.
3013 3013 *
3014 3014 * In general, this is no longer required. However, if a server
3015 3015 * is discovered which can not handle requests larger than 1024,
3016 3016 * nfs_shrinkreaddir can be set to 1 to enable this backwards
3017 3017 * compatibility.
3018 3018 *
3019 3019 * In any case, the request size is limited to NFS_MAXDATA bytes.
3020 3020 */
3021 3021 count = MIN(uiop->uio_iov->iov_len,
3022 3022 nfs_shrinkreaddir ? 0x400 : NFS_MAXDATA);
3023 3023
/*
 * nrdc carries a preallocated entry that has not yet been inserted into
 * the cache across passes through "top"; every exit path below that
 * does not hand it on releases it.
 */
3024 3024 nrdc = NULL;
3025 3025 #ifdef DEBUG
3026 3026 missed = 0;
3027 3027 #endif
3028 3028 top:
3029 3029 /*
3030 3030 * Short circuit last readdir which always returns 0 bytes.
3031 3031 * This can be done after the directory has been read through
3032 3032 * completely at least once. This will set r_direof which
3033 3033 * can be used to find the value of the last cookie.
3034 3034 */
3035 3035 mutex_enter(&rp->r_statelock);
3036 3036 if (rp->r_direof != NULL &&
3037 3037 uiop->uio_offset == rp->r_direof->nfs_ncookie) {
3038 3038 mutex_exit(&rp->r_statelock);
3039 3039 #ifdef DEBUG
3040 3040 nfs_readdir_cache_shorts++;
3041 3041 #endif
3042 3042 if (eofp)
3043 3043 *eofp = 1;
3044 3044 if (nrdc != NULL)
3045 3045 rddir_cache_rele(nrdc);
3046 3046 return (0);
3047 3047 }
3048 3048 /*
3049 3049 * Look for a cache entry. Cache entries are identified
3050 3050 * by the NFS cookie value and the byte count requested.
3051 3051 */
3052 3052 srdc.nfs_cookie = uiop->uio_offset;
3053 3053 srdc.buflen = count;
3054 3054 rdc = avl_find(&rp->r_dir, &srdc, &where);
3055 3055 if (rdc != NULL) {
3056 3056 rddir_cache_hold(rdc);
3057 3057 /*
3058 3058 * If the cache entry is in the process of being
3059 3059 * filled in, wait until this completes. The
3060 3060 * RDDIRWAIT bit is set to indicate that someone
3061 3061 * is waiting and then the thread currently
3062 3062 * filling the entry is done, it should do a
3063 3063 * cv_broadcast to wakeup all of the threads
3064 3064 * waiting for it to finish.
3065 3065 */
3066 3066 if (rdc->flags & RDDIR) {
/*
 * r_rwlock is dropped for the duration of the sleep and retaken as a
 * reader on both the interrupted and the normal wakeup path, so the
 * caller still holds it when this function returns or retries.
 */
3067 3067 nfs_rw_exit(&rp->r_rwlock);
3068 3068 rdc->flags |= RDDIRWAIT;
3069 3069 #ifdef DEBUG
3070 3070 nfs_readdir_cache_waits++;
3071 3071 #endif
3072 3072 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
3073 3073 /*
3074 3074 * We got interrupted, probably
3075 3075 * the user typed ^C or an alarm
3076 3076 * fired. We free the new entry
3077 3077 * if we allocated one.
3078 3078 */
3079 3079 mutex_exit(&rp->r_statelock);
3080 3080 (void) nfs_rw_enter_sig(&rp->r_rwlock,
3081 3081 RW_READER, FALSE);
3082 3082 rddir_cache_rele(rdc);
3083 3083 if (nrdc != NULL)
3084 3084 rddir_cache_rele(nrdc);
3085 3085 return (EINTR);
3086 3086 }
3087 3087 mutex_exit(&rp->r_statelock);
3088 3088 (void) nfs_rw_enter_sig(&rp->r_rwlock,
3089 3089 RW_READER, FALSE);
3090 3090 rddir_cache_rele(rdc);
3091 3091 goto top;
3092 3092 }
3093 3093 /*
3094 3094 * Check to see if a readdir is required to
3095 3095 * fill the entry. If so, mark this entry
3096 3096 * as being filled, remove our reference,
3097 3097 * and branch to the code to fill the entry.
3098 3098 */
3099 3099 if (rdc->flags & RDDIRREQ) {
3100 3100 rdc->flags &= ~RDDIRREQ;
3101 3101 rdc->flags |= RDDIR;
3102 3102 if (nrdc != NULL)
3103 3103 rddir_cache_rele(nrdc);
/*
 * The hold taken on rdc above is not released here; it travels with
 * nrdc into nfsreaddir(), which ends with a rddir_cache_rele().
 */
3104 3104 nrdc = rdc;
3105 3105 mutex_exit(&rp->r_statelock);
3106 3106 goto bottom;
3107 3107 }
3108 3108 #ifdef DEBUG
3109 3109 if (!missed)
3110 3110 nfs_readdir_cache_hits++;
3111 3111 #endif
3112 3112 /*
3113 3113 * If an error occurred while attempting
3114 3114 * to fill the cache entry, just return it.
3115 3115 */
3116 3116 if (rdc->error) {
3117 3117 error = rdc->error;
3118 3118 mutex_exit(&rp->r_statelock);
3119 3119 rddir_cache_rele(rdc);
3120 3120 if (nrdc != NULL)
3121 3121 rddir_cache_rele(nrdc);
3122 3122 return (error);
3123 3123 }
3124 3124
3125 3125 /*
3126 3126 * The cache entry is complete and good,
3127 3127 * copyout the dirent structs to the calling
3128 3128 * thread.
3129 3129 */
3130 3130 error = uiomove(rdc->entries, rdc->entlen, UIO_READ, uiop);
3131 3131
3132 3132 /*
3133 3133 * If no error occurred during the copyout,
3134 3134 * update the offset in the uio struct to
3135 3135 * contain the value of the next cookie
3136 3136 * and set the eof value appropriately.
3137 3137 */
3138 3138 if (!error) {
3139 3139 uiop->uio_offset = rdc->nfs_ncookie;
3140 3140 if (eofp)
3141 3141 *eofp = rdc->eof;
3142 3142 }
3143 3143
3144 3144 /*
3145 3145 * Decide whether to do readahead. Don't if
3146 3146 * have already read to the end of directory.
3147 3147 */
3148 3148 if (rdc->eof) {
3149 3149 rp->r_direof = rdc;
3150 3150 mutex_exit(&rp->r_statelock);
3151 3151 rddir_cache_rele(rdc);
3152 3152 if (nrdc != NULL)
3153 3153 rddir_cache_rele(nrdc);
3154 3154 return (error);
3155 3155 }
3156 3156
3157 3157 /*
3158 3158 * Check to see whether we found an entry
3159 3159 * for the readahead. If so, we don't need
3160 3160 * to do anything further, so free the new
3161 3161 * entry if one was allocated. Otherwise,
3162 3162 * allocate a new entry, add it to the cache,
3163 3163 * and then initiate an asynchronous readdir
3164 3164 * operation to fill it.
3165 3165 */
3166 3166 srdc.nfs_cookie = rdc->nfs_ncookie;
3167 3167 srdc.buflen = count;
3168 3168 rrdc = avl_find(&rp->r_dir, &srdc, &where);
3169 3169 if (rrdc != NULL) {
3170 3170 if (nrdc != NULL)
3171 3171 rddir_cache_rele(nrdc);
3172 3172 } else {
3173 3173 if (nrdc != NULL)
3174 3174 rrdc = nrdc;
3175 3175 else {
/*
 * r_statelock is held here, so the allocation must not sleep; if it
 * fails the readahead is simply skipped.
 */
3176 3176 rrdc = rddir_cache_alloc(KM_NOSLEEP);
3177 3177 }
3178 3178 if (rrdc != NULL) {
3179 3179 rrdc->nfs_cookie = rdc->nfs_ncookie;
3180 3180 rrdc->buflen = count;
3181 3181 avl_insert(&rp->r_dir, rrdc, where);
/*
 * NOTE(review): this extra hold is presumably the reference consumed by
 * the asynchronous nfsreaddir() call -- confirm against
 * nfs_async_readdir().
 */
3182 3182 rddir_cache_hold(rrdc);
3183 3183 mutex_exit(&rp->r_statelock);
3184 3184 rddir_cache_rele(rdc);
3185 3185 #ifdef DEBUG
3186 3186 nfs_readdir_readahead++;
3187 3187 #endif
3188 3188 nfs_async_readdir(vp, rrdc, cr, nfsreaddir);
3189 3189 return (error);
3190 3190 }
3191 3191 }
3192 3192
3193 3193 mutex_exit(&rp->r_statelock);
3194 3194 rddir_cache_rele(rdc);
3195 3195 return (error);
3196 3196 }
3197 3197
3198 3198 /*
3199 3199 * Didn't find an entry in the cache. Construct a new empty
3200 3200 * entry and link it into the cache. Other processes attempting
3201 3201 * to access this entry will need to wait until it is filled in.
3202 3202 *
3203 3203 * Since kmem_alloc may block, another pass through the cache
3204 3204 * will need to be taken to make sure that another process
3205 3205 * hasn't already added an entry to the cache for this request.
3206 3206 */
3207 3207 if (nrdc == NULL) {
3208 3208 mutex_exit(&rp->r_statelock);
3209 3209 nrdc = rddir_cache_alloc(KM_SLEEP);
3210 3210 nrdc->nfs_cookie = uiop->uio_offset;
3211 3211 nrdc->buflen = count;
3212 3212 goto top;
3213 3213 }
3214 3214
3215 3215 /*
3216 3216 * Add this entry to the cache.
3217 3217 */
3218 3218 avl_insert(&rp->r_dir, nrdc, where);
3219 3219 rddir_cache_hold(nrdc);
3220 3220 mutex_exit(&rp->r_statelock);
3221 3221
3222 3222 bottom:
3223 3223 #ifdef DEBUG
3224 3224 missed = 1;
3225 3225 nfs_readdir_cache_misses++;
3226 3226 #endif
3227 3227 /*
3228 3228 * Do the readdir.
3229 3229 */
/*
 * nfsreaddir() releases one reference on nrdc before it returns, on
 * success and on failure alike, so nrdc is not released here.
 */
3230 3230 error = nfsreaddir(vp, nrdc, cr);
3231 3231
3232 3232 /*
3233 3233 * If this operation failed, just return the error which occurred.
3234 3234 */
3235 3235 if (error != 0)
3236 3236 return (error);
3237 3237
3238 3238 /*
3239 3239 * Since the RPC operation will have taken sometime and blocked
3240 3240 * this process, another pass through the cache will need to be
3241 3241 * taken to find the correct cache entry. It is possible that
3242 3242 * the correct cache entry will not be there (although one was
3243 3243 * added) because the directory changed during the RPC operation
3244 3244 * and the readdir cache was flushed. In this case, just start
3245 3245 * over. It is hoped that this will not happen too often... :-)
3246 3246 */
3247 3247 nrdc = NULL;
3248 3248 goto top;
3249 3249 /* NOTREACHED */
3250 3250 }
3251 3251
/*
 * nfsreaddir: issue an RFS_READDIR call to fill the cache entry rdc for
 * directory vp, using credentials cr.
 *
 * The caller must be in the mount's zone and must already have marked
 * rdc with RDDIR (both asserted below). The reply is decoded into a
 * temporary buffer of rdc->buflen bytes. On success the entries are
 * copied into a newly allocated rdc->entries and nfs_ncookie, eof and
 * entlen are recorded. On failure rdc->entries is set to NULL,
 * rdc->error records the error and RDDIRREQ is set, which makes a later
 * nfs_readdir() lookup of this entry issue the call again.
 *
 * In every case RDDIR is cleared, any RDDIRWAIT waiters are woken and
 * one reference on rdc is released before returning.
 *
 * Returns 0 on success, otherwise the rfs2call() error or the errno
 * that geterrno() derives from the server status.
 */
3252 3252 static int
3253 3253 nfsreaddir(vnode_t *vp, rddir_cache *rdc, cred_t *cr)
3254 3254 {
3255 3255 int error;
3256 3256 struct nfsrddirargs rda;
3257 3257 struct nfsrddirres rd;
3258 3258 rnode_t *rp;
3259 3259 mntinfo_t *mi;
3260 3260 uint_t count;
3261 3261 int douprintf;
3262 3262 failinfo_t fi, *fip;
3263 3263
3264 3264 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
3265 3265 count = rdc->buflen;
3266 3266
3267 3267 rp = VTOR(vp);
3268 3268 mi = VTOMI(vp);
3269 3269
3270 3270 rda.rda_fh = *VTOFH(vp);
3271 3271 rda.rda_offset = rdc->nfs_cookie;
3272 3272
3273 3273 /*
3274 3274 * NFS client failover support
3275 3275 * suppress failover unless we have a zero cookie
3276 3276 */
3277 3277 if (rdc->nfs_cookie == (off_t)0) {
3278 3278 fi.vp = vp;
3279 3279 fi.fhp = (caddr_t)&rda.rda_fh;
3280 3280 fi.copyproc = nfscopyfh;
3281 3281 fi.lookupproc = nfslookup;
3282 3282 fi.xattrdirproc = acl_getxattrdir2;
3283 3283 fip = &fi;
3284 3284 } else {
3285 3285 fip = NULL;
3286 3286 }
3287 3287
/*
 * Temporary buffer for the decoded reply; it is freed with the same
 * rdc->buflen size once the entries have been copied or the call has
 * failed.
 */
3288 3288 rd.rd_entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3289 3289 rd.rd_size = count;
3290 3290 rd.rd_offset = rda.rda_offset;
3291 3291
3292 3292 douprintf = 1;
3293 3293
3294 3294 if (mi->mi_io_kstats) {
3295 3295 mutex_enter(&mi->mi_lock);
3296 3296 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
3297 3297 mutex_exit(&mi->mi_lock);
3298 3298 }
3299 3299
/*
 * Repeat the call for as long as rfs2call() returns ENFS_TRYAGAIN; the
 * request size is clamped to mi_curread again on every attempt.
 */
3300 3300 do {
3301 3301 rda.rda_count = MIN(count, mi->mi_curread);
3302 3302 error = rfs2call(mi, RFS_READDIR,
3303 3303 xdr_rddirargs, (caddr_t)&rda,
3304 3304 xdr_getrddirres, (caddr_t)&rd, cr,
3305 3305 &douprintf, &rd.rd_status, 0, fip);
3306 3306 } while (error == ENFS_TRYAGAIN);
3307 3307
3308 3308 if (mi->mi_io_kstats) {
3309 3309 mutex_enter(&mi->mi_lock);
3310 3310 kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats));
3311 3311 mutex_exit(&mi->mi_lock);
3312 3312 }
3313 3313
3314 3314 /*
3315 3315 * Since we are actually doing a READDIR RPC, we must have
3316 3316 * exclusive access to the cache entry being filled. Thus,
3317 3317 * it is safe to update all fields except for the flags
3318 3318 * field. The r_statelock in the rnode must be held to
3319 3319 * prevent two different threads from simultaneously
3320 3320 * attempting to update the flags field. This can happen
3321 3321 * if we are turning off RDDIR and the other thread is
3322 3322 * trying to set RDDIRWAIT.
3323 3323 */
3324 3324 ASSERT(rdc->flags & RDDIR);
3325 3325 if (!error) {
3326 3326 error = geterrno(rd.rd_status);
3327 3327 if (!error) {
3328 3328 rdc->nfs_ncookie = rd.rd_offset;
3329 3329 rdc->eof = rd.rd_eof ? 1 : 0;
3330 3330 rdc->entlen = rd.rd_size;
3331 3331 ASSERT(rdc->entlen <= rdc->buflen);
3332 3332 #ifdef DEBUG
3333 3333 rdc->entries = rddir_cache_buf_alloc(rdc->buflen,
3334 3334 KM_SLEEP);
3335 3335 #else
3336 3336 rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3337 3337 #endif
3338 3338 bcopy(rd.rd_entries, rdc->entries, rdc->entlen);
3339 3339 rdc->error = 0;
3340 3340 if (mi->mi_io_kstats) {
3341 3341 mutex_enter(&mi->mi_lock);
3342 3342 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
3343 3343 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
3344 3344 rd.rd_size;
3345 3345 mutex_exit(&mi->mi_lock);
3346 3346 }
3347 3347 } else {
3348 3348 PURGE_STALE_FH(error, vp, cr);
3349 3349 }
3350 3350 }
3351 3351 if (error) {
3352 3352 rdc->entries = NULL;
3353 3353 rdc->error = error;
3354 3354 }
3355 3355 kmem_free(rd.rd_entries, rdc->buflen);
3356 3356
/*
 * Publish the result: clear RDDIR, wake any waiters, and on failure
 * flag the entry with RDDIRREQ so the next lookup retries the call.
 */
3357 3357 mutex_enter(&rp->r_statelock);
3358 3358 rdc->flags &= ~RDDIR;
3359 3359 if (rdc->flags & RDDIRWAIT) {
3360 3360 rdc->flags &= ~RDDIRWAIT;
3361 3361 cv_broadcast(&rdc->cv);
3362 3362 }
3363 3363 if (error)
3364 3364 rdc->flags |= RDDIRREQ;
3365 3365 mutex_exit(&rp->r_statelock);
3366 3366
3367 3367 rddir_cache_rele(rdc);
3368 3368
3369 3369 return (error);
3370 3370 }
3371 3371
/*
 * DEBUG-only switch: when non-zero, nfs_bio() calls debug_enter() after
 * warning about a zero length write.
 */
3372 3372 #ifdef DEBUG
3373 3373 static int nfs_bio_do_stop = 0;
3374 3374 #endif
3375 3375
3376 3376 static int
3377 3377 nfs_bio(struct buf *bp, cred_t *cr)
3378 3378 {
3379 3379 rnode_t *rp = VTOR(bp->b_vp);
3380 3380 int count;
3381 3381 int error;
3382 3382 cred_t *cred;
3383 3383 uint_t offset;
3384 3384
3385 3385 DTRACE_IO1(start, struct buf *, bp);
3386 3386
3387 3387 ASSERT(nfs_zone() == VTOMI(bp->b_vp)->mi_zone);
3388 3388 offset = dbtob(bp->b_blkno);
3389 3389
3390 3390 if (bp->b_flags & B_READ) {
3391 3391 mutex_enter(&rp->r_statelock);
3392 3392 if (rp->r_cred != NULL) {
3393 3393 cred = rp->r_cred;
3394 3394 crhold(cred);
3395 3395 } else {
3396 3396 rp->r_cred = cr;
3397 3397 crhold(cr);
3398 3398 cred = cr;
3399 3399 crhold(cred);
3400 3400 }
3401 3401 mutex_exit(&rp->r_statelock);
3402 3402 read_again:
3403 3403 error = bp->b_error = nfsread(bp->b_vp, bp->b_un.b_addr,
3404 3404 offset, bp->b_bcount, &bp->b_resid, cred);
3405 3405
3406 3406 crfree(cred);
3407 3407 if (!error) {
3408 3408 if (bp->b_resid) {
3409 3409 /*
3410 3410 * Didn't get it all because we hit EOF,
3411 3411 * zero all the memory beyond the EOF.
3412 3412 */
3413 3413 /* bzero(rdaddr + */
3414 3414 bzero(bp->b_un.b_addr +
3415 3415 bp->b_bcount - bp->b_resid, bp->b_resid);
3416 3416 }
3417 3417 mutex_enter(&rp->r_statelock);
3418 3418 if (bp->b_resid == bp->b_bcount &&
3419 3419 offset >= rp->r_size) {
3420 3420 /*
3421 3421 * We didn't read anything at all as we are
3422 3422 * past EOF. Return an error indicator back
3423 3423 * but don't destroy the pages (yet).
3424 3424 */
3425 3425 error = NFS_EOF;
3426 3426 }
3427 3427 mutex_exit(&rp->r_statelock);
3428 3428 } else if (error == EACCES) {
3429 3429 mutex_enter(&rp->r_statelock);
3430 3430 if (cred != cr) {
3431 3431 if (rp->r_cred != NULL)
3432 3432 crfree(rp->r_cred);
3433 3433 rp->r_cred = cr;
3434 3434 crhold(cr);
3435 3435 cred = cr;
3436 3436 crhold(cred);
3437 3437 mutex_exit(&rp->r_statelock);
3438 3438 goto read_again;
3439 3439 }
3440 3440 mutex_exit(&rp->r_statelock);
3441 3441 }
3442 3442 } else {
3443 3443 if (!(rp->r_flags & RSTALE)) {
3444 3444 mutex_enter(&rp->r_statelock);
3445 3445 if (rp->r_cred != NULL) {
3446 3446 cred = rp->r_cred;
3447 3447 crhold(cred);
3448 3448 } else {
3449 3449 rp->r_cred = cr;
3450 3450 crhold(cr);
3451 3451 cred = cr;
3452 3452 crhold(cred);
3453 3453 }
3454 3454 mutex_exit(&rp->r_statelock);
3455 3455 write_again:
3456 3456 mutex_enter(&rp->r_statelock);
3457 3457 count = MIN(bp->b_bcount, rp->r_size - offset);
3458 3458 mutex_exit(&rp->r_statelock);
3459 3459 if (count < 0)
3460 3460 cmn_err(CE_PANIC, "nfs_bio: write count < 0");
3461 3461 #ifdef DEBUG
3462 3462 if (count == 0) {
3463 3463 zcmn_err(getzoneid(), CE_WARN,
3464 3464 "nfs_bio: zero length write at %d",
3465 3465 offset);
3466 3466 nfs_printfhandle(&rp->r_fh);
3467 3467 if (nfs_bio_do_stop)
3468 3468 debug_enter("nfs_bio");
3469 3469 }
3470 3470 #endif
3471 3471 error = nfswrite(bp->b_vp, bp->b_un.b_addr, offset,
3472 3472 count, cred);
3473 3473 if (error == EACCES) {
3474 3474 mutex_enter(&rp->r_statelock);
3475 3475 if (cred != cr) {
3476 3476 if (rp->r_cred != NULL)
3477 3477 crfree(rp->r_cred);
3478 3478 rp->r_cred = cr;
3479 3479 crhold(cr);
3480 3480 crfree(cred);
3481 3481 cred = cr;
3482 3482 crhold(cred);
3483 3483 mutex_exit(&rp->r_statelock);
3484 3484 goto write_again;
3485 3485 }
3486 3486 mutex_exit(&rp->r_statelock);
3487 3487 }
3488 3488 bp->b_error = error;
3489 3489 if (error && error != EINTR) {
3490 3490 /*
3491 3491 * Don't print EDQUOT errors on the console.
3492 3492 * Don't print asynchronous EACCES errors.
3493 3493 * Don't print EFBIG errors.
3494 3494 * Print all other write errors.
3495 3495 */
3496 3496 if (error != EDQUOT && error != EFBIG &&
3497 3497 (error != EACCES ||
3498 3498 !(bp->b_flags & B_ASYNC)))
3499 3499 nfs_write_error(bp->b_vp, error, cred);
3500 3500 /*
3501 3501 * Update r_error and r_flags as appropriate.
3502 3502 * If the error was ESTALE, then mark the
3503 3503 * rnode as not being writeable and save
3504 3504 * the error status. Otherwise, save any
3505 3505 * errors which occur from asynchronous
3506 3506 * page invalidations. Any errors occurring
3507 3507 * from other operations should be saved
3508 3508 * by the caller.
3509 3509 */
3510 3510 mutex_enter(&rp->r_statelock);
3511 3511 if (error == ESTALE) {
3512 3512 rp->r_flags |= RSTALE;
3513 3513 if (!rp->r_error)
3514 3514 rp->r_error = error;
3515 3515 } else if (!rp->r_error &&
3516 3516 (bp->b_flags &
3517 3517 (B_INVAL|B_FORCE|B_ASYNC)) ==
3518 3518 (B_INVAL|B_FORCE|B_ASYNC)) {
3519 3519 rp->r_error = error;
3520 3520 }
3521 3521 mutex_exit(&rp->r_statelock);
3522 3522 }
3523 3523 crfree(cred);
3524 3524 } else {
3525 3525 error = rp->r_error;
3526 3526 /*
3527 3527 * A close may have cleared r_error, if so,
3528 3528 * propagate ESTALE error return properly
3529 3529 */
3530 3530 if (error == 0)
3531 3531 error = ESTALE;
3532 3532 }
3533 3533 }
3534 3534
3535 3535 if (error != 0 && error != NFS_EOF)
3536 3536 bp->b_flags |= B_ERROR;
3537 3537
3538 3538 DTRACE_IO1(done, struct buf *, bp);
3539 3539
3540 3540 return (error);
3541 3541 }
3542 3542
3543 3543 /* ARGSUSED */
3544 3544 static int
3545 3545 nfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3546 3546 {
3547 3547 struct nfs_fid *fp;
3548 3548 rnode_t *rp;
3549 3549
3550 3550 rp = VTOR(vp);
3551 3551
3552 3552 if (fidp->fid_len < (sizeof (struct nfs_fid) - sizeof (short))) {
3553 3553 fidp->fid_len = sizeof (struct nfs_fid) - sizeof (short);
3554 3554 return (ENOSPC);
3555 3555 }
3556 3556 fp = (struct nfs_fid *)fidp;
3557 3557 fp->nf_pad = 0;
3558 3558 fp->nf_len = sizeof (struct nfs_fid) - sizeof (short);
3559 3559 bcopy(rp->r_fh.fh_buf, fp->nf_data, NFS_FHSIZE);
3560 3560 return (0);
3561 3561 }
3562 3562
3563 3563 /* ARGSUSED2 */
3564 3564 static int
3565 3565 nfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3566 3566 {
3567 3567 rnode_t *rp = VTOR(vp);
3568 3568
3569 3569 if (!write_lock) {
3570 3570 (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
3571 3571 return (V_WRITELOCK_FALSE);
3572 3572 }
3573 3573
3574 3574 if ((rp->r_flags & RDIRECTIO) || (VTOMI(vp)->mi_flags & MI_DIRECTIO)) {
3575 3575 (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);
3576 3576 if (rp->r_mapcnt == 0 && !vn_has_cached_data(vp))
3577 3577 return (V_WRITELOCK_FALSE);
3578 3578 nfs_rw_exit(&rp->r_rwlock);
3579 3579 }
3580 3580
3581 3581 (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER, FALSE);
3582 3582 return (V_WRITELOCK_TRUE);
3583 3583 }
3584 3584
3585 3585 /* ARGSUSED */
3586 3586 static void
3587 3587 nfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3588 3588 {
3589 3589 rnode_t *rp = VTOR(vp);
3590 3590
3591 3591 nfs_rw_exit(&rp->r_rwlock);
3592 3592 }
3593 3593
3594 3594 /* ARGSUSED */
3595 3595 static int
3596 3596 nfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3597 3597 {
3598 3598
3599 3599 /*
3600 3600 * Because we stuff the readdir cookie into the offset field
3601 3601 * someone may attempt to do an lseek with the cookie which
3602 3602 * we want to succeed.
3603 3603 */
3604 3604 if (vp->v_type == VDIR)
3605 3605 return (0);
3606 3606 if (*noffp < 0 || *noffp > MAXOFF32_T)
3607 3607 return (EINVAL);
3608 3608 return (0);
3609 3609 }
3610 3610
3611 3611 /*
3612 3612 * number of NFS_MAXDATA blocks to read ahead
3613 3613 * optimized for 100 base-T.
3614 3614 */
/*
 * nfs_getapage() uses this as the largest number of asynchronous
 * readaheads it queues for a single request.
 */
3615 3615 static int nfs_nra = 4;
3616 3616
/*
 * DEBUG-only counter, bumped in nfs_getapage() when page_lookup() fails
 * for a page that page_exists() had just reported.
 */
3617 3617 #ifdef DEBUG
3618 3618 static int nfs_lostpage = 0; /* number of times we lost original page */
3619 3619 #endif
3620 3620
/*
 * nfs_getpage: return the pages covering [off, off + len) of vp.
 *
 * protp - if non-NULL, set to PROT_ALL.
 * pl, plsz, seg, addr, rw, cr - passed through to nfs_getapage(),
 *         either directly (len <= PAGESIZE) or via pvn_getpages().
 * ct    - not referenced in this function.
 *
 * Returns ENOSYS when the vnode is marked VNOMAP, EIO when called from
 * a zone other than the mount's, the nfs_validate_caches() error,
 * EFAULT when the range ends more than PAGEOFFSET bytes beyond r_size
 * and seg is not segkmap, and otherwise the result of the page fetch.
 * A fetch that ends in NFS_EOF purges the caches and is retried from
 * the size check.
 */
3621 3621 /*
3622 3622 * Return all the pages from [off..off+len) in file
3623 3623 */
3624 3624 /* ARGSUSED */
3625 3625 static int
3626 3626 nfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3627 3627 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3628 3628 enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3629 3629 {
3630 3630 rnode_t *rp;
3631 3631 int error;
3632 3632 mntinfo_t *mi;
3633 3633
3634 3634 if (vp->v_flag & VNOMAP)
3635 3635 return (ENOSYS);
3636 3636
3637 3637 ASSERT(off <= MAXOFF32_T);
3638 3638 if (nfs_zone() != VTOMI(vp)->mi_zone)
3639 3639 return (EIO);
3640 3640 if (protp != NULL)
3641 3641 *protp = PROT_ALL;
3642 3642
3643 3643 /*
3644 3644 * Now validate that the caches are up to date.
3645 3645 */
3646 3646 error = nfs_validate_caches(vp, cr);
3647 3647 if (error)
3648 3648 return (error);
3649 3649
3650 3650 rp = VTOR(vp);
3651 3651 mi = VTOMI(vp);
3652 3652 retry:
3653 3653 mutex_enter(&rp->r_statelock);
3654 3654
3655 3655 /*
3656 3656 * Don't create dirty pages faster than they
3657 3657 * can be cleaned so that the system doesn't
3658 3658 * get imbalanced. If the async queue is
3659 3659 * maxed out, then wait for it to drain before
3660 3660 * creating more dirty pages. Also, wait for
3661 3661 * any threads doing pagewalks in the vop_getattr
3662 3662 * entry points so that they don't block for
3663 3663 * long periods.
3664 3664 */
3665 3665 if (rw == S_CREATE) {
/*
 * The wait is not interruptible (cv_wait, not cv_wait_sig); the
 * condition is re-evaluated after every wakeup on r_cv.
 */
3666 3666 while ((mi->mi_max_threads != 0 &&
3667 3667 rp->r_awcount > 2 * mi->mi_max_threads) ||
3668 3668 rp->r_gcount > 0)
3669 3669 cv_wait(&rp->r_cv, &rp->r_statelock);
3670 3670 }
3671 3671
3672 3672 /*
3673 3673 * If we are getting called as a side effect of an nfs_write()
3674 3674 * operation the local file size might not be extended yet.
3675 3675 * In this case we want to be able to return pages of zeroes.
3676 3676 */
3677 3677 if (off + len > rp->r_size + PAGEOFFSET && seg != segkmap) {
3678 3678 mutex_exit(&rp->r_statelock);
3679 3679 return (EFAULT); /* beyond EOF */
3680 3680 }
3681 3681
3682 3682 mutex_exit(&rp->r_statelock);
3683 3683
/*
 * A request of at most one page goes straight to nfs_getapage(); larger
 * requests go through pvn_getpages() with nfs_getapage as the callback.
 */
3684 3684 if (len <= PAGESIZE) {
3685 3685 error = nfs_getapage(vp, off, len, protp, pl, plsz,
3686 3686 seg, addr, rw, cr);
3687 3687 } else {
3688 3688 error = pvn_getpages(nfs_getapage, vp, off, len, protp,
3689 3689 pl, plsz, seg, addr, rw, cr);
3690 3690 }
3691 3691
/*
 * Only NFS_EOF and ESTALE get special treatment; there is no default
 * case, so every other value (including 0) is returned unchanged.
 */
3692 3692 switch (error) {
3693 3693 case NFS_EOF:
3694 3694 nfs_purge_caches(vp, NFS_NOPURGE_DNLC, cr);
3695 3695 goto retry;
3696 3696 case ESTALE:
3697 3697 PURGE_STALE_FH(error, vp, cr);
3698 3698 }
3699 3699
3700 3700 return (error);
3701 3701 }
3702 3702
3703 3703 /*
3704 3704 * Called from pvn_getpages or nfs_getpage to get a particular page.
3705 3705 */
3706 3706 /* ARGSUSED */
3707 3707 static int
3708 3708 nfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
3709 3709 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3710 3710 enum seg_rw rw, cred_t *cr)
3711 3711 {
3712 3712 rnode_t *rp;
3713 3713 uint_t bsize;
3714 3714 struct buf *bp;
3715 3715 page_t *pp;
3716 3716 u_offset_t lbn;
3717 3717 u_offset_t io_off;
3718 3718 u_offset_t blkoff;
3719 3719 u_offset_t rablkoff;
3720 3720 size_t io_len;
3721 3721 uint_t blksize;
3722 3722 int error;
3723 3723 int readahead;
3724 3724 int readahead_issued = 0;
3725 3725 int ra_window; /* readahead window */
3726 3726 page_t *pagefound;
3727 3727
3728 3728 if (nfs_zone() != VTOMI(vp)->mi_zone)
3729 3729 return (EIO);
3730 3730 rp = VTOR(vp);
3731 3731 bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3732 3732
3733 3733 reread:
3734 3734 bp = NULL;
3735 3735 pp = NULL;
3736 3736 pagefound = NULL;
3737 3737
3738 3738 if (pl != NULL)
3739 3739 pl[0] = NULL;
3740 3740
3741 3741 error = 0;
3742 3742 lbn = off / bsize;
3743 3743 blkoff = lbn * bsize;
3744 3744
3745 3745 /*
3746 3746 * Queueing up the readahead before doing the synchronous read
3747 3747 * results in a significant increase in read throughput because
3748 3748 * of the increased parallelism between the async threads and
3749 3749 * the process context.
3750 3750 */
3751 3751 if ((off & ((vp->v_vfsp->vfs_bsize) - 1)) == 0 &&
3752 3752 rw != S_CREATE &&
3753 3753 !(vp->v_flag & VNOCACHE)) {
3754 3754 mutex_enter(&rp->r_statelock);
3755 3755
3756 3756 /*
3757 3757 * Calculate the number of readaheads to do.
3758 3758 * a) No readaheads at offset = 0.
3759 3759 * b) Do maximum(nfs_nra) readaheads when the readahead
3760 3760 * window is closed.
3761 3761 * c) Do readaheads between 1 to (nfs_nra - 1) depending
3762 3762 * upon how far the readahead window is open or close.
3763 3763 * d) No readaheads if rp->r_nextr is not within the scope
3764 3764 * of the readahead window (random i/o).
3765 3765 */
3766 3766
3767 3767 if (off == 0)
3768 3768 readahead = 0;
3769 3769 else if (blkoff == rp->r_nextr)
3770 3770 readahead = nfs_nra;
3771 3771 else if (rp->r_nextr > blkoff &&
3772 3772 ((ra_window = (rp->r_nextr - blkoff) / bsize)
3773 3773 <= (nfs_nra - 1)))
3774 3774 readahead = nfs_nra - ra_window;
3775 3775 else
3776 3776 readahead = 0;
3777 3777
3778 3778 rablkoff = rp->r_nextr;
3779 3779 while (readahead > 0 && rablkoff + bsize < rp->r_size) {
3780 3780 mutex_exit(&rp->r_statelock);
3781 3781 if (nfs_async_readahead(vp, rablkoff + bsize,
3782 3782 addr + (rablkoff + bsize - off), seg, cr,
3783 3783 nfs_readahead) < 0) {
3784 3784 mutex_enter(&rp->r_statelock);
3785 3785 break;
3786 3786 }
3787 3787 readahead--;
3788 3788 rablkoff += bsize;
3789 3789 /*
3790 3790 * Indicate that we did a readahead so
3791 3791 * readahead offset is not updated
3792 3792 * by the synchronous read below.
3793 3793 */
3794 3794 readahead_issued = 1;
3795 3795 mutex_enter(&rp->r_statelock);
3796 3796 /*
3797 3797 * set readahead offset to
3798 3798 * offset of last async readahead
3799 3799 * request.
3800 3800 */
3801 3801 rp->r_nextr = rablkoff;
3802 3802 }
3803 3803 mutex_exit(&rp->r_statelock);
3804 3804 }
3805 3805
3806 3806 again:
3807 3807 if ((pagefound = page_exists(vp, off)) == NULL) {
3808 3808 if (pl == NULL) {
3809 3809 (void) nfs_async_readahead(vp, blkoff, addr, seg, cr,
3810 3810 nfs_readahead);
3811 3811 } else if (rw == S_CREATE) {
3812 3812 /*
3813 3813 * Block for this page is not allocated, or the offset
3814 3814 * is beyond the current allocation size, or we're
3815 3815 * allocating a swap slot and the page was not found,
3816 3816 * so allocate it and return a zero page.
3817 3817 */
3818 3818 if ((pp = page_create_va(vp, off,
3819 3819 PAGESIZE, PG_WAIT, seg, addr)) == NULL)
3820 3820 cmn_err(CE_PANIC, "nfs_getapage: page_create");
3821 3821 io_len = PAGESIZE;
3822 3822 mutex_enter(&rp->r_statelock);
3823 3823 rp->r_nextr = off + PAGESIZE;
3824 3824 mutex_exit(&rp->r_statelock);
3825 3825 } else {
3826 3826 /*
3827 3827 * Need to go to server to get a BLOCK, exception to
3828 3828 * that being while reading at offset = 0 or doing
3829 3829 * random i/o, in that case read only a PAGE.
3830 3830 */
3831 3831 mutex_enter(&rp->r_statelock);
3832 3832 if (blkoff < rp->r_size &&
3833 3833 blkoff + bsize >= rp->r_size) {
3834 3834 /*
3835 3835 * If only a block or less is left in
3836 3836 * the file, read all that is remaining.
3837 3837 */
3838 3838 if (rp->r_size <= off) {
3839 3839 /*
3840 3840 * Trying to access beyond EOF,
3841 3841 * set up to get at least one page.
3842 3842 */
3843 3843 blksize = off + PAGESIZE - blkoff;
3844 3844 } else
3845 3845 blksize = rp->r_size - blkoff;
3846 3846 } else if ((off == 0) ||
3847 3847 (off != rp->r_nextr && !readahead_issued)) {
3848 3848 blksize = PAGESIZE;
3849 3849 blkoff = off; /* block = page here */
3850 3850 } else
3851 3851 blksize = bsize;
3852 3852 mutex_exit(&rp->r_statelock);
3853 3853
3854 3854 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
3855 3855 &io_len, blkoff, blksize, 0);
3856 3856
3857 3857 /*
3858 3858 * Some other thread has entered the page,
3859 3859 * so just use it.
3860 3860 */
3861 3861 if (pp == NULL)
3862 3862 goto again;
3863 3863
3864 3864 /*
3865 3865 * Now round the request size up to page boundaries.
3866 3866 * This ensures that the entire page will be
3867 3867 * initialized to zeroes if EOF is encountered.
3868 3868 */
3869 3869 io_len = ptob(btopr(io_len));
3870 3870
3871 3871 bp = pageio_setup(pp, io_len, vp, B_READ);
3872 3872 ASSERT(bp != NULL);
3873 3873
3874 3874 /*
3875 3875 * pageio_setup should have set b_addr to 0. This
3876 3876 * is correct since we want to do I/O on a page
3877 3877 * boundary. bp_mapin will use this addr to calculate
3878 3878 * an offset, and then set b_addr to the kernel virtual
3879 3879 * address it allocated for us.
3880 3880 */
3881 3881 ASSERT(bp->b_un.b_addr == 0);
3882 3882
3883 3883 bp->b_edev = 0;
3884 3884 bp->b_dev = 0;
3885 3885 bp->b_lblkno = lbtodb(io_off);
3886 3886 bp->b_file = vp;
3887 3887 bp->b_offset = (offset_t)off;
3888 3888 bp_mapin(bp);
3889 3889
3890 3890 /*
3891 3891 * If doing a write beyond what we believe is EOF,
3892 3892 * don't bother trying to read the pages from the
3893 3893 * server, we'll just zero the pages here. We
3894 3894 * don't check that the rw flag is S_WRITE here
3895 3895 * because some implementations may attempt a
3896 3896 * read access to the buffer before copying data.
3897 3897 */
3898 3898 mutex_enter(&rp->r_statelock);
3899 3899 if (io_off >= rp->r_size && seg == segkmap) {
3900 3900 mutex_exit(&rp->r_statelock);
3901 3901 bzero(bp->b_un.b_addr, io_len);
3902 3902 } else {
3903 3903 mutex_exit(&rp->r_statelock);
3904 3904 error = nfs_bio(bp, cr);
3905 3905 }
3906 3906
3907 3907 /*
3908 3908 * Unmap the buffer before freeing it.
3909 3909 */
3910 3910 bp_mapout(bp);
3911 3911 pageio_done(bp);
3912 3912
3913 3913 if (error == NFS_EOF) {
3914 3914 /*
3915 3915 * If doing a write system call just return
3916 3916 * zeroed pages, else user tried to get pages
3917 3917 * beyond EOF, return error. We don't check
3918 3918 * that the rw flag is S_WRITE here because
3919 3919 * some implementations may attempt a read
3920 3920 * access to the buffer before copying data.
3921 3921 */
3922 3922 if (seg == segkmap)
3923 3923 error = 0;
3924 3924 else
3925 3925 error = EFAULT;
3926 3926 }
3927 3927
3928 3928 if (!readahead_issued && !error) {
3929 3929 mutex_enter(&rp->r_statelock);
3930 3930 rp->r_nextr = io_off + io_len;
3931 3931 mutex_exit(&rp->r_statelock);
3932 3932 }
3933 3933 }
3934 3934 }
3935 3935
3936 3936 out:
3937 3937 if (pl == NULL)
3938 3938 return (error);
3939 3939
3940 3940 if (error) {
3941 3941 if (pp != NULL)
3942 3942 pvn_read_done(pp, B_ERROR);
3943 3943 return (error);
3944 3944 }
3945 3945
3946 3946 if (pagefound) {
3947 3947 se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
3948 3948
3949 3949 /*
3950 3950 * Page exists in the cache, acquire the appropriate lock.
3951 3951 * If this fails, start all over again.
3952 3952 */
3953 3953 if ((pp = page_lookup(vp, off, se)) == NULL) {
3954 3954 #ifdef DEBUG
3955 3955 nfs_lostpage++;
3956 3956 #endif
3957 3957 goto reread;
3958 3958 }
3959 3959 pl[0] = pp;
3960 3960 pl[1] = NULL;
3961 3961 return (0);
3962 3962 }
3963 3963
3964 3964 if (pp != NULL)
3965 3965 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
3966 3966
3967 3967 return (error);
3968 3968 }
3969 3969
3970 3970 static void
3971 3971 nfs_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, struct seg *seg,
3972 3972 cred_t *cr)
3973 3973 {
3974 3974 int error;
3975 3975 page_t *pp;
3976 3976 u_offset_t io_off;
3977 3977 size_t io_len;
3978 3978 struct buf *bp;
3979 3979 uint_t bsize, blksize;
3980 3980 rnode_t *rp = VTOR(vp);
3981 3981
3982 3982 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
3983 3983
3984 3984 bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
3985 3985
3986 3986 mutex_enter(&rp->r_statelock);
3987 3987 if (blkoff < rp->r_size && blkoff + bsize > rp->r_size) {
3988 3988 /*
3989 3989 * If less than a block left in file read less
3990 3990 * than a block.
3991 3991 */
3992 3992 blksize = rp->r_size - blkoff;
3993 3993 } else
3994 3994 blksize = bsize;
3995 3995 mutex_exit(&rp->r_statelock);
3996 3996
3997 3997 pp = pvn_read_kluster(vp, blkoff, segkmap, addr,
3998 3998 &io_off, &io_len, blkoff, blksize, 1);
3999 3999 /*
4000 4000 * The isra flag passed to the kluster function is 1, we may have
4001 4001 * gotten a return value of NULL for a variety of reasons (# of free
4002 4002 * pages < minfree, someone entered the page on the vnode etc). In all
4003 4003 * cases, we want to punt on the readahead.
4004 4004 */
4005 4005 if (pp == NULL)
4006 4006 return;
4007 4007
4008 4008 /*
4009 4009 * Now round the request size up to page boundaries.
4010 4010 * This ensures that the entire page will be
4011 4011 * initialized to zeroes if EOF is encountered.
4012 4012 */
4013 4013 io_len = ptob(btopr(io_len));
4014 4014
4015 4015 bp = pageio_setup(pp, io_len, vp, B_READ);
4016 4016 ASSERT(bp != NULL);
4017 4017
4018 4018 /*
4019 4019 * pageio_setup should have set b_addr to 0. This is correct since
4020 4020 * we want to do I/O on a page boundary. bp_mapin() will use this addr
4021 4021 * to calculate an offset, and then set b_addr to the kernel virtual
4022 4022 * address it allocated for us.
4023 4023 */
4024 4024 ASSERT(bp->b_un.b_addr == 0);
4025 4025
4026 4026 bp->b_edev = 0;
4027 4027 bp->b_dev = 0;
4028 4028 bp->b_lblkno = lbtodb(io_off);
4029 4029 bp->b_file = vp;
4030 4030 bp->b_offset = (offset_t)blkoff;
4031 4031 bp_mapin(bp);
4032 4032
4033 4033 /*
4034 4034 * If doing a write beyond what we believe is EOF, don't bother trying
4035 4035 * to read the pages from the server, we'll just zero the pages here.
4036 4036 * We don't check that the rw flag is S_WRITE here because some
4037 4037 * implementations may attempt a read access to the buffer before
4038 4038 * copying data.
4039 4039 */
4040 4040 mutex_enter(&rp->r_statelock);
4041 4041 if (io_off >= rp->r_size && seg == segkmap) {
4042 4042 mutex_exit(&rp->r_statelock);
4043 4043 bzero(bp->b_un.b_addr, io_len);
4044 4044 error = 0;
4045 4045 } else {
4046 4046 mutex_exit(&rp->r_statelock);
4047 4047 error = nfs_bio(bp, cr);
4048 4048 if (error == NFS_EOF)
4049 4049 error = 0;
4050 4050 }
4051 4051
4052 4052 /*
4053 4053 * Unmap the buffer before freeing it.
4054 4054 */
4055 4055 bp_mapout(bp);
4056 4056 pageio_done(bp);
4057 4057
4058 4058 pvn_read_done(pp, error ? B_READ | B_ERROR : B_READ);
4059 4059
4060 4060 /*
4061 4061 * In case of error set readahead offset
4062 4062 * to the lowest offset.
4063 4063 * pvn_read_done() calls VN_DISPOSE to destroy the pages
4064 4064 */
4065 4065 if (error && rp->r_nextr > io_off) {
4066 4066 mutex_enter(&rp->r_statelock);
4067 4067 if (rp->r_nextr > io_off)
4068 4068 rp->r_nextr = io_off;
4069 4069 mutex_exit(&rp->r_statelock);
4070 4070 }
4071 4071 }
4072 4072
4073 4073 /*
4074 4074 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
4075 4075 * If len == 0, do from off to EOF.
4076 4076 *
4077 4077 * The normal cases should be len == 0 && off == 0 (entire vp list),
4078 4078 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
4079 4079 * (from pageout).
4080 4080 */
4081 4081 /* ARGSUSED */
4082 4082 static int
4083 4083 nfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
4084 4084 caller_context_t *ct)
4085 4085 {
4086 4086 int error;
4087 4087 rnode_t *rp;
4088 4088
4089 4089 ASSERT(cr != NULL);
4090 4090
4091 4091 /*
4092 4092 * XXX - Why should this check be made here?
4093 4093 */
4094 4094 if (vp->v_flag & VNOMAP)
4095 4095 return (ENOSYS);
4096 4096
4097 4097 if (len == 0 && !(flags & B_INVAL) && vn_is_readonly(vp))
4098 4098 return (0);
4099 4099
4100 4100 if (!(flags & B_ASYNC) && nfs_zone() != VTOMI(vp)->mi_zone)
4101 4101 return (EIO);
4102 4102 ASSERT(off <= MAXOFF32_T);
4103 4103
4104 4104 rp = VTOR(vp);
4105 4105 mutex_enter(&rp->r_statelock);
4106 4106 rp->r_count++;
4107 4107 mutex_exit(&rp->r_statelock);
4108 4108 error = nfs_putpages(vp, off, len, flags, cr);
4109 4109 mutex_enter(&rp->r_statelock);
4110 4110 rp->r_count--;
4111 4111 cv_broadcast(&rp->r_cv);
4112 4112 mutex_exit(&rp->r_statelock);
4113 4113
4114 4114 return (error);
4115 4115 }
4116 4116
4117 4117 /*
4118 4118 * Write out a single page, possibly klustering adjacent dirty pages.
4119 4119 */
/*
 * pp is replaced by the kluster returned from pvn_write_kluster().  When
 * offp / lenp are non-NULL they receive the offset and length of the range
 * that was handled (after trimming to the logical block).
 *
 * Returns 0 without issuing any I/O when the range overlaps a modification
 * that is still in progress (RMODINPROGRESS); in that case the pages are
 * marked modified again and the rnode is flagged RDIRTY.  Otherwise returns
 * the result of nfs_async_putapage() when B_ASYNC is set in flags, or of
 * nfs_sync_putapage() when it is not.
 */
4120 4120 int
4121 4121 nfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
4122 4122 int flags, cred_t *cr)
4123 4123 {
4124 4124 u_offset_t io_off;
4125 4125 u_offset_t lbn_off;
4126 4126 u_offset_t lbn;
4127 4127 size_t io_len;
4128 4128 uint_t bsize;
4129 4129 int error;
4130 4130 rnode_t *rp;
4131 4131
4132 4132 ASSERT(!vn_is_readonly(vp));
4133 4133 ASSERT(pp != NULL);
4134 4134 ASSERT(cr != NULL);
4135 4135 ASSERT((flags & B_ASYNC) || nfs_zone() == VTOMI(vp)->mi_zone);
4136 4136
4137 4137 rp = VTOR(vp);
4138 4138 ASSERT(rp->r_count > 0);
4139 4139
4140 4140 ASSERT(pp->p_offset <= MAXOFF32_T);
4141 4141
/* lbn is the logical block holding the page; lbn_off is its byte offset. */
4142 4142 bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
4143 4143 lbn = pp->p_offset / bsize;
4144 4144 lbn_off = lbn * bsize;
4145 4145
4146 4146 /*
4147 4147 * Find a kluster that fits in one block, or in
4148 4148 * one page if pages are bigger than blocks. If
4149 4149 * there is less file space allocated than a whole
4150 4150 * page, we'll shorten the i/o request below.
4151 4151 */
4152 4152 pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
4153 4153 roundup(bsize, PAGESIZE), flags);
4154 4154
4155 4155 /*
4156 4156 * pvn_write_kluster shouldn't have returned a page with offset
4157 4157 * behind the original page we were given. Verify that.
4158 4158 */
4159 4159 ASSERT((pp->p_offset / bsize) >= lbn);
4160 4160
4161 4161 /*
4162 4162 * Now pp will have the list of kept dirty pages marked for
4163 4163 * write back. It will also handle invalidation and freeing
4164 4164 * of pages that are not dirty. Check for page length rounding
4165 4165 * problems.
4166 4166 */
/* Trim the request so that it does not run past the end of the block. */
4167 4167 if (io_off + io_len > lbn_off + bsize) {
4168 4168 ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
4169 4169 io_len = lbn_off + bsize - io_off;
4170 4170 }
4171 4171 /*
4172 4172 * The RMODINPROGRESS flag makes sure that nfs(3)_bio() sees a
4173 4173 * consistent value of r_size. RMODINPROGRESS is set in writerp().
4174 4174 * When RMODINPROGRESS is set it indicates that a uiomove() is in
4175 4175 * progress and the r_size has not been made consistent with the
4176 4176 * new size of the file. When the uiomove() completes the r_size is
4177 4177 * updated and the RMODINPROGRESS flag is cleared.
4178 4178 *
4179 4179 * The RMODINPROGRESS flag makes sure that nfs(3)_bio() sees a
4180 4180 * consistent value of r_size. Without this handshaking, it is
4181 4181 * possible that nfs(3)_bio() picks up the old value of r_size
4182 4182 * before the uiomove() in writerp() completes. This will result
4183 4183 * in the write through nfs(3)_bio() being dropped.
4184 4184 *
4185 4185 * More precisely, there is a window between the time the uiomove()
4186 4186 * completes and the time the r_size is updated. If a VOP_PUTPAGE()
4187 4187 * operation intervenes in this window, the page will be picked up,
4188 4188 * because it is dirty (it will be unlocked, unless it was
4189 4189 * pagecreate'd). When the page is picked up as dirty, the dirty
4190 4190 * bit is reset (pvn_getdirty()). In nfs(3)write(), r_size is
4191 4191 * checked. This will still be the old size. Therefore the page will
4192 4192 * not be written out. When segmap_release() calls VOP_PUTPAGE(),
4193 4193 * the page will be found to be clean and the write will be dropped.
4194 4194 */
/* The flag is first tested without r_statelock and rechecked under it. */
4195 4195 if (rp->r_flags & RMODINPROGRESS) {
4196 4196 mutex_enter(&rp->r_statelock);
4197 4197 if ((rp->r_flags & RMODINPROGRESS) &&
4198 4198 rp->r_modaddr + MAXBSIZE > io_off &&
4199 4199 rp->r_modaddr < io_off + io_len) {
4200 4200 page_t *plist;
4201 4201 /*
4202 4202 * A write is in progress for this region of the file.
4203 4203 * If we did not detect RMODINPROGRESS here then this
4204 4204 * path through nfs_putapage() would eventually go to
4205 4205 * nfs(3)_bio() and may not write out all of the data
4206 4206 * in the pages. We end up losing data. So we decide
4207 4207 * to set the modified bit on each page in the page
4208 4208 * list and mark the rnode with RDIRTY. This write
4209 4209 * will be restarted at some later time.
4210 4210 */
/* Take each page off the list, mark it modified and drop its locks. */
4211 4211 plist = pp;
4212 4212 while (plist != NULL) {
4213 4213 pp = plist;
4214 4214 page_sub(&plist, pp);
4215 4215 hat_setmod(pp);
4216 4216 page_io_unlock(pp);
4217 4217 page_unlock(pp);
4218 4218 }
4219 4219 rp->r_flags |= RDIRTY;
4220 4220 mutex_exit(&rp->r_statelock);
4221 4221 if (offp)
4222 4222 *offp = io_off;
4223 4223 if (lenp)
4224 4224 *lenp = io_len;
4225 4225 return (0);
4226 4226 }
4227 4227 mutex_exit(&rp->r_statelock);
4228 4228 }
4229 4229
/* Hand the kluster to the async machinery or write it out directly. */
4230 4230 if (flags & B_ASYNC) {
4231 4231 error = nfs_async_putapage(vp, pp, io_off, io_len, flags, cr,
4232 4232 nfs_sync_putapage);
4233 4233 } else
4234 4234 error = nfs_sync_putapage(vp, pp, io_off, io_len, flags, cr);
4235 4235
4236 4236 if (offp)
4237 4237 *offp = io_off;
4238 4238 if (lenp)
4239 4239 *lenp = io_len;
4240 4240 return (error);
4241 4241 }
4242 4242
4243 4243 static int
4244 4244 nfs_sync_putapage(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
4245 4245 int flags, cred_t *cr)
4246 4246 {
4247 4247 int error;
4248 4248 rnode_t *rp;
4249 4249
4250 4250 flags |= B_WRITE;
4251 4251
4252 4252 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
4253 4253 error = nfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4254 4254
4255 4255 rp = VTOR(vp);
4256 4256
4257 4257 if ((error == ENOSPC || error == EDQUOT || error == EACCES) &&
4258 4258 (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
4259 4259 if (!(rp->r_flags & ROUTOFSPACE)) {
4260 4260 mutex_enter(&rp->r_statelock);
4261 4261 rp->r_flags |= ROUTOFSPACE;
4262 4262 mutex_exit(&rp->r_statelock);
4263 4263 }
4264 4264 flags |= B_ERROR;
4265 4265 pvn_write_done(pp, flags);
4266 4266 /*
4267 4267 * If this was not an async thread, then try again to
4268 4268 * write out the pages, but this time, also destroy
4269 4269 * them whether or not the write is successful. This
4270 4270 * will prevent memory from filling up with these
4271 4271 * pages and destroying them is the only alternative
4272 4272 * if they can't be written out.
4273 4273 *
4274 4274 * Don't do this if this is an async thread because
4275 4275 * when the pages are unlocked in pvn_write_done,
4276 4276 * some other thread could have come along, locked
4277 4277 * them, and queued for an async thread. It would be
4278 4278 * possible for all of the async threads to be tied
4279 4279 * up waiting to lock the pages again and they would
4280 4280 * all already be locked and waiting for an async
4281 4281 * thread to handle them. Deadlock.
4282 4282 */
4283 4283 if (!(flags & B_ASYNC)) {
4284 4284 error = nfs_putpage(vp, io_off, io_len,
4285 4285 B_INVAL | B_FORCE, cr, NULL);
4286 4286 }
4287 4287 } else {
4288 4288 if (error)
4289 4289 flags |= B_ERROR;
4290 4290 else if (rp->r_flags & ROUTOFSPACE) {
4291 4291 mutex_enter(&rp->r_statelock);
4292 4292 rp->r_flags &= ~ROUTOFSPACE;
4293 4293 mutex_exit(&rp->r_statelock);
4294 4294 }
4295 4295 pvn_write_done(pp, flags);
4296 4296 }
4297 4297
4298 4298 return (error);
4299 4299 }
4300 4300
4301 4301 /* ARGSUSED */
4302 4302 static int
4303 4303 nfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
4304 4304 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
4305 4305 caller_context_t *ct)
4306 4306 {
4307 4307 struct segvn_crargs vn_a;
4308 4308 int error;
4309 4309 rnode_t *rp;
4310 4310 struct vattr va;
4311 4311
4312 4312 if (nfs_zone() != VTOMI(vp)->mi_zone)
4313 4313 return (EIO);
4314 4314
4315 4315 if (vp->v_flag & VNOMAP)
4316 4316 return (ENOSYS);
4317 4317
4318 4318 if (off > MAXOFF32_T)
4319 4319 return (EFBIG);
4320 4320
4321 4321 if (off < 0 || off + len < 0)
4322 4322 return (ENXIO);
4323 4323
4324 4324 if (vp->v_type != VREG)
4325 4325 return (ENODEV);
4326 4326
4327 4327 /*
4328 4328 * If there is cached data and if close-to-open consistency
4329 4329 * checking is not turned off and if the file system is not
4330 4330 * mounted readonly, then force an over the wire getattr.
4331 4331 * Otherwise, just invoke nfsgetattr to get a copy of the
4332 4332 * attributes. The attribute cache will be used unless it
4333 4333 * is timed out and if it is, then an over the wire getattr
4334 4334 * will be issued.
4335 4335 */
4336 4336 va.va_mask = AT_ALL;
4337 4337 if (vn_has_cached_data(vp) &&
4338 4338 !(VTOMI(vp)->mi_flags & MI_NOCTO) && !vn_is_readonly(vp))
4339 4339 error = nfs_getattr_otw(vp, &va, cr);
4340 4340 else
4341 4341 error = nfsgetattr(vp, &va, cr);
4342 4342 if (error)
4343 4343 return (error);
4344 4344
4345 4345 /*
4346 4346 * Check to see if the vnode is currently marked as not cachable.
4347 4347 * This means portions of the file are locked (through VOP_FRLOCK).
4348 4348 * In this case the map request must be refused. We use
4349 4349 * rp->r_lkserlock to avoid a race with concurrent lock requests.
4350 4350 */
4351 4351 rp = VTOR(vp);
4352 4352
4353 4353 /*
4354 4354 * Atomically increment r_inmap after acquiring r_rwlock. The
4355 4355 * idea here is to acquire r_rwlock to block read/write and
4356 4356 * not to protect r_inmap. r_inmap will inform nfs_read/write()
4357 4357 * that we are in nfs_map(). Now, r_rwlock is acquired in order
↓ open down ↓ |
4357 lines elided |
↑ open up ↑ |
4358 4358 * and we can prevent the deadlock that would have occurred
4359 4359 * when nfs_addmap() would have acquired it out of order.
4360 4360 *
4361 4361 * Since we are not protecting r_inmap by any lock, we do not
4362 4362 * hold any lock when we decrement it. We atomically decrement
4363 4363 * r_inmap after we release r_lkserlock.
4364 4364 */
4365 4365
4366 4366 if (nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER, INTR(vp)))
4367 4367 return (EINTR);
4368 - atomic_add_int(&rp->r_inmap, 1);
4368 + atomic_inc_uint(&rp->r_inmap);
4369 4369 nfs_rw_exit(&rp->r_rwlock);
4370 4370
4371 4371 if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_READER, INTR(vp))) {
4372 - atomic_add_int(&rp->r_inmap, -1);
4372 + atomic_dec_uint(&rp->r_inmap);
4373 4373 return (EINTR);
4374 4374 }
4375 4375 if (vp->v_flag & VNOCACHE) {
4376 4376 error = EAGAIN;
4377 4377 goto done;
4378 4378 }
4379 4379
4380 4380 /*
4381 4381 * Don't allow concurrent locks and mapping if mandatory locking is
4382 4382 * enabled.
4383 4383 */
4384 4384 if ((flk_has_remote_locks(vp) || lm_has_sleep(vp)) &&
4385 4385 MANDLOCK(vp, va.va_mode)) {
4386 4386 error = EAGAIN;
4387 4387 goto done;
4388 4388 }
4389 4389
4390 4390 as_rangelock(as);
4391 4391 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
4392 4392 if (error != 0) {
4393 4393 as_rangeunlock(as);
4394 4394 goto done;
4395 4395 }
4396 4396
4397 4397 vn_a.vp = vp;
4398 4398 vn_a.offset = off;
4399 4399 vn_a.type = (flags & MAP_TYPE);
4400 4400 vn_a.prot = (uchar_t)prot;
4401 4401 vn_a.maxprot = (uchar_t)maxprot;
4402 4402 vn_a.flags = (flags & ~MAP_TYPE);
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
4403 4403 vn_a.cred = cr;
4404 4404 vn_a.amp = NULL;
4405 4405 vn_a.szc = 0;
4406 4406 vn_a.lgrp_mem_policy_flags = 0;
4407 4407
4408 4408 error = as_map(as, *addrp, len, segvn_create, &vn_a);
4409 4409 as_rangeunlock(as);
4410 4410
4411 4411 done:
4412 4412 nfs_rw_exit(&rp->r_lkserlock);
4413 - atomic_add_int(&rp->r_inmap, -1);
4413 + atomic_dec_uint(&rp->r_inmap);
4414 4414 return (error);
4415 4415 }
4416 4416
4417 4417 /* ARGSUSED */
4418 4418 static int
4419 4419 nfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4420 4420 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
4421 4421 caller_context_t *ct)
4422 4422 {
4423 4423 rnode_t *rp;
4424 4424
4425 4425 if (vp->v_flag & VNOMAP)
4426 4426 return (ENOSYS);
4427 4427 if (nfs_zone() != VTOMI(vp)->mi_zone)
4428 4428 return (EIO);
4429 4429
4430 4430 rp = VTOR(vp);
4431 4431 atomic_add_long((ulong_t *)&rp->r_mapcnt, btopr(len));
4432 4432
4433 4433 return (0);
4434 4434 }
4435 4435
/*
 * Handle an F_GETLK, F_SETLK or F_SETLKW request on vp.
 *
 * Any other cmd, or an l_type other than F_RDLCK/F_WRLCK/F_UNLCK, yields
 * EINVAL.  A set request whose lock type is not matched by FREAD/FWRITE in
 * flag yields EBADF.  A call from a zone other than the mount's yields EIO.
 *
 * With MI_LLOCK set the request is handed to fs_frlock().  Otherwise, with
 * r_lkserlock held as writer, set/unlock requests first wait for r_count to
 * drain and push the cached pages with B_INVAL; then lm_frlock() carries out
 * the request and nfs_lockcompletion() is called if it succeeded.
 */
4436 4436 /* ARGSUSED */
4437 4437 static int
4438 4438 nfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
4439 4439 struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
4440 4440 {
4441 4441 netobj lm_fh;
4442 4442 int rc;
4443 4443 u_offset_t start, end;
4444 4444 rnode_t *rp;
4445 4445 int error = 0, intr = INTR(vp);
4446 4446
4447 4447 /* check for valid cmd parameter */
4448 4448 if (cmd != F_GETLK && cmd != F_SETLK && cmd != F_SETLKW)
4449 4449 return (EINVAL);
4450 4450 if (nfs_zone() != VTOMI(vp)->mi_zone)
4451 4451 return (EIO);
4452 4452
4453 4453 /* Verify l_type. */
4454 4454 switch (bfp->l_type) {
4455 4455 case F_RDLCK:
4456 4456 if (cmd != F_GETLK && !(flag & FREAD))
4457 4457 return (EBADF);
4458 4458 break;
4459 4459 case F_WRLCK:
4460 4460 if (cmd != F_GETLK && !(flag & FWRITE))
4461 4461 return (EBADF);
4462 4462 break;
4463 4463 case F_UNLCK:
/* With intr cleared, the r_count wait below uses cv_wait(), not cv_wait_sig(). */
4464 4464 intr = 0;
4465 4465 break;
4466 4466
4467 4467 default:
4468 4468 return (EINVAL);
4469 4469 }
4470 4470
4471 4471 /* check the validity of the lock range */
/* Both checks return on a non-zero result, so rc is 0 once past them. */
4472 4472 if (rc = flk_convert_lock_data(vp, bfp, &start, &end, offset))
4473 4473 return (rc);
4474 4474 if (rc = flk_check_lock_data(start, end, MAXOFF32_T))
4475 4475 return (rc);
4476 4476
4477 4477 /*
4478 4478 * If the filesystem is mounted using local locking, pass the
4479 4479 * request off to the local locking code.
4480 4480 */
4481 4481 if (VTOMI(vp)->mi_flags & MI_LLOCK) {
4482 4482 if (offset > MAXOFF32_T)
4483 4483 return (EFBIG);
4484 4484 if (cmd == F_SETLK || cmd == F_SETLKW) {
4485 4485 /*
4486 4486 * For complete safety, we should be holding
4487 4487 * r_lkserlock. However, we can't call
4488 4488 * lm_safelock and then fs_frlock while
4489 4489 * holding r_lkserlock, so just invoke
4490 4490 * lm_safelock and expect that this will
4491 4491 * catch enough of the cases.
4492 4492 */
4493 4493 if (!lm_safelock(vp, bfp, cr))
4494 4494 return (EAGAIN);
4495 4495 }
4496 4496 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4497 4497 }
4498 4498
4499 4499 rp = VTOR(vp);
4500 4500
4501 4501 /*
4502 4502 * Check whether the given lock request can proceed, given the
4503 4503 * current file mappings.
4504 4504 */
4505 4505 if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_WRITER, intr))
4506 4506 return (EINTR);
4507 4507 if (cmd == F_SETLK || cmd == F_SETLKW) {
4508 4508 if (!lm_safelock(vp, bfp, cr)) {
4509 4509 rc = EAGAIN;
4510 4510 goto done;
4511 4511 }
4512 4512 }
4513 4513
4514 4514 /*
4515 4515 * Flush the cache after waiting for async I/O to finish. For new
4516 4516 * locks, this is so that the process gets the latest bits from the
4517 4517 * server. For unlocks, this is so that other clients see the
4518 4518 * latest bits once the file has been unlocked. If currently dirty
4519 4519 * pages can't be flushed, then don't allow a lock to be set. But
4520 4520 * allow unlocks to succeed, to avoid having orphan locks on the
4521 4521 * server.
4522 4522 */
4523 4523 if (cmd != F_GETLK) {
4524 4524 mutex_enter(&rp->r_statelock);
4525 4525 while (rp->r_count > 0) {
4526 4526 if (intr) {
4527 4527 klwp_t *lwp = ttolwp(curthread);
4528 4528
/* lwp_nostop is raised around the wait and lowered again on either outcome. */
4529 4529 if (lwp != NULL)
4530 4530 lwp->lwp_nostop++;
4531 4531 if (cv_wait_sig(&rp->r_cv, &rp->r_statelock)
4532 4532 == 0) {
4533 4533 if (lwp != NULL)
4534 4534 lwp->lwp_nostop--;
4535 4535 rc = EINTR;
4536 4536 break;
4537 4537 }
4538 4538 if (lwp != NULL)
4539 4539 lwp->lwp_nostop--;
4540 4540 } else
4541 4541 cv_wait(&rp->r_cv, &rp->r_statelock);
4542 4542 }
4543 4543 mutex_exit(&rp->r_statelock);
/* rc is non-zero here only if the wait above set it to EINTR. */
4544 4544 if (rc != 0)
4545 4545 goto done;
4546 4546 error = nfs_putpage(vp, (offset_t)0, 0, B_INVAL, cr, ct);
4547 4547 if (error) {
/* Record ENOSPC/EDQUOT in r_error unless an error is already stored there. */
4548 4548 if (error == ENOSPC || error == EDQUOT) {
4549 4549 mutex_enter(&rp->r_statelock);
4550 4550 if (!rp->r_error)
4551 4551 rp->r_error = error;
4552 4552 mutex_exit(&rp->r_statelock);
4553 4553 }
4554 4554 if (bfp->l_type != F_UNLCK) {
4555 4555 rc = ENOLCK;
4556 4556 goto done;
4557 4557 }
4558 4558 }
4559 4559 }
4560 4560
4561 4561 lm_fh.n_len = sizeof (fhandle_t);
4562 4562 lm_fh.n_bytes = (char *)VTOFH(vp);
4563 4563
4564 4564 /*
4565 4565 * Call the lock manager to do the real work of contacting
4566 4566 * the server and obtaining the lock.
4567 4567 */
4568 4568 rc = lm_frlock(vp, cmd, bfp, flag, offset, cr, &lm_fh, flk_cbp);
4569 4569
4570 4570 if (rc == 0)
4571 4571 nfs_lockcompletion(vp, cmd);
4572 4572
4573 4573 done:
4574 4574 nfs_rw_exit(&rp->r_lkserlock);
4575 4575 return (rc);
4576 4576 }
4577 4577
4578 4578 /*
4579 4579 * Free storage space associated with the specified vnode. The portion
4580 4580 * to be freed is specified by bfp->l_start and bfp->l_len (already
4581 4581 * normalized to a "whence" of 0).
4582 4582 *
4583 4583 * This is an experimental facility whose continued existence is not
4584 4584 * guaranteed. Currently, we only support the special case
4585 4585 * of l_len == 0, meaning free to end of file.
4586 4586 */
4587 4587 /* ARGSUSED */
4588 4588 static int
4589 4589 nfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4590 4590 offset_t offset, cred_t *cr, caller_context_t *ct)
4591 4591 {
4592 4592 int error;
4593 4593
4594 4594 ASSERT(vp->v_type == VREG);
4595 4595 if (cmd != F_FREESP)
4596 4596 return (EINVAL);
4597 4597
4598 4598 if (offset > MAXOFF32_T)
4599 4599 return (EFBIG);
4600 4600
4601 4601 if ((bfp->l_start > MAXOFF32_T) || (bfp->l_end > MAXOFF32_T) ||
4602 4602 (bfp->l_len > MAXOFF32_T))
4603 4603 return (EFBIG);
4604 4604
4605 4605 if (nfs_zone() != VTOMI(vp)->mi_zone)
4606 4606 return (EIO);
4607 4607
4608 4608 error = convoff(vp, bfp, 0, offset);
4609 4609 if (!error) {
4610 4610 ASSERT(bfp->l_start >= 0);
4611 4611 if (bfp->l_len == 0) {
4612 4612 struct vattr va;
4613 4613
4614 4614 /*
4615 4615 * ftruncate should not change the ctime and
4616 4616 * mtime if we truncate the file to its
4617 4617 * previous size.
4618 4618 */
4619 4619 va.va_mask = AT_SIZE;
4620 4620 error = nfsgetattr(vp, &va, cr);
4621 4621 if (error || va.va_size == bfp->l_start)
4622 4622 return (error);
4623 4623 va.va_mask = AT_SIZE;
4624 4624 va.va_size = bfp->l_start;
4625 4625 error = nfssetattr(vp, &va, 0, cr);
4626 4626
4627 4627 if (error == 0 && bfp->l_start == 0)
4628 4628 vnevent_truncate(vp, ct);
4629 4629 } else
4630 4630 error = EINVAL;
4631 4631 }
4632 4632
4633 4633 return (error);
4634 4634 }
4635 4635
4636 4636 /* ARGSUSED */
4637 4637 static int
4638 4638 nfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
4639 4639 {
4640 4640
4641 4641 return (EINVAL);
4642 4642 }
4643 4643
4644 4644 /*
4645 4645 * Setup and add an address space callback to do the work of the delmap call.
4646 4646 * The callback will (and must be) deleted in the actual callback function.
4647 4647 *
4648 4648 * This is done in order to take care of the problem that we have with holding
4649 4649 * the address space's a_lock for a long period of time (e.g. if the NFS server
4650 4650 * is down). Callbacks will be executed in the address space code while the
4651 4651 * a_lock is not held. Holding the address space's a_lock causes things such
4652 4652 * as ps and fork to hang because they are trying to acquire this lock as well.
4653 4653 */
4654 4654 /* ARGSUSED */
4655 4655 static int
4656 4656 nfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4657 4657 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
4658 4658 caller_context_t *ct)
4659 4659 {
4660 4660 int caller_found;
4661 4661 int error;
4662 4662 rnode_t *rp;
4663 4663 nfs_delmap_args_t *dmapp;
4664 4664 nfs_delmapcall_t *delmap_call;
4665 4665
4666 4666 if (vp->v_flag & VNOMAP)
4667 4667 return (ENOSYS);
4668 4668 /*
4669 4669 * A process may not change zones if it has NFS pages mmap'ed
4670 4670 * in, so we can't legitimately get here from the wrong zone.
4671 4671 */
4672 4672 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
4673 4673
4674 4674 rp = VTOR(vp);
4675 4675
4676 4676 /*
4677 4677 * The way that the address space of this process deletes its mapping
4678 4678 * of this file is via the following call chains:
4679 4679 * - as_free()->SEGOP_UNMAP()/segvn_unmap()->VOP_DELMAP()/nfs_delmap()
4680 4680 * - as_unmap()->SEGOP_UNMAP()/segvn_unmap()->VOP_DELMAP()/nfs_delmap()
4681 4681 *
4682 4682 * With the use of address space callbacks we are allowed to drop the
4683 4683 * address space lock, a_lock, while executing the NFS operations that
4684 4684 * need to go over the wire. Returning EAGAIN to the caller of this
4685 4685 * function is what drives the execution of the callback that we add
4686 4686 * below. The callback will be executed by the address space code
4687 4687 * after dropping the a_lock. When the callback is finished, since
4688 4688 * we dropped the a_lock, it must be re-acquired and segvn_unmap()
4689 4689 * is called again on the same segment to finish the rest of the work
4690 4690 * that needs to happen during unmapping.
4691 4691 *
4692 4692 * This action of calling back into the segment driver causes
4693 4693 * nfs_delmap() to get called again, but since the callback was
4694 4694 * already executed at this point, it already did the work and there
4695 4695 * is nothing left for us to do.
4696 4696 *
4697 4697 * To Summarize:
4698 4698 * - The first time nfs_delmap is called by the current thread is when
4699 4699 * we add the caller associated with this delmap to the delmap caller
4700 4700 * list, add the callback, and return EAGAIN.
4701 4701 * - The second time in this call chain when nfs_delmap is called we
4702 4702 * will find this caller in the delmap caller list and realize there
4703 4703 * is no more work to do thus removing this caller from the list and
4704 4704 * returning the error that was set in the callback execution.
4705 4705 */
4706 4706 caller_found = nfs_find_and_delete_delmapcall(rp, &error);
4707 4707 if (caller_found) {
4708 4708 /*
4709 4709 * 'error' is from the actual delmap operations. To avoid
4710 4710 * hangs, we need to handle the return of EAGAIN differently
4711 4711 * since this is what drives the callback execution.
4712 4712 * In this case, we don't want to return EAGAIN and do the
4713 4713 * callback execution because there are none to execute.
4714 4714 */
4715 4715 if (error == EAGAIN)
4716 4716 return (0);
4717 4717 else
4718 4718 return (error);
4719 4719 }
4720 4720
4721 4721 /* current caller was not in the list */
4722 4722 delmap_call = nfs_init_delmapcall();
4723 4723
4724 4724 mutex_enter(&rp->r_statelock);
4725 4725 list_insert_tail(&rp->r_indelmap, delmap_call);
4726 4726 mutex_exit(&rp->r_statelock);
4727 4727
4728 4728 dmapp = kmem_alloc(sizeof (nfs_delmap_args_t), KM_SLEEP);
4729 4729
4730 4730 dmapp->vp = vp;
4731 4731 dmapp->off = off;
4732 4732 dmapp->addr = addr;
4733 4733 dmapp->len = len;
4734 4734 dmapp->prot = prot;
4735 4735 dmapp->maxprot = maxprot;
4736 4736 dmapp->flags = flags;
4737 4737 dmapp->cr = cr;
4738 4738 dmapp->caller = delmap_call;
4739 4739
4740 4740 error = as_add_callback(as, nfs_delmap_callback, dmapp,
4741 4741 AS_UNMAP_EVENT, addr, len, KM_SLEEP);
4742 4742
4743 4743 return (error ? error : EAGAIN);
4744 4744 }
4745 4745
4746 4746 /*
4747 4747 * Remove some pages from an mmap'd vnode. Just update the
4748 4748 * count of pages. If doing close-to-open, then flush all
4749 4749 * of the pages associated with this file. Otherwise, start
4750 4750 * an asynchronous page flush to write out any dirty pages.
4751 4751 * This will also associate a credential with the rnode which
4752 4752 * can be used to write the pages.
4753 4753 */
4754 4754 /* ARGSUSED */
4755 4755 static void
4756 4756 nfs_delmap_callback(struct as *as, void *arg, uint_t event)
4757 4757 {
4758 4758 int error;
4759 4759 rnode_t *rp;
4760 4760 mntinfo_t *mi;
4761 4761 nfs_delmap_args_t *dmapp = (nfs_delmap_args_t *)arg;
4762 4762
4763 4763 rp = VTOR(dmapp->vp);
4764 4764 mi = VTOMI(dmapp->vp);
4765 4765
4766 4766 atomic_add_long((ulong_t *)&rp->r_mapcnt, -btopr(dmapp->len));
4767 4767 ASSERT(rp->r_mapcnt >= 0);
4768 4768
4769 4769 /*
4770 4770 * Initiate a page flush if there are pages, the file system
4771 4771 * was not mounted readonly, the segment was mapped shared, and
4772 4772 * the pages themselves were writeable.
4773 4773 */
4774 4774 if (vn_has_cached_data(dmapp->vp) && !vn_is_readonly(dmapp->vp) &&
4775 4775 dmapp->flags == MAP_SHARED && (dmapp->maxprot & PROT_WRITE)) {
4776 4776 mutex_enter(&rp->r_statelock);
4777 4777 rp->r_flags |= RDIRTY;
4778 4778 mutex_exit(&rp->r_statelock);
4779 4779 /*
4780 4780 * If this is a cross-zone access a sync putpage won't work, so
4781 4781 * the best we can do is try an async putpage. That seems
4782 4782 * better than something more draconian such as discarding the
4783 4783 * dirty pages.
4784 4784 */
4785 4785 if ((mi->mi_flags & MI_NOCTO) ||
4786 4786 nfs_zone() != mi->mi_zone)
4787 4787 error = nfs_putpage(dmapp->vp, dmapp->off, dmapp->len,
4788 4788 B_ASYNC, dmapp->cr, NULL);
4789 4789 else
4790 4790 error = nfs_putpage(dmapp->vp, dmapp->off, dmapp->len,
4791 4791 0, dmapp->cr, NULL);
4792 4792 if (!error) {
4793 4793 mutex_enter(&rp->r_statelock);
4794 4794 error = rp->r_error;
4795 4795 rp->r_error = 0;
4796 4796 mutex_exit(&rp->r_statelock);
4797 4797 }
4798 4798 } else
4799 4799 error = 0;
4800 4800
4801 4801 if ((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO))
4802 4802 (void) nfs_putpage(dmapp->vp, dmapp->off, dmapp->len,
4803 4803 B_INVAL, dmapp->cr, NULL);
4804 4804
4805 4805 dmapp->caller->error = error;
4806 4806 (void) as_delete_callback(as, arg);
4807 4807 kmem_free(dmapp, sizeof (nfs_delmap_args_t));
4808 4808 }
4809 4809
4810 4810 /* ARGSUSED */
4811 4811 static int
4812 4812 nfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4813 4813 caller_context_t *ct)
4814 4814 {
4815 4815 int error = 0;
4816 4816
4817 4817 if (nfs_zone() != VTOMI(vp)->mi_zone)
4818 4818 return (EIO);
4819 4819 /*
4820 4820 * This looks a little weird because it's written in a general
4821 4821 * manner but we make little use of cases. If cntl() ever gets
4822 4822 * widely used, the outer switch will make more sense.
4823 4823 */
4824 4824
4825 4825 switch (cmd) {
4826 4826
4827 4827 /*
4828 4828 * Large file spec - need to base answer new query with
4829 4829 * hardcoded constant based on the protocol.
4830 4830 */
4831 4831 case _PC_FILESIZEBITS:
4832 4832 *valp = 32;
4833 4833 return (0);
4834 4834
4835 4835 case _PC_LINK_MAX:
4836 4836 case _PC_NAME_MAX:
4837 4837 case _PC_PATH_MAX:
4838 4838 case _PC_SYMLINK_MAX:
4839 4839 case _PC_CHOWN_RESTRICTED:
4840 4840 case _PC_NO_TRUNC: {
4841 4841 mntinfo_t *mi;
4842 4842 struct pathcnf *pc;
4843 4843
4844 4844 if ((mi = VTOMI(vp)) == NULL || (pc = mi->mi_pathconf) == NULL)
4845 4845 return (EINVAL);
4846 4846 error = _PC_ISSET(cmd, pc->pc_mask); /* error or bool */
4847 4847 switch (cmd) {
4848 4848 case _PC_LINK_MAX:
4849 4849 *valp = pc->pc_link_max;
4850 4850 break;
4851 4851 case _PC_NAME_MAX:
4852 4852 *valp = pc->pc_name_max;
4853 4853 break;
4854 4854 case _PC_PATH_MAX:
4855 4855 case _PC_SYMLINK_MAX:
4856 4856 *valp = pc->pc_path_max;
4857 4857 break;
4858 4858 case _PC_CHOWN_RESTRICTED:
4859 4859 /*
4860 4860 * if we got here, error is really a boolean which
4861 4861 * indicates whether cmd is set or not.
4862 4862 */
4863 4863 *valp = error ? 1 : 0; /* see above */
4864 4864 error = 0;
4865 4865 break;
4866 4866 case _PC_NO_TRUNC:
4867 4867 /*
4868 4868 * if we got here, error is really a boolean which
4869 4869 * indicates whether cmd is set or not.
4870 4870 */
4871 4871 *valp = error ? 1 : 0; /* see above */
4872 4872 error = 0;
4873 4873 break;
4874 4874 }
4875 4875 return (error ? EINVAL : 0);
4876 4876 }
4877 4877
4878 4878 case _PC_XATTR_EXISTS:
4879 4879 *valp = 0;
4880 4880 if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
4881 4881 vnode_t *avp;
4882 4882 rnode_t *rp;
4883 4883 mntinfo_t *mi = VTOMI(vp);
4884 4884
4885 4885 if (!(mi->mi_flags & MI_EXTATTR))
4886 4886 return (0);
4887 4887
4888 4888 rp = VTOR(vp);
4889 4889 if (nfs_rw_enter_sig(&rp->r_rwlock, RW_READER,
4890 4890 INTR(vp)))
4891 4891 return (EINTR);
4892 4892
4893 4893 error = nfslookup_dnlc(vp, XATTR_DIR_NAME, &avp, cr);
4894 4894 if (error || avp == NULL)
4895 4895 error = acl_getxattrdir2(vp, &avp, 0, cr, 0);
4896 4896
4897 4897 nfs_rw_exit(&rp->r_rwlock);
4898 4898
4899 4899 if (error == 0 && avp != NULL) {
4900 4900 error = do_xattr_exists_check(avp, valp, cr);
4901 4901 VN_RELE(avp);
4902 4902 }
4903 4903 }
4904 4904 return (error ? EINVAL : 0);
4905 4905
4906 4906 case _PC_ACL_ENABLED:
4907 4907 *valp = _ACL_ACLENT_ENABLED;
4908 4908 return (0);
4909 4909
4910 4910 default:
4911 4911 return (EINVAL);
4912 4912 }
4913 4913 }
4914 4914
4915 4915 /*
4916 4916 * Called by async thread to do synchronous pageio. Do the i/o, wait
4917 4917 * for it to complete, and cleanup the page list when done.
4918 4918 */
4919 4919 static int
4920 4920 nfs_sync_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
4921 4921 int flags, cred_t *cr)
4922 4922 {
4923 4923 int error;
4924 4924
4925 4925 ASSERT(nfs_zone() == VTOMI(vp)->mi_zone);
4926 4926 error = nfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4927 4927 if (flags & B_READ)
4928 4928 pvn_read_done(pp, (error ? B_ERROR : 0) | flags);
4929 4929 else
4930 4930 pvn_write_done(pp, (error ? B_ERROR : 0) | flags);
4931 4931 return (error);
4932 4932 }
4933 4933
4934 4934 /* ARGSUSED */
4935 4935 static int
4936 4936 nfs_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
4937 4937 int flags, cred_t *cr, caller_context_t *ct)
4938 4938 {
4939 4939 int error;
4940 4940 rnode_t *rp;
4941 4941
4942 4942 if (pp == NULL)
4943 4943 return (EINVAL);
4944 4944
4945 4945 if (io_off > MAXOFF32_T)
4946 4946 return (EFBIG);
4947 4947 if (nfs_zone() != VTOMI(vp)->mi_zone)
4948 4948 return (EIO);
4949 4949 rp = VTOR(vp);
4950 4950 mutex_enter(&rp->r_statelock);
4951 4951 rp->r_count++;
4952 4952 mutex_exit(&rp->r_statelock);
4953 4953
4954 4954 if (flags & B_ASYNC) {
4955 4955 error = nfs_async_pageio(vp, pp, io_off, io_len, flags, cr,
4956 4956 nfs_sync_pageio);
4957 4957 } else
4958 4958 error = nfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4959 4959 mutex_enter(&rp->r_statelock);
4960 4960 rp->r_count--;
4961 4961 cv_broadcast(&rp->r_cv);
4962 4962 mutex_exit(&rp->r_statelock);
4963 4963 return (error);
4964 4964 }
4965 4965
4966 4966 /* ARGSUSED */
4967 4967 static int
4968 4968 nfs_setsecattr(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
4969 4969 caller_context_t *ct)
4970 4970 {
4971 4971 int error;
4972 4972 mntinfo_t *mi;
4973 4973
4974 4974 mi = VTOMI(vp);
4975 4975
4976 4976 if (nfs_zone() != mi->mi_zone)
4977 4977 return (EIO);
4978 4978 if (mi->mi_flags & MI_ACL) {
4979 4979 error = acl_setacl2(vp, vsecattr, flag, cr);
4980 4980 if (mi->mi_flags & MI_ACL)
4981 4981 return (error);
4982 4982 }
4983 4983
4984 4984 return (ENOSYS);
4985 4985 }
4986 4986
4987 4987 /* ARGSUSED */
4988 4988 static int
4989 4989 nfs_getsecattr(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
4990 4990 caller_context_t *ct)
4991 4991 {
4992 4992 int error;
4993 4993 mntinfo_t *mi;
4994 4994
4995 4995 mi = VTOMI(vp);
4996 4996
4997 4997 if (nfs_zone() != mi->mi_zone)
4998 4998 return (EIO);
4999 4999 if (mi->mi_flags & MI_ACL) {
5000 5000 error = acl_getacl2(vp, vsecattr, flag, cr);
5001 5001 if (mi->mi_flags & MI_ACL)
5002 5002 return (error);
5003 5003 }
5004 5004
5005 5005 return (fs_fab_acl(vp, vsecattr, flag, cr, ct));
5006 5006 }
5007 5007
5008 5008 /* ARGSUSED */
5009 5009 static int
5010 5010 nfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
5011 5011 caller_context_t *ct)
5012 5012 {
5013 5013 int error;
5014 5014 struct shrlock nshr;
5015 5015 struct nfs_owner nfs_owner;
5016 5016 netobj lm_fh;
5017 5017
5018 5018 if (nfs_zone() != VTOMI(vp)->mi_zone)
5019 5019 return (EIO);
5020 5020
5021 5021 /*
5022 5022 * check for valid cmd parameter
5023 5023 */
5024 5024 if (cmd != F_SHARE && cmd != F_UNSHARE && cmd != F_HASREMOTELOCKS)
5025 5025 return (EINVAL);
5026 5026
5027 5027 /*
5028 5028 * Check access permissions
5029 5029 */
5030 5030 if (cmd == F_SHARE &&
5031 5031 (((shr->s_access & F_RDACC) && !(flag & FREAD)) ||
5032 5032 ((shr->s_access & F_WRACC) && !(flag & FWRITE))))
5033 5033 return (EBADF);
5034 5034
5035 5035 /*
5036 5036 * If the filesystem is mounted using local locking, pass the
5037 5037 * request off to the local share code.
5038 5038 */
5039 5039 if (VTOMI(vp)->mi_flags & MI_LLOCK)
5040 5040 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
5041 5041
5042 5042 switch (cmd) {
5043 5043 case F_SHARE:
5044 5044 case F_UNSHARE:
5045 5045 lm_fh.n_len = sizeof (fhandle_t);
5046 5046 lm_fh.n_bytes = (char *)VTOFH(vp);
5047 5047
5048 5048 /*
5049 5049 * If passed an owner that is too large to fit in an
5050 5050 * nfs_owner it is likely a recursive call from the
5051 5051 * lock manager client and pass it straight through. If
5052 5052 * it is not a nfs_owner then simply return an error.
5053 5053 */
5054 5054 if (shr->s_own_len > sizeof (nfs_owner.lowner)) {
5055 5055 if (((struct nfs_owner *)shr->s_owner)->magic !=
5056 5056 NFS_OWNER_MAGIC)
5057 5057 return (EINVAL);
5058 5058
5059 5059 if (error = lm_shrlock(vp, cmd, shr, flag, &lm_fh)) {
5060 5060 error = set_errno(error);
5061 5061 }
5062 5062 return (error);
5063 5063 }
5064 5064 /*
5065 5065 * Remote share reservations owner is a combination of
5066 5066 * a magic number, hostname, and the local owner
5067 5067 */
5068 5068 bzero(&nfs_owner, sizeof (nfs_owner));
5069 5069 nfs_owner.magic = NFS_OWNER_MAGIC;
5070 5070 (void) strncpy(nfs_owner.hname, uts_nodename(),
5071 5071 sizeof (nfs_owner.hname));
5072 5072 bcopy(shr->s_owner, nfs_owner.lowner, shr->s_own_len);
5073 5073 nshr.s_access = shr->s_access;
5074 5074 nshr.s_deny = shr->s_deny;
5075 5075 nshr.s_sysid = 0;
5076 5076 nshr.s_pid = ttoproc(curthread)->p_pid;
5077 5077 nshr.s_own_len = sizeof (nfs_owner);
5078 5078 nshr.s_owner = (caddr_t)&nfs_owner;
5079 5079
5080 5080 if (error = lm_shrlock(vp, cmd, &nshr, flag, &lm_fh)) {
5081 5081 error = set_errno(error);
5082 5082 }
5083 5083
5084 5084 break;
5085 5085
5086 5086 case F_HASREMOTELOCKS:
5087 5087 /*
5088 5088 * NFS client can't store remote locks itself
5089 5089 */
5090 5090 shr->s_access = 0;
5091 5091 error = 0;
5092 5092 break;
5093 5093
5094 5094 default:
5095 5095 error = EINVAL;
5096 5096 break;
5097 5097 }
5098 5098
5099 5099 return (error);
5100 5100 }
↓ open down ↓ |
677 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX