1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/param.h>
  26 #include <sys/types.h>
  27 #include <sys/systm.h>
  28 #include <sys/cred.h>
  29 #include <sys/proc.h>
  30 #include <sys/user.h>
  31 #include <sys/time.h>
  32 #include <sys/vnode.h>
  33 #include <sys/vfs.h>
  34 #include <sys/vfs_opreg.h>
  35 #include <sys/file.h>
  36 #include <sys/filio.h>
  37 #include <sys/uio.h>
  38 #include <sys/buf.h>
  39 #include <sys/mman.h>
  40 #include <sys/tiuser.h>
  41 #include <sys/pathname.h>
  42 #include <sys/dirent.h>
  43 #include <sys/conf.h>
  44 #include <sys/debug.h>
  45 #include <sys/vmsystm.h>
  46 #include <sys/fcntl.h>
  47 #include <sys/flock.h>
  48 #include <sys/swap.h>
  49 #include <sys/errno.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/disp.h>
  52 #include <sys/kmem.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/vtrace.h>
  55 #include <sys/mount.h>
  56 #include <sys/bootconf.h>
  57 #include <sys/dnlc.h>
  58 #include <sys/stat.h>
  59 #include <sys/acl.h>
  60 #include <sys/policy.h>
  61 #include <rpc/types.h>
  62 
  63 #include <vm/hat.h>
  64 #include <vm/as.h>
  65 #include <vm/page.h>
  66 #include <vm/pvn.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_map.h>
  69 #include <vm/seg_vn.h>
  70 #include <vm/rm.h>
  71 #include <sys/fs/cachefs_fs.h>
  72 #include <sys/fs/cachefs_dir.h>
  73 #include <sys/fs/cachefs_dlog.h>
  74 #include <sys/fs/cachefs_ioctl.h>
  75 #include <sys/fs/cachefs_log.h>
  76 #include <fs/fs_subr.h>
  77 
  78 int cachefs_dnlc;       /* use dnlc, debugging */
  79 
  80 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
  81     cred_t *cr);
  82 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
  83     cred_t *cr);
  84 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
  85 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
  86 static int cachefs_getacldirvp(cnode_t *cp);
  87 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
  88 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
  89 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
  90 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
  91     u_offset_t iooff, cred_t *cr);
  92 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
  93     u_offset_t iooff, cred_t *cr);
  94 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
  95     cred_t *cr, caller_context_t *ct);
  96 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
  97     int flags, cred_t *cr, caller_context_t *ct);
  98 static int cachefs_access_connected(struct vnode *vp, int mode,
  99     int flags, cred_t *cr);
 100 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
 101     cred_t *cr);
 102 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
 103     char *tnm, cred_t *cr);
 104 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
 105     vattr_t *tva, char *tnm, cred_t *cr);
 106 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
 107     cred_t *cr);
 108 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
 109     char *tnm, cred_t *cr);
 110 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
 111     vnode_t **vpp, cred_t *cr);
 112 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
 113     vnode_t **vpp, cred_t *cr);
 114 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
 115 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
 116     vnode_t *cdir, cred_t *cr, vnode_t *vp);
 117 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
 118     vnode_t *cdir, cred_t *cr, vnode_t *vp);
 119 static char *cachefs_newname(void);
 120 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
 121     cred_t *cr);
 122 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
 123     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
 124 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
 125     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
 126 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
 127     int *eofp);
 128 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
 129     cred_t *cr, int *eofp);
 130 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
 131         cred_t *cr, int *eofp);
 132 
 133 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
 134     cred_t *cr, caller_context_t *ct);
 135 
 136 static  int     cachefs_open(struct vnode **, int, cred_t *,
 137                         caller_context_t *);
 138 static  int     cachefs_close(struct vnode *, int, int, offset_t,
 139                         cred_t *, caller_context_t *);
 140 static  int     cachefs_read(struct vnode *, struct uio *, int, cred_t *,
 141                         caller_context_t *);
 142 static  int     cachefs_write(struct vnode *, struct uio *, int, cred_t *,
 143                         caller_context_t *);
 144 static  int     cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
 145                         int *, caller_context_t *);
 146 static  int     cachefs_getattr(struct vnode *, struct vattr *, int,
 147                         cred_t *, caller_context_t *);
 148 static  int     cachefs_setattr(struct vnode *, struct vattr *,
 149                         int, cred_t *, caller_context_t *);
 150 static  int     cachefs_access(struct vnode *, int, int, cred_t *,
 151                         caller_context_t *);
 152 static  int     cachefs_lookup(struct vnode *, char *, struct vnode **,
 153                         struct pathname *, int, struct vnode *, cred_t *,
 154                         caller_context_t *, int *, pathname_t *);
 155 static  int     cachefs_create(struct vnode *, char *, struct vattr *,
 156                         enum vcexcl, int, struct vnode **, cred_t *, int,
 157                         caller_context_t *, vsecattr_t *);
 158 static  int     cachefs_create_connected(vnode_t *dvp, char *nm,
 159                         vattr_t *vap, enum vcexcl exclusive, int mode,
 160                         vnode_t **vpp, cred_t *cr);
 161 static  int     cachefs_create_disconnected(vnode_t *dvp, char *nm,
 162                         vattr_t *vap, enum vcexcl exclusive, int mode,
 163                         vnode_t **vpp, cred_t *cr);
 164 static  int     cachefs_remove(struct vnode *, char *, cred_t *,
 165                         caller_context_t *, int);
 166 static  int     cachefs_link(struct vnode *, struct vnode *, char *,
 167                         cred_t *, caller_context_t *, int);
 168 static  int     cachefs_rename(struct vnode *, char *, struct vnode *,
 169                         char *, cred_t *, caller_context_t *, int);
 170 static  int     cachefs_mkdir(struct vnode *, char *, struct
 171                         vattr *, struct vnode **, cred_t *, caller_context_t *,
 172                         int, vsecattr_t *);
 173 static  int     cachefs_rmdir(struct vnode *, char *, struct vnode *,
 174                         cred_t *, caller_context_t *, int);
 175 static  int     cachefs_readdir(struct vnode *, struct uio *,
 176                         cred_t *, int *, caller_context_t *, int);
 177 static  int     cachefs_symlink(struct vnode *, char *, struct vattr *,
 178                         char *, cred_t *, caller_context_t *, int);
 179 static  int     cachefs_readlink(struct vnode *, struct uio *, cred_t *,
 180                         caller_context_t *);
 181 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
 182 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
 183 static  int     cachefs_fsync(struct vnode *, int, cred_t *,
 184                         caller_context_t *);
 185 static  void    cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
 186 static  int     cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
 187 static  int     cachefs_rwlock(struct vnode *, int, caller_context_t *);
 188 static  void    cachefs_rwunlock(struct vnode *, int, caller_context_t *);
 189 static  int     cachefs_seek(struct vnode *, offset_t, offset_t *,
 190                         caller_context_t *);
 191 static  int     cachefs_frlock(struct vnode *, int, struct flock64 *,
 192                         int, offset_t, struct flk_callback *, cred_t *,
 193                         caller_context_t *);
 194 static  int     cachefs_space(struct vnode *, int, struct flock64 *, int,
 195                         offset_t, cred_t *, caller_context_t *);
 196 static  int     cachefs_realvp(struct vnode *, struct vnode **,
 197                         caller_context_t *);
 198 static  int     cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
 199                         struct page *[], size_t, struct seg *, caddr_t,
 200                         enum seg_rw, cred_t *, caller_context_t *);
 201 static  int     cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
 202                         struct page *[], size_t, struct seg *, caddr_t,
 203                         enum seg_rw, cred_t *);
 204 static  int     cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
 205                 uint_t *, struct page *[], size_t, struct seg *, caddr_t,
 206                         enum seg_rw, cred_t *);
 207 static  int     cachefs_putpage(struct vnode *, offset_t, size_t, int,
 208                         cred_t *, caller_context_t *);
 209 static  int     cachefs_map(struct vnode *, offset_t, struct as *,
 210                         caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
 211                         caller_context_t *);
 212 static  int     cachefs_addmap(struct vnode *, offset_t, struct as *,
 213                         caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
 214                         caller_context_t *);
 215 static  int     cachefs_delmap(struct vnode *, offset_t, struct as *,
 216                         caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
 217                         caller_context_t *);
 218 static int      cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
 219                         int flag, cred_t *cr, caller_context_t *);
 220 static int      cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
 221                         int flag, cred_t *cr, caller_context_t *);
 222 static  int     cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
 223                         cred_t *, caller_context_t *);
 224 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
 225     cred_t *cr);
 226 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
 227     int flag, cred_t *cr);
 228 
 229 static int      cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
 230                         caller_context_t *);
 231 static int      cachefs_pageio(struct vnode *, page_t *,
 232                     u_offset_t, size_t, int, cred_t *, caller_context_t *);
 233 static int      cachefs_writepage(struct vnode *vp, caddr_t base,
 234                     int tcount, struct uio *uiop);
 235 static int      cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
 236                         caller_context_t *);
 237 
 238 static int      cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
 239                         cred_t *cr, caller_context_t *ct);
 240 static int      cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
 241                         cred_t *cr, caller_context_t *ct);
 242 static int      cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
 243                         int flags, cred_t *cr, caller_context_t *ct);
 244 static int      cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
 245                         vnode_t *vp);
 246 static int      cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
 247                         size_t len, uint_t *protp, struct page *pl[],
 248                         size_t plsz, struct seg *seg, caddr_t addr,
 249                         enum seg_rw rw, cred_t *cr);
 250 static int      cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
 251                         size_t len, int flags, cred_t *cr);
 252 static int      cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
 253                         struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
 254                         uchar_t maxprot, uint_t flags, cred_t *cr);
 255 static int      cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
 256                         struct flock64 *bfp, int flag, offset_t offset,
 257                         cred_t *cr, caller_context_t *ct);
 258 
 259 struct vnodeops *cachefs_vnodeops;
 260 
 261 static const fs_operation_def_t cachefs_vnodeops_template[] = {
 262         VOPNAME_OPEN,           { .vop_open = cachefs_open },
 263         VOPNAME_CLOSE,          { .vop_close = cachefs_close },
 264         VOPNAME_READ,           { .vop_read = cachefs_read },
 265         VOPNAME_WRITE,          { .vop_write = cachefs_write },
 266         VOPNAME_IOCTL,          { .vop_ioctl = cachefs_ioctl },
 267         VOPNAME_GETATTR,        { .vop_getattr = cachefs_getattr },
 268         VOPNAME_SETATTR,        { .vop_setattr = cachefs_setattr },
 269         VOPNAME_ACCESS,         { .vop_access = cachefs_access },
 270         VOPNAME_LOOKUP,         { .vop_lookup = cachefs_lookup },
 271         VOPNAME_CREATE,         { .vop_create = cachefs_create },
 272         VOPNAME_REMOVE,         { .vop_remove = cachefs_remove },
 273         VOPNAME_LINK,           { .vop_link = cachefs_link },
 274         VOPNAME_RENAME,         { .vop_rename = cachefs_rename },
 275         VOPNAME_MKDIR,          { .vop_mkdir = cachefs_mkdir },
 276         VOPNAME_RMDIR,          { .vop_rmdir = cachefs_rmdir },
 277         VOPNAME_READDIR,        { .vop_readdir = cachefs_readdir },
 278         VOPNAME_SYMLINK,        { .vop_symlink = cachefs_symlink },
 279         VOPNAME_READLINK,       { .vop_readlink = cachefs_readlink },
 280         VOPNAME_FSYNC,          { .vop_fsync = cachefs_fsync },
 281         VOPNAME_INACTIVE,       { .vop_inactive = cachefs_inactive },
 282         VOPNAME_FID,            { .vop_fid = cachefs_fid },
 283         VOPNAME_RWLOCK,         { .vop_rwlock = cachefs_rwlock },
 284         VOPNAME_RWUNLOCK,       { .vop_rwunlock = cachefs_rwunlock },
 285         VOPNAME_SEEK,           { .vop_seek = cachefs_seek },
 286         VOPNAME_FRLOCK,         { .vop_frlock = cachefs_frlock },
 287         VOPNAME_SPACE,          { .vop_space = cachefs_space },
 288         VOPNAME_REALVP,         { .vop_realvp = cachefs_realvp },
 289         VOPNAME_GETPAGE,        { .vop_getpage = cachefs_getpage },
 290         VOPNAME_PUTPAGE,        { .vop_putpage = cachefs_putpage },
 291         VOPNAME_MAP,            { .vop_map = cachefs_map },
 292         VOPNAME_ADDMAP,         { .vop_addmap = cachefs_addmap },
 293         VOPNAME_DELMAP,         { .vop_delmap = cachefs_delmap },
 294         VOPNAME_DUMP,           { .vop_dump = cachefs_dump },
 295         VOPNAME_PATHCONF,       { .vop_pathconf = cachefs_pathconf },
 296         VOPNAME_PAGEIO,         { .vop_pageio = cachefs_pageio },
 297         VOPNAME_SETSECATTR,     { .vop_setsecattr = cachefs_setsecattr },
 298         VOPNAME_GETSECATTR,     { .vop_getsecattr = cachefs_getsecattr },
 299         VOPNAME_SHRLOCK,        { .vop_shrlock = cachefs_shrlock },
 300         NULL,                   NULL
 301 };
 302 
 303 /* forward declarations of statics */
 304 static void cachefs_modified(cnode_t *cp);
 305 static int cachefs_modified_alloc(cnode_t *cp);
 306 
 307 int
 308 cachefs_init_vnops(char *name)
 309 {
 310         return (vn_make_ops(name,
 311             cachefs_vnodeops_template, &cachefs_vnodeops));
 312 }
 313 
 314 struct vnodeops *
 315 cachefs_getvnodeops(void)
 316 {
 317         return (cachefs_vnodeops);
 318 }
 319 
 320 static int
 321 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 322 {
 323         int error = 0;
 324         cnode_t *cp = VTOC(*vpp);
 325         fscache_t *fscp = C_TO_FSCACHE(cp);
 326         int held = 0;
 327         int type;
 328         int connected = 0;
 329 
 330 #ifdef CFSDEBUG
 331         CFS_DEBUG(CFSDEBUG_VOPS)
 332                 printf("cachefs_open: ENTER vpp %p flag %x\n",
 333                     (void *)vpp, flag);
 334 #endif
 335         if (getzoneid() != GLOBAL_ZONEID) {
 336                 error = EPERM;
 337                 goto out;
 338         }
 339         if ((flag & FWRITE) &&
 340             ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
 341                 error = EISDIR;
 342                 goto out;
 343         }
 344 
 345         /*
 346          * Cachefs only provides pass-through support for NFSv4,
 347          * and all vnode operations are passed through to the
 348          * back file system. For NFSv4 pass-through to work, only
 349          * connected operation is supported, the cnode backvp must
 350          * exist, and cachefs optional (eg., disconnectable) flags
 351          * are turned off. Assert these conditions to ensure that
 352          * the backfilesystem is called for the open operation.
 353          */
 354         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 355         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 356 
 357         for (;;) {
 358                 /* get (or renew) access to the file system */
 359                 if (held) {
 360                         /* Won't loop with NFSv4 connected behavior */
 361                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
 362                         cachefs_cd_release(fscp);
 363                         held = 0;
 364                 }
 365                 error = cachefs_cd_access(fscp, connected, 0);
 366                 if (error)
 367                         goto out;
 368                 held = 1;
 369 
 370                 mutex_enter(&cp->c_statelock);
 371 
 372                 /* grab creds if we do not have any yet */
 373                 if (cp->c_cred == NULL) {
 374                         crhold(cr);
 375                         cp->c_cred = cr;
 376                 }
 377                 cp->c_flags |= CN_NEEDOPEN;
 378 
 379                 /* if we are disconnected */
 380                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
 381                         /* if we cannot write to the file system */
 382                         if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
 383                                 mutex_exit(&cp->c_statelock);
 384                                 connected = 1;
 385                                 continue;
 386                         }
 387                         /*
 388                          * Allow read only requests to continue
 389                          */
 390                         if ((flag & (FWRITE|FREAD)) == FREAD) {
 391                                 /* track the flag for opening the backvp */
 392                                 cp->c_rdcnt++;
 393                                 mutex_exit(&cp->c_statelock);
 394                                 error = 0;
 395                                 break;
 396                         }
 397 
 398                         /*
 399                          * check credentials  - if this procs
 400                          * credentials don't match the creds in the
 401                          * cnode disallow writing while disconnected.
 402                          */
 403                         if (crcmp(cp->c_cred, CRED()) != 0 &&
 404                             secpolicy_vnode_access2(CRED(), *vpp,
 405                             cp->c_attr.va_uid, 0, VWRITE) != 0) {
 406                                 mutex_exit(&cp->c_statelock);
 407                                 connected = 1;
 408                                 continue;
 409                         }
 410                         /* to get here, we know that the WRITE flag is on */
 411                         cp->c_wrcnt++;
 412                         if (flag & FREAD)
 413                                 cp->c_rdcnt++;
 414                 }
 415 
 416                 /* else if we are connected */
 417                 else {
 418                         /* if cannot use the cached copy of the file */
 419                         if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
 420                             ((cp->c_flags & CN_NOCACHE) == 0))
 421                                 cachefs_nocache(cp);
 422 
 423                         /* pass open to the back file */
 424                         if (cp->c_backvp) {
 425                                 cp->c_flags &= ~CN_NEEDOPEN;
 426                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
 427                                     ("cachefs_open (nfsv4): cnode %p, "
 428                                     "backvp %p\n", cp, cp->c_backvp));
 429                                 error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
 430                                 if (CFS_TIMEOUT(fscp, error)) {
 431                                         mutex_exit(&cp->c_statelock);
 432                                         cachefs_cd_release(fscp);
 433                                         held = 0;
 434                                         cachefs_cd_timedout(fscp);
 435                                         continue;
 436                                 } else if (error) {
 437                                         mutex_exit(&cp->c_statelock);
 438                                         break;
 439                                 }
 440                         } else {
 441                                 /* backvp will be VOP_OPEN'd later */
 442                                 if (flag & FREAD)
 443                                         cp->c_rdcnt++;
 444                                 if (flag & FWRITE)
 445                                         cp->c_wrcnt++;
 446                         }
 447 
 448                         /*
 449                          * Now perform a consistency check on the file.
 450                          * If strict consistency then force a check to
 451                          * the backfs even if the timeout has not expired
 452                          * for close-to-open consistency.
 453                          */
 454                         type = 0;
 455                         if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
 456                                 type = C_BACK_CHECK;
 457                         error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
 458                         if (CFS_TIMEOUT(fscp, error)) {
 459                                 mutex_exit(&cp->c_statelock);
 460                                 cachefs_cd_release(fscp);
 461                                 held = 0;
 462                                 cachefs_cd_timedout(fscp);
 463                                 continue;
 464                         }
 465                 }
 466                 mutex_exit(&cp->c_statelock);
 467                 break;
 468         }
 469         if (held)
 470                 cachefs_cd_release(fscp);
 471 out:
 472 #ifdef CFS_CD_DEBUG
 473         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
 474 #endif
 475 #ifdef CFSDEBUG
 476         CFS_DEBUG(CFSDEBUG_VOPS)
 477                 printf("cachefs_open: EXIT vpp %p error %d\n",
 478                     (void *)vpp, error);
 479 #endif
 480         return (error);
 481 }
 482 
 483 /* ARGSUSED */
 484 static int
 485 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 486         caller_context_t *ct)
 487 {
 488         int error = 0;
 489         cnode_t *cp = VTOC(vp);
 490         fscache_t *fscp = C_TO_FSCACHE(cp);
 491         int held = 0;
 492         int connected = 0;
 493         int close_cnt = 1;
 494         cachefscache_t *cachep;
 495 
 496 #ifdef CFSDEBUG
 497         CFS_DEBUG(CFSDEBUG_VOPS)
 498                 printf("cachefs_close: ENTER vp %p\n", (void *)vp);
 499 #endif
 500         /*
 501          * Cachefs only provides pass-through support for NFSv4,
 502          * and all vnode operations are passed through to the
 503          * back file system. For NFSv4 pass-through to work, only
 504          * connected operation is supported, the cnode backvp must
 505          * exist, and cachefs optional (eg., disconnectable) flags
 506          * are turned off. Assert these conditions to ensure that
 507          * the backfilesystem is called for the close operation.
 508          */
 509         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 510         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 511 
 512         /*
 513          * File could have been passed in or inherited from the global zone, so
 514          * we don't want to flat out reject the request; we'll just leave things
 515          * the way they are and let the backfs (NFS) deal with it.
 516          */
 517         /* get rid of any local locks */
 518         if (CFS_ISFS_LLOCK(fscp)) {
 519                 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 520         }
 521 
 522         /* clean up if this is the daemon closing down */
 523         if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
 524             ((ttoproc(curthread)->p_pid) != 0) &&
 525             (vp == fscp->fs_rootvp) &&
 526             (count == 1)) {
 527                 mutex_enter(&fscp->fs_cdlock);
 528                 fscp->fs_cddaemonid = 0;
 529                 if (fscp->fs_dlogfile)
 530                         fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
 531                 else
 532                         fscp->fs_cdconnected = CFS_CD_CONNECTED;
 533                 cv_broadcast(&fscp->fs_cdwaitcv);
 534                 mutex_exit(&fscp->fs_cdlock);
 535                 if (fscp->fs_flags & CFS_FS_ROOTFS) {
 536                         cachep = fscp->fs_cache;
 537                         mutex_enter(&cachep->c_contentslock);
 538                         ASSERT(cachep->c_rootdaemonid != 0);
 539                         cachep->c_rootdaemonid = 0;
 540                         mutex_exit(&cachep->c_contentslock);
 541                 }
 542                 return (0);
 543         }
 544 
 545         for (;;) {
 546                 /* get (or renew) access to the file system */
 547                 if (held) {
 548                         /* Won't loop with NFSv4 connected behavior */
 549                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
 550                         cachefs_cd_release(fscp);
 551                         held = 0;
 552                 }
 553                 error = cachefs_cd_access(fscp, connected, 0);
 554                 if (error)
 555                         goto out;
 556                 held = 1;
 557                 connected = 0;
 558 
 559                 /* if not the last close */
 560                 if (count > 1) {
 561                         if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
 562                                 goto out;
 563                         mutex_enter(&cp->c_statelock);
 564                         if (cp->c_backvp) {
 565                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
 566                                     ("cachefs_close (nfsv4): cnode %p, "
 567                                     "backvp %p\n", cp, cp->c_backvp));
 568                                 error = VOP_CLOSE(cp->c_backvp, flag, count,
 569                                     offset, cr, ct);
 570                                 if (CFS_TIMEOUT(fscp, error)) {
 571                                         mutex_exit(&cp->c_statelock);
 572                                         cachefs_cd_release(fscp);
 573                                         held = 0;
 574                                         cachefs_cd_timedout(fscp);
 575                                         continue;
 576                                 }
 577                         }
 578                         mutex_exit(&cp->c_statelock);
 579                         goto out;
 580                 }
 581 
 582                 /*
 583                  * If the file is an unlinked file, then flush the lookup
 584                  * cache so that inactive will be called if this is
 585                  * the last reference.  It will invalidate all of the
 586                  * cached pages, without writing them out.  Writing them
 587                  * out is not required because they will be written to a
 588                  * file which will be immediately removed.
 589                  */
 590                 if (cp->c_unldvp != NULL) {
 591                         dnlc_purge_vp(vp);
 592                         mutex_enter(&cp->c_statelock);
 593                         error = cp->c_error;
 594                         cp->c_error = 0;
 595                         mutex_exit(&cp->c_statelock);
 596                         /* always call VOP_CLOSE() for back fs vnode */
 597                 }
 598 
 599                 /* force dirty data to stable storage */
 600                 else if ((vp->v_type == VREG) && (flag & FWRITE) &&
 601                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
 602                         /* clean the cachefs pages synchronously */
 603                         error = cachefs_putpage_common(vp, (offset_t)0,
 604                             0, 0, cr);
 605                         if (CFS_TIMEOUT(fscp, error)) {
 606                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
 607                                         cachefs_cd_release(fscp);
 608                                         held = 0;
 609                                         cachefs_cd_timedout(fscp);
 610                                         continue;
 611                                 } else {
 612                                         connected = 1;
 613                                         continue;
 614                                 }
 615                         }
 616 
 617                         /* if no space left in cache, wait until connected */
 618                         if ((error == ENOSPC) &&
 619                             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
 620                                 connected = 1;
 621                                 continue;
 622                         }
 623 
 624                         /* clear the cnode error if putpage worked */
 625                         if ((error == 0) && cp->c_error) {
 626                                 mutex_enter(&cp->c_statelock);
 627                                 cp->c_error = 0;
 628                                 mutex_exit(&cp->c_statelock);
 629                         }
 630 
 631                         /* if any other important error */
 632                         if (cp->c_error) {
 633                                 /* get rid of the pages */
 634                                 (void) cachefs_putpage_common(vp,
 635                                     (offset_t)0, 0, B_INVAL | B_FORCE, cr);
 636                                 dnlc_purge_vp(vp);
 637                         }
 638                 }
 639 
 640                 mutex_enter(&cp->c_statelock);
 641                 if (cp->c_backvp &&
 642                     (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
 643                         error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
 644                             offset, cr, ct);
 645                         if (CFS_TIMEOUT(fscp, error)) {
 646                                 mutex_exit(&cp->c_statelock);
 647                                 cachefs_cd_release(fscp);
 648                                 held = 0;
 649                                 cachefs_cd_timedout(fscp);
 650                                 /* don't decrement the vnode counts again */
 651                                 close_cnt = 0;
 652                                 continue;
 653                         }
 654                 }
 655                 mutex_exit(&cp->c_statelock);
 656                 break;
 657         }
 658 
 659         mutex_enter(&cp->c_statelock);
 660         if (!error)
 661                 error = cp->c_error;
 662         cp->c_error = 0;
 663         mutex_exit(&cp->c_statelock);
 664 
 665 out:
 666         if (held)
 667                 cachefs_cd_release(fscp);
 668 #ifdef CFS_CD_DEBUG
 669         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
 670 #endif
 671 
 672 #ifdef CFSDEBUG
 673         CFS_DEBUG(CFSDEBUG_VOPS)
 674                 printf("cachefs_close: EXIT vp %p\n", (void *)vp);
 675 #endif
 676         return (error);
 677 }
 678 
 679 /*ARGSUSED*/
 680 static int
 681 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 682         caller_context_t *ct)
 683 {
 684         struct cnode *cp = VTOC(vp);
 685         fscache_t *fscp = C_TO_FSCACHE(cp);
 686         register u_offset_t off;
 687         register int mapoff;
 688         register caddr_t base;
 689         int n;
 690         offset_t diff;
 691         uint_t flags = 0;
 692         int error = 0;
 693 
 694 #if 0
 695         if (vp->v_flag & VNOCACHE)
 696                 flags = SM_INVAL;
 697 #endif
 698         if (getzoneid() != GLOBAL_ZONEID)
 699                 return (EPERM);
 700         if (vp->v_type != VREG)
 701                 return (EISDIR);
 702 
 703         ASSERT(RW_READ_HELD(&cp->c_rwlock));
 704 
 705         if (uiop->uio_resid == 0)
 706                 return (0);
 707 
 708 
 709         if (uiop->uio_loffset < (offset_t)0)
 710                 return (EINVAL);
 711 
 712         /*
 713          * Call backfilesystem to read if NFSv4, the cachefs code
 714          * does the read from the back filesystem asynchronously
 715          * which is not supported by pass-through functionality.
 716          */
 717         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
 718                 error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
 719                 goto out;
 720         }
 721 
 722         if (MANDLOCK(vp, cp->c_attr.va_mode)) {
 723                 error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
 724                     uiop->uio_resid, uiop->uio_fmode, ct);
 725                 if (error)
 726                         return (error);
 727         }
 728 
 729         /*
 730          * Sit in a loop and transfer (uiomove) the data in up to
 731          * MAXBSIZE chunks. Each chunk is mapped into the kernel's
 732          * address space as needed and then released.
 733          */
 734         do {
 735                 /*
 736                  *      off     Offset of current MAXBSIZE chunk
 737                  *      mapoff  Offset within the current chunk
 738                  *      n       Number of bytes to move from this chunk
 739                  *      base    kernel address of mapped in chunk
 740                  */
 741                 off = uiop->uio_loffset & (offset_t)MAXBMASK;
 742                 mapoff = uiop->uio_loffset & MAXBOFFSET;
 743                 n = MAXBSIZE - mapoff;
 744                 if (n > uiop->uio_resid)
 745                         n = (uint_t)uiop->uio_resid;
 746 
 747                 /* perform consistency check */
 748                 error = cachefs_cd_access(fscp, 0, 0);
 749                 if (error)
 750                         break;
 751                 mutex_enter(&cp->c_statelock);
 752                 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
 753                 diff = cp->c_size - uiop->uio_loffset;
 754                 mutex_exit(&cp->c_statelock);
 755                 if (CFS_TIMEOUT(fscp, error)) {
 756                         cachefs_cd_release(fscp);
 757                         cachefs_cd_timedout(fscp);
 758                         error = 0;
 759                         continue;
 760                 }
 761                 cachefs_cd_release(fscp);
 762 
 763                 if (error)
 764                         break;
 765 
 766                 if (diff <= (offset_t)0)
 767                         break;
 768                 if (diff < (offset_t)n)
 769                         n = diff;
 770 
 771                 base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
 772 
 773                 error = segmap_fault(kas.a_hat, segkmap, base, n,
 774                     F_SOFTLOCK, S_READ);
 775                 if (error) {
 776                         (void) segmap_release(segkmap, base, 0);
 777                         if (FC_CODE(error) == FC_OBJERR)
 778                                 error =  FC_ERRNO(error);
 779                         else
 780                                 error = EIO;
 781                         break;
 782                 }
 783                 error = uiomove(base+mapoff, n, UIO_READ, uiop);
 784                 (void) segmap_fault(kas.a_hat, segkmap, base, n,
 785                     F_SOFTUNLOCK, S_READ);
 786                 if (error == 0) {
 787                         /*
 788                          * if we read a whole page(s), or to eof,
 789                          *  we won't need this page(s) again soon.
 790                          */
 791                         if (n + mapoff == MAXBSIZE ||
 792                             uiop->uio_loffset == cp->c_size)
 793                                 flags |= SM_DONTNEED;
 794                 }
 795                 (void) segmap_release(segkmap, base, flags);
 796         } while (error == 0 && uiop->uio_resid > 0);
 797 
 798 out:
 799 #ifdef CFSDEBUG
 800         CFS_DEBUG(CFSDEBUG_VOPS)
 801                 printf("cachefs_read: EXIT error %d resid %ld\n", error,
 802                     uiop->uio_resid);
 803 #endif
 804         return (error);
 805 }
 806 
 807 /*
 808  * cachefs_read_backfs_nfsv4
 809  *
 810  * Call NFSv4 back filesystem to handle the read (cachefs
 811  * pass-through support for NFSv4).
 812  */
 813 static int
 814 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 815                         caller_context_t *ct)
 816 {
 817         cnode_t *cp = VTOC(vp);
 818         fscache_t *fscp = C_TO_FSCACHE(cp);
 819         vnode_t *backvp;
 820         int error;
 821 
 822         /*
 823          * For NFSv4 pass-through to work, only connected operation
 824          * is supported, the cnode backvp must exist, and cachefs
 825          * optional (eg., disconnectable) flags are turned off. Assert
 826          * these conditions for the read operation.
 827          */
 828         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
 829         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
 830 
 831         /* Call backfs vnode op after extracting backvp */
 832         mutex_enter(&cp->c_statelock);
 833         backvp = cp->c_backvp;
 834         mutex_exit(&cp->c_statelock);
 835 
 836         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
 837             "backvp %p\n", cp, backvp));
 838 
 839         (void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
 840         error = VOP_READ(backvp, uiop, ioflag, cr, ct);
 841         VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
 842 
 843         /* Increment cache miss counter */
 844         fscp->fs_stats.st_misses++;
 845 
 846         return (error);
 847 }
 848 
 849 /*ARGSUSED*/
 850 static int
 851 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 852         caller_context_t *ct)
 853 {
 854         struct cnode *cp = VTOC(vp);
 855         fscache_t *fscp = C_TO_FSCACHE(cp);
 856         int error = 0;
 857         u_offset_t off;
 858         caddr_t base;
 859         uint_t bsize;
 860         uint_t flags;
 861         int n, on;
 862         rlim64_t limit = uiop->uio_llimit;
 863         ssize_t resid;
 864         offset_t offset;
 865         offset_t remainder;
 866 
 867 #ifdef CFSDEBUG
 868         CFS_DEBUG(CFSDEBUG_VOPS)
 869                 printf(
 870                 "cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
 871                     (void *)vp, uiop->uio_loffset, uiop->uio_resid,
 872                     cp->c_flags);
 873 #endif
 874         if (getzoneid() != GLOBAL_ZONEID) {
 875                 error = EPERM;
 876                 goto out;
 877         }
 878         if (vp->v_type != VREG) {
 879                 error = EISDIR;
 880                 goto out;
 881         }
 882 
 883         ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
 884 
 885         if (uiop->uio_resid == 0) {
 886                 goto out;
 887         }
 888 
 889         /* Call backfilesystem to write if NFSv4 */
 890         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
 891                 error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
 892                 goto out2;
 893         }
 894 
 895         if (MANDLOCK(vp, cp->c_attr.va_mode)) {
 896                 error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
 897                     uiop->uio_resid, uiop->uio_fmode, ct);
 898                 if (error)
 899                         goto out;
 900         }
 901 
 902         if (ioflag & FAPPEND) {
 903                 for (;;) {
 904                         /* do consistency check to get correct file size */
 905                         error = cachefs_cd_access(fscp, 0, 1);
 906                         if (error)
 907                                 goto out;
 908                         mutex_enter(&cp->c_statelock);
 909                         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
 910                         uiop->uio_loffset = cp->c_size;
 911                         mutex_exit(&cp->c_statelock);
 912                         if (CFS_TIMEOUT(fscp, error)) {
 913                                 cachefs_cd_release(fscp);
 914                                 cachefs_cd_timedout(fscp);
 915                                 continue;
 916                         }
 917                         cachefs_cd_release(fscp);
 918                         if (error)
 919                                 goto out;
 920                         break;
 921                 }
 922         }
 923 
 924         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 925                 limit = MAXOFFSET_T;
 926 
 927         if (uiop->uio_loffset >= limit) {
 928                 proc_t *p = ttoproc(curthread);
 929 
 930                 mutex_enter(&p->p_lock);
 931                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
 932                     p, RCA_UNSAFE_SIGINFO);
 933                 mutex_exit(&p->p_lock);
 934                 error = EFBIG;
 935                 goto out;
 936         }
 937         if (uiop->uio_loffset > fscp->fs_offmax) {
 938                 error = EFBIG;
 939                 goto out;
 940         }
 941 
 942         if (limit > fscp->fs_offmax)
 943                 limit = fscp->fs_offmax;
 944 
 945         if (uiop->uio_loffset < (offset_t)0) {
 946                 error = EINVAL;
 947                 goto out;
 948         }
 949 
 950         offset = uiop->uio_loffset + uiop->uio_resid;
 951         /*
 952          * Check to make sure that the process will not exceed
 953          * its limit on file size.  It is okay to write up to
 954          * the limit, but not beyond.  Thus, the write which
 955          * reaches the limit will be short and the next write
 956          * will return an error.
 957          */
 958         remainder = 0;
 959         if (offset > limit) {
 960                 remainder = (int)(offset - (u_offset_t)limit);
 961                 uiop->uio_resid = limit - uiop->uio_loffset;
 962                 if (uiop->uio_resid <= 0) {
 963                         proc_t *p = ttoproc(curthread);
 964 
 965                         uiop->uio_resid += remainder;
 966                         mutex_enter(&p->p_lock);
 967                         (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 968                             p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 969                         mutex_exit(&p->p_lock);
 970                         error = EFBIG;
 971                         goto out;
 972                 }
 973         }
 974 
 975         resid = uiop->uio_resid;
 976         offset = uiop->uio_loffset;
 977         bsize = vp->v_vfsp->vfs_bsize;
 978 
 979         /* loop around and do the write in MAXBSIZE chunks */
 980         do {
 981                 /* mapping offset */
 982                 off = uiop->uio_loffset & (offset_t)MAXBMASK;
 983                 on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
 984                 n = MAXBSIZE - on;
 985                 if (n > uiop->uio_resid)
 986                         n = (int)uiop->uio_resid;
 987 
 988                 /*
 989                  * Touch the page and fault it in if it is not in
 990                  * core before segmap_getmapflt can lock it. This
 991                  * is to avoid the deadlock if the buffer is mapped
 992                  * to the same file through mmap which we want to
 993                  * write to.
 994                  */
 995                 uio_prefaultpages((long)n, uiop);
 996 
 997                 base = segmap_getmap(segkmap, vp, off);
 998                 error = cachefs_writepage(vp, (base + on), n, uiop);
 999                 if (error == 0) {
1000                         flags = 0;
1001                         /*
1002                          * Have written a whole block.Start an
1003                          * asynchronous write and mark the buffer to
1004                          * indicate that it won't be needed again
1005                          * soon.
1006                          */
1007                         if (n + on == bsize) {
1008                                 flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1009                         }
1010 #if 0
1011                         /* XXX need to understand this */
1012                         if ((ioflag & (FSYNC|FDSYNC)) ||
1013                             (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1014                                 flags &= ~SM_ASYNC;
1015                                 flags |= SM_WRITE;
1016                         }
1017 #else
1018                         if (ioflag & (FSYNC|FDSYNC)) {
1019                                 flags &= ~SM_ASYNC;
1020                                 flags |= SM_WRITE;
1021                         }
1022 #endif
1023                         error = segmap_release(segkmap, base, flags);
1024                 } else {
1025                         (void) segmap_release(segkmap, base, 0);
1026                 }
1027         } while (error == 0 && uiop->uio_resid > 0);
1028 
1029 out:
1030         if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1031                 uiop->uio_resid = resid;
1032                 uiop->uio_loffset = offset;
1033         } else
1034                 uiop->uio_resid += remainder;
1035 
1036 out2:
1037 #ifdef CFSDEBUG
1038         CFS_DEBUG(CFSDEBUG_VOPS)
1039                 printf("cachefs_write: EXIT error %d\n", error);
1040 #endif
1041         return (error);
1042 }
1043 
1044 /*
1045  * cachefs_write_backfs_nfsv4
1046  *
1047  * Call NFSv4 back filesystem to handle the write (cachefs
1048  * pass-through support for NFSv4).
1049  */
1050 static int
1051 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1052                         caller_context_t *ct)
1053 {
1054         cnode_t *cp = VTOC(vp);
1055         fscache_t *fscp = C_TO_FSCACHE(cp);
1056         vnode_t *backvp;
1057         int error;
1058 
1059         /*
1060          * For NFSv4 pass-through to work, only connected operation
1061          * is supported, the cnode backvp must exist, and cachefs
1062          * optional (eg., disconnectable) flags are turned off. Assert
1063          * these conditions for the read operation.
1064          */
1065         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1066         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1067 
1068         /* Call backfs vnode op after extracting the backvp */
1069         mutex_enter(&cp->c_statelock);
1070         backvp = cp->c_backvp;
1071         mutex_exit(&cp->c_statelock);
1072 
1073         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1074             "backvp %p\n", cp, backvp));
1075         (void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1076         error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1077         VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1078 
1079         return (error);
1080 }
1081 
1082 /*
1083  * see if we've charged ourselves for frontfile data at
1084  * the given offset.  If not, allocate a block for it now.
1085  */
1086 static int
1087 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1088 {
1089         u_offset_t blockoff;
1090         int error;
1091         int inc;
1092 
1093         ASSERT(MUTEX_HELD(&cp->c_statelock));
1094         /*LINTED*/
1095         ASSERT(PAGESIZE <= MAXBSIZE);
1096 
1097         error = 0;
1098         blockoff = offset & (offset_t)MAXBMASK;
1099 
1100         /* get the front file if necessary so allocblocks works */
1101         if ((cp->c_frontvp == NULL) &&
1102             ((cp->c_flags & CN_NOCACHE) == 0)) {
1103                 (void) cachefs_getfrontfile(cp);
1104         }
1105         if (cp->c_flags & CN_NOCACHE)
1106                 return (1);
1107 
1108         if (cachefs_check_allocmap(cp, blockoff))
1109                 return (0);
1110 
1111         for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1112                 if (cachefs_check_allocmap(cp, blockoff+inc))
1113                         return (0);
1114 
1115         error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1116             cp->c_metadata.md_rltype);
1117         if (error == 0) {
1118                 cp->c_metadata.md_frontblks++;
1119                 cp->c_flags |= CN_UPDATED;
1120         }
1121         return (error);
1122 }
1123 
1124 /*
1125  * Called only by cachefs_write to write 1 page or less of data.
1126  *      base   - base address kernel addr space
1127  *      tcount - Total bytes to move - < MAXBSIZE
1128  */
1129 static int
1130 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1131 {
1132         struct cnode *cp =  VTOC(vp);
1133         fscache_t *fscp = C_TO_FSCACHE(cp);
1134         register int n;
1135         register u_offset_t offset;
1136         int error = 0, terror;
1137         extern struct as kas;
1138         u_offset_t lastpage_off;
1139         int pagecreate = 0;
1140         int newpage;
1141 
1142 #ifdef CFSDEBUG
1143         CFS_DEBUG(CFSDEBUG_VOPS)
1144                 printf(
1145                     "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1146                     (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1147 #endif
1148 
1149         /*
1150          * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1151          * uiomove() because page faults may cause the cache to be invalidated
1152          * out from under us.
1153          */
1154         do {
1155                 offset = uiop->uio_loffset;
1156                 lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1157 
1158                 /*
1159                  * If not connected then need to make sure we have space
1160                  * to perform the write.  We could make this check
1161                  * a little tighter by only doing it if we are growing the file.
1162                  */
1163                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1164                         error = cachefs_allocblocks(fscp->fs_cache, 1,
1165                             cp->c_metadata.md_rltype);
1166                         if (error)
1167                                 break;
1168                         cachefs_freeblocks(fscp->fs_cache, 1,
1169                             cp->c_metadata.md_rltype);
1170                 }
1171 
1172                 /*
1173                  * n is the number of bytes required to satisfy the request
1174                  * or the number of bytes to fill out the page.
1175                  */
1176                 n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1177                 if (n > tcount)
1178                         n = tcount;
1179 
1180                 /*
1181                  * The number of bytes of data in the last page can not
1182                  * be accurately be determined while page is being
1183                  * uiomove'd to and the size of the file being updated.
1184                  * Thus, inform threads which need to know accurately
1185                  * how much data is in the last page of the file.  They
1186                  * will not do the i/o immediately, but will arrange for
1187                  * the i/o to happen later when this modify operation
1188                  * will have finished.
1189                  *
1190                  * in similar NFS code, this is done right before the
1191                  * uiomove(), which is best.  but here in cachefs, we
1192                  * have two uiomove()s, so we must do it here.
1193                  */
1194                 ASSERT(!(cp->c_flags & CN_CMODINPROG));
1195                 mutex_enter(&cp->c_statelock);
1196                 cp->c_flags |= CN_CMODINPROG;
1197                 cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1198                 mutex_exit(&cp->c_statelock);
1199 
1200                 /*
1201                  * Check to see if we can skip reading in the page
1202                  * and just allocate the memory.  We can do this
1203                  * if we are going to rewrite the entire mapping
1204                  * or if we are going to write to or beyond the current
1205                  * end of file from the beginning of the mapping.
1206                  */
1207                 if ((offset > (lastpage_off + PAGEOFFSET)) ||
1208                     ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1209                     ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1210                     ((offset + n) >= cp->c_size))) {
1211                         pagecreate = 1;
1212 
1213                         /*
1214                          * segmap_pagecreate() returns 1 if it calls
1215                          * page_create_va() to allocate any pages.
1216                          */
1217                         newpage = segmap_pagecreate(segkmap,
1218                             (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1219                             PAGESIZE, 0);
1220                         /* do not zero page if we are overwriting all of it */
1221                         if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1222                             (n == PAGESIZE))) {
1223                                 (void) kzero((void *)
1224                                     ((uintptr_t)base & (uintptr_t)PAGEMASK),
1225                                     PAGESIZE);
1226                         }
1227                         error = uiomove(base, n, UIO_WRITE, uiop);
1228 
1229                         /*
1230                          * Unlock the page allocated by page_create_va()
1231                          * in segmap_pagecreate()
1232                          */
1233                         if (newpage)
1234                                 segmap_pageunlock(segkmap,
1235                                     (caddr_t)((uintptr_t)base &
1236                                     (uintptr_t)PAGEMASK),
1237                                     PAGESIZE, S_WRITE);
1238                 } else {
1239                         /*
1240                          * KLUDGE ! Use segmap_fault instead of faulting and
1241                          * using as_fault() to avoid a recursive readers lock
1242                          * on kas.
1243                          */
1244                         error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1245                             ((uintptr_t)base & (uintptr_t)PAGEMASK),
1246                             PAGESIZE, F_SOFTLOCK, S_WRITE);
1247                         if (error) {
1248                                 if (FC_CODE(error) == FC_OBJERR)
1249                                         error =  FC_ERRNO(error);
1250                                 else
1251                                         error = EIO;
1252                                 break;
1253                         }
1254                         error = uiomove(base, n, UIO_WRITE, uiop);
1255                         (void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1256                             ((uintptr_t)base & (uintptr_t)PAGEMASK),
1257                             PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1258                 }
1259                 n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1260                 base += n;
1261                 tcount -= n;
1262 
1263                 /* get access to the file system */
1264                 if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1265                         error = terror;
1266                         break;
1267                 }
1268 
1269                 /*
1270                  * cp->c_attr.va_size is the maximum number of
1271                  * bytes known to be in the file.
1272                  * Make sure it is at least as high as the
1273                  * last byte we just wrote into the buffer.
1274                  */
1275                 mutex_enter(&cp->c_statelock);
1276                 if (cp->c_size < uiop->uio_loffset) {
1277                         cp->c_size = uiop->uio_loffset;
1278                 }
1279                 if (cp->c_size != cp->c_attr.va_size) {
1280                         cp->c_attr.va_size = cp->c_size;
1281                         cp->c_flags |= CN_UPDATED;
1282                 }
1283                 /* c_size is now correct, so we can clear modinprog */
1284                 cp->c_flags &= ~CN_CMODINPROG;
1285                 if (error == 0) {
1286                         cp->c_flags |= CDIRTY;
1287                         if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1288                                 /*
1289                                  * if we're not in NOCACHE mode
1290                                  * (i.e., single-writer), we update the
1291                                  * allocmap here rather than waiting until
1292                                  * cachefspush is called.  This prevents
1293                                  * getpage from clustering up pages from
1294                                  * the backfile and stomping over the changes
1295                                  * we make here.
1296                                  */
1297                                 if (cachefs_charge_page(cp, offset) == 0) {
1298                                         cachefs_update_allocmap(cp,
1299                                             offset & (offset_t)PAGEMASK,
1300                                             (size_t)PAGESIZE);
1301                                 }
1302 
1303                                 /* else we ran out of space */
1304                                 else {
1305                                         /* nocache file if connected */
1306                                         if (fscp->fs_cdconnected ==
1307                                             CFS_CD_CONNECTED)
1308                                                 cachefs_nocache(cp);
1309                                         /*
1310                                          * If disconnected then cannot
1311                                          * nocache the file.  Let it have
1312                                          * the space.
1313                                          */
1314                                         else {
1315                                                 cp->c_metadata.md_frontblks++;
1316                                                 cp->c_flags |= CN_UPDATED;
1317                                                 cachefs_update_allocmap(cp,
1318                                                     offset & (offset_t)PAGEMASK,
1319                                                     (size_t)PAGESIZE);
1320                                         }
1321                                 }
1322                         }
1323                 }
1324                 mutex_exit(&cp->c_statelock);
1325                 cachefs_cd_release(fscp);
1326         } while (tcount > 0 && error == 0);
1327 
1328         if (cp->c_flags & CN_CMODINPROG) {
1329                 /* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1330                 mutex_enter(&cp->c_statelock);
1331                 cp->c_flags &= ~CN_CMODINPROG;
1332                 mutex_exit(&cp->c_statelock);
1333         }
1334 
1335 #ifdef CFS_CD_DEBUG
1336         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1337 #endif
1338 
1339 #ifdef CFSDEBUG
1340         CFS_DEBUG(CFSDEBUG_VOPS)
1341                 printf("cachefs_writepage: EXIT error %d\n", error);
1342 #endif
1343 
1344         return (error);
1345 }
1346 
1347 /*
1348  * Pushes out pages to the back and/or front file system.
1349  */
1350 static int
1351 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1352     int flags, cred_t *cr)
1353 {
1354         struct cnode *cp = VTOC(vp);
1355         struct buf *bp;
1356         int error;
1357         fscache_t *fscp = C_TO_FSCACHE(cp);
1358         u_offset_t iooff;
1359         size_t iolen;
1360         u_offset_t lbn;
1361         u_offset_t lbn_off;
1362         uint_t bsize;
1363 
1364         ASSERT((flags & B_ASYNC) == 0);
1365         ASSERT(!vn_is_readonly(vp));
1366         ASSERT(pp != NULL);
1367         ASSERT(cr != NULL);
1368 
1369         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1370         lbn = pp->p_offset / bsize;
1371         lbn_off = lbn * bsize;
1372 
1373         /*
1374          * Find a kluster that fits in one block, or in
1375          * one page if pages are bigger than blocks.  If
1376          * there is less file space allocated than a whole
1377          * page, we'll shorten the i/o request below.
1378          */
1379 
1380         pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1381             roundup(bsize, PAGESIZE), flags);
1382 
1383         /*
1384          * The CN_CMODINPROG flag makes sure that we use a correct
1385          * value of c_size, below.  CN_CMODINPROG is set in
1386          * cachefs_writepage().  When CN_CMODINPROG is set it
1387          * indicates that a uiomove() is in progress and the c_size
1388          * has not been made consistent with the new size of the
1389          * file. When the uiomove() completes the c_size is updated
1390          * and the CN_CMODINPROG flag is cleared.
1391          *
1392          * The CN_CMODINPROG flag makes sure that cachefs_push_front
1393          * and cachefs_push_connected see a consistent value of
1394          * c_size.  Without this handshaking, it is possible that
1395          * these routines will pick up the old value of c_size before
1396          * the uiomove() in cachefs_writepage() completes.  This will
1397          * result in the vn_rdwr() being too small, and data loss.
1398          *
1399          * More precisely, there is a window between the time the
1400          * uiomove() completes and the time the c_size is updated. If
1401          * a VOP_PUTPAGE() operation intervenes in this window, the
1402          * page will be picked up, because it is dirty; it will be
1403          * unlocked, unless it was pagecreate'd. When the page is
1404          * picked up as dirty, the dirty bit is reset
1405          * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1406          * checked.  This will still be the old size.  Therefore, the
1407          * page will not be written out to the correct length, and the
1408          * page will be clean, so the data may disappear.
1409          */
1410         if (cp->c_flags & CN_CMODINPROG) {
1411                 mutex_enter(&cp->c_statelock);
1412                 if ((cp->c_flags & CN_CMODINPROG) &&
1413                     cp->c_modaddr + MAXBSIZE > iooff &&
1414                     cp->c_modaddr < iooff + iolen) {
1415                         page_t *plist;
1416 
1417                         /*
1418                          * A write is in progress for this region of
1419                          * the file.  If we did not detect
1420                          * CN_CMODINPROG here then this path through
1421                          * cachefs_push_connected() would eventually
1422                          * do the vn_rdwr() and may not write out all
1423                          * of the data in the pages.  We end up losing
1424                          * data. So we decide to set the modified bit
1425                          * on each page in the page list and mark the
1426                          * cnode with CDIRTY.  This push will be
1427                          * restarted at some later time.
1428                          */
1429 
1430                         plist = pp;
1431                         while (plist != NULL) {
1432                                 pp = plist;
1433                                 page_sub(&plist, pp);
1434                                 hat_setmod(pp);
1435                                 page_io_unlock(pp);
1436                                 page_unlock(pp);
1437                         }
1438                         cp->c_flags |= CDIRTY;
1439                         mutex_exit(&cp->c_statelock);
1440                         if (offp)
1441                                 *offp = iooff;
1442                         if (lenp)
1443                                 *lenp = iolen;
1444                         return (0);
1445                 }
1446                 mutex_exit(&cp->c_statelock);
1447         }
1448 
1449         /*
1450          * Set the pages up for pageout.
1451          */
1452         bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1453         if (bp == NULL) {
1454 
1455                 /*
1456                  * currently, there is no way for pageio_setup() to
1457                  * return NULL, since it uses its own scheme for
1458                  * kmem_alloc()ing that shouldn't return NULL, and
1459                  * since pageio_setup() itself dereferences the thing
1460                  * it's about to return.  still, we need to be ready
1461                  * in case this ever does start happening.
1462                  */
1463 
1464                 error = ENOMEM;
1465                 goto writedone;
1466         }
1467         /*
1468          * pageio_setup should have set b_addr to 0.  This
1469          * is correct since we want to do I/O on a page
1470          * boundary.  bp_mapin will use this addr to calculate
1471          * an offset, and then set b_addr to the kernel virtual
1472          * address it allocated for us.
1473          */
1474         bp->b_edev = 0;
1475         bp->b_dev = 0;
1476         bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1477         bp_mapin(bp);
1478 
1479         iolen  = cp->c_size - ldbtob(bp->b_blkno);
1480         if (iolen > bp->b_bcount)
1481                 iolen  = bp->b_bcount;
1482 
1483         /* if connected */
1484         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1485                 /* write to the back file first */
1486                 error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1487 
1488                 /* write to the front file if allowed */
1489                 if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1490                     ((cp->c_flags & CN_NOCACHE) == 0)) {
1491                         /* try to write to the front file */
1492                         (void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1493                 }
1494         }
1495 
1496         /* else if disconnected */
1497         else {
1498                 /* try to write to the front file */
1499                 error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1500         }
1501 
1502         bp_mapout(bp);
1503         pageio_done(bp);
1504 
1505 writedone:
1506 
1507         pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1508         if (offp)
1509                 *offp = iooff;
1510         if (lenp)
1511                 *lenp = iolen;
1512 
1513         /* XXX ask bob mastors how to fix this someday */
1514         mutex_enter(&cp->c_statelock);
1515         if (error) {
1516                 if (error == ENOSPC) {
1517                         if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1518                             CFS_ISFS_SOFT(fscp)) {
1519                                 CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1520                                 cp->c_error = error;
1521                         }
1522                 } else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1523                     (error != EINTR)) {
1524                         CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1525                         cp->c_error = error;
1526                 }
1527         } else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1528                 CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1529         }
1530         mutex_exit(&cp->c_statelock);
1531 
1532         return (error);
1533 }
1534 
1535 /*
1536  * Pushes out pages to the back file system.
1537  */
1538 static int
1539 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1540     u_offset_t iooff, cred_t *cr)
1541 {
1542         struct cnode *cp = VTOC(vp);
1543         int error = 0;
1544         int mode = 0;
1545         fscache_t *fscp = C_TO_FSCACHE(cp);
1546         ssize_t resid;
1547         vnode_t *backvp;
1548 
1549         /* get the back file if necessary */
1550         mutex_enter(&cp->c_statelock);
1551         if (cp->c_backvp == NULL) {
1552                 error = cachefs_getbackvp(fscp, cp);
1553                 if (error) {
1554                         mutex_exit(&cp->c_statelock);
1555                         goto out;
1556                 }
1557         }
1558         backvp = cp->c_backvp;
1559         VN_HOLD(backvp);
1560         mutex_exit(&cp->c_statelock);
1561 
1562         if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1563                 mode = FSYNC;
1564 
1565         /* write to the back file */
1566         error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1567             iolen, iooff, UIO_SYSSPACE, mode,
1568             RLIM64_INFINITY, cr, &resid);
1569         if (error) {
1570 #ifdef CFSDEBUG
1571                 CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1572                         printf("cachefspush: error %d cr %p\n",
1573                             error, (void *)cr);
1574 #endif
1575                 bp->b_flags |= B_ERROR;
1576         }
1577         VN_RELE(backvp);
1578 out:
1579         return (error);
1580 }
1581 
1582 /*
1583  * Pushes out pages to the front file system.
1584  * Called for both connected and disconnected states.
1585  */
1586 static int
1587 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1588     u_offset_t iooff, cred_t *cr)
1589 {
1590         struct cnode *cp = VTOC(vp);
1591         fscache_t *fscp = C_TO_FSCACHE(cp);
1592         int error = 0;
1593         ssize_t resid;
1594         u_offset_t popoff;
1595         off_t commit = 0;
1596         uint_t seq;
1597         enum cachefs_rl_type type;
1598         vnode_t *frontvp = NULL;
1599 
1600         mutex_enter(&cp->c_statelock);
1601 
1602         if (!CFS_ISFS_NONSHARED(fscp)) {
1603                 error = ETIMEDOUT;
1604                 goto out;
1605         }
1606 
1607         /* get the front file if necessary */
1608         if ((cp->c_frontvp == NULL) &&
1609             ((cp->c_flags & CN_NOCACHE) == 0)) {
1610                 (void) cachefs_getfrontfile(cp);
1611         }
1612         if (cp->c_flags & CN_NOCACHE) {
1613                 error = ETIMEDOUT;
1614                 goto out;
1615         }
1616 
1617         /* if disconnected, needs to be populated and have good attributes */
1618         if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1619             (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1620             (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1621                 error = ETIMEDOUT;
1622                 goto out;
1623         }
1624 
1625         for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1626                 if (cachefs_charge_page(cp, popoff)) {
1627                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1628                                 cachefs_nocache(cp);
1629                                 goto out;
1630                         } else {
1631                                 error = ENOSPC;
1632                                 goto out;
1633                         }
1634                 }
1635         }
1636 
1637         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1638                 /* log the first putpage to a file */
1639                 if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1640                         /* uses open's creds if we have them */
1641                         if (cp->c_cred)
1642                                 cr = cp->c_cred;
1643 
1644                         if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1645                                 error = cachefs_dlog_cidmap(fscp);
1646                                 if (error) {
1647                                         error = ENOSPC;
1648                                         goto out;
1649                                 }
1650                                 cp->c_metadata.md_flags |= MD_MAPPING;
1651                         }
1652 
1653                         commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1654                         if (commit == 0) {
1655                                 /* out of space */
1656                                 error = ENOSPC;
1657                                 goto out;
1658                         }
1659 
1660                         cp->c_metadata.md_seq = seq;
1661                         type = cp->c_metadata.md_rltype;
1662                         cachefs_modified(cp);
1663                         cp->c_metadata.md_flags |= MD_PUTPAGE;
1664                         cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1665                         cp->c_flags |= CN_UPDATED;
1666                 }
1667 
1668                 /* subsequent putpages just get a new sequence number */
1669                 else {
1670                         /* but only if it matters */
1671                         if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1672                                 seq = cachefs_dlog_seqnext(fscp);
1673                                 if (seq == 0) {
1674                                         error = ENOSPC;
1675                                         goto out;
1676                                 }
1677                                 cp->c_metadata.md_seq = seq;
1678                                 cp->c_flags |= CN_UPDATED;
1679                                 /* XXX maybe should do write_metadata here */
1680                         }
1681                 }
1682         }
1683 
1684         frontvp = cp->c_frontvp;
1685         VN_HOLD(frontvp);
1686         mutex_exit(&cp->c_statelock);
1687         error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1688             bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1689             RLIM64_INFINITY, kcred, &resid);
1690         mutex_enter(&cp->c_statelock);
1691         VN_RELE(frontvp);
1692         frontvp = NULL;
1693         if (error) {
1694                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1695                         cachefs_nocache(cp);
1696                         error = 0;
1697                         goto out;
1698                 } else {
1699                         goto out;
1700                 }
1701         }
1702 
1703         (void) cachefs_update_allocmap(cp, iooff, iolen);
1704         cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1705             CN_POPULATION_PENDING);
1706         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1707                 gethrestime(&cp->c_metadata.md_localmtime);
1708                 cp->c_metadata.md_flags |= MD_LOCALMTIME;
1709         }
1710 
1711 out:
1712         if (commit) {
1713                 /* commit the log record */
1714                 ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1715                 if (cachefs_dlog_commit(fscp, commit, error)) {
1716                         /*EMPTY*/
1717                         /* XXX fix on panic */
1718                 }
1719         }
1720 
1721         if (error && commit) {
1722                 cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1723                 cachefs_rlent_moveto(fscp->fs_cache, type,
1724                     cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1725                 cp->c_metadata.md_rltype = type;
1726                 cp->c_flags |= CN_UPDATED;
1727         }
1728         mutex_exit(&cp->c_statelock);
1729         return (error);
1730 }
1731 
1732 /*ARGSUSED*/
1733 static int
1734 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1735     caller_context_t *ct)
1736 {
1737         return (ENOSYS); /* should we panic if we get here? */
1738 }
1739 
1740 /*ARGSUSED*/
1741 static int
1742 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1743         int *rvalp, caller_context_t *ct)
1744 {
1745         int error;
1746         struct cnode *cp = VTOC(vp);
1747         struct fscache *fscp = C_TO_FSCACHE(cp);
1748         struct cachefscache *cachep;
1749         extern kmutex_t cachefs_cachelock;
1750         extern cachefscache_t *cachefs_cachelist;
1751         cachefsio_pack_t *packp;
1752         STRUCT_DECL(cachefsio_dcmd, dcmd);
1753         int     inlen, outlen;  /* LP64: generic int for struct in/out len */
1754         void *dinp, *doutp;
1755         int (*dcmd_routine)(vnode_t *, void *, void *);
1756 
1757         if (getzoneid() != GLOBAL_ZONEID)
1758                 return (EPERM);
1759 
1760         /*
1761          * Cachefs only provides pass-through support for NFSv4,
1762          * and all vnode operations are passed through to the
1763          * back file system. For NFSv4 pass-through to work, only
1764          * connected operation is supported, the cnode backvp must
1765          * exist, and cachefs optional (eg., disconnectable) flags
1766          * are turned off. Assert these conditions which ensure
1767          * that only a subset of the ioctls are "truly supported"
1768          * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1769          * The packing operations are meaningless since there is
1770          * no caching for NFSv4, and the called functions silently
1771          * return if the backfilesystem is NFSv4. The daemon
1772          * commands except for those above are essentially used
1773          * for disconnectable operation support (including log
1774          * rolling), so in each called function, we assert that
1775          * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1776          * are from "cfsfstype" which is not a documented
1777          * command. However, the command is visible in
1778          * /usr/lib/fs/cachefs so the commands are simply let
1779          * through (don't seem to impact pass-through functionality).
1780          */
1781         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1782         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1783 
1784         switch (cmd) {
1785         case CACHEFSIO_PACK:
1786                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1787                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1788                 if (!error)
1789                         error = cachefs_pack(vp, packp->p_name, cred);
1790                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1791                 break;
1792 
1793         case CACHEFSIO_UNPACK:
1794                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1795                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1796                 if (!error)
1797                         error = cachefs_unpack(vp, packp->p_name, cred);
1798                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1799                 break;
1800 
1801         case CACHEFSIO_PACKINFO:
1802                 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1803                 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1804                 if (!error)
1805                         error = cachefs_packinfo(vp, packp->p_name,
1806                             &packp->p_status, cred);
1807                 if (!error)
1808                         error = xcopyout(packp, (void *)arg,
1809                             sizeof (cachefsio_pack_t));
1810                 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1811                 break;
1812 
1813         case CACHEFSIO_UNPACKALL:
1814                 error = cachefs_unpackall(vp);
1815                 break;
1816 
1817         case CACHEFSIO_DCMD:
1818                 /*
1819                  * This is a private interface between the cachefsd and
1820                  * this file system.
1821                  */
1822 
1823                 /* must be root to use these commands */
1824                 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1825                         return (EPERM);
1826 
1827                 /* get the command packet */
1828                 STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1829                 error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1830                     SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1831                 if (error)
1832                         return (error);
1833 
1834                 /* copy in the data for the operation */
1835                 dinp = NULL;
1836                 if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1837                         dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1838                         error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1839                             inlen);
1840                         if (error)
1841                                 return (error);
1842                 }
1843 
1844                 /* allocate space for the result */
1845                 doutp = NULL;
1846                 if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1847                         doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1848 
1849                 /*
1850                  * Assert NFSv4 only allows the daemonid and getstats
1851                  * daemon requests
1852                  */
1853                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1854                     STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1855                     STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1856 
1857                 /* get the routine to execute */
1858                 dcmd_routine = NULL;
1859                 switch (STRUCT_FGET(dcmd, d_cmd)) {
1860                 case CFSDCMD_DAEMONID:
1861                         dcmd_routine = cachefs_io_daemonid;
1862                         break;
1863                 case CFSDCMD_STATEGET:
1864                         dcmd_routine = cachefs_io_stateget;
1865                         break;
1866                 case CFSDCMD_STATESET:
1867                         dcmd_routine = cachefs_io_stateset;
1868                         break;
1869                 case CFSDCMD_XWAIT:
1870                         dcmd_routine = cachefs_io_xwait;
1871                         break;
1872                 case CFSDCMD_EXISTS:
1873                         dcmd_routine = cachefs_io_exists;
1874                         break;
1875                 case CFSDCMD_LOSTFOUND:
1876                         dcmd_routine = cachefs_io_lostfound;
1877                         break;
1878                 case CFSDCMD_GETINFO:
1879                         dcmd_routine = cachefs_io_getinfo;
1880                         break;
1881                 case CFSDCMD_CIDTOFID:
1882                         dcmd_routine = cachefs_io_cidtofid;
1883                         break;
1884                 case CFSDCMD_GETATTRFID:
1885                         dcmd_routine = cachefs_io_getattrfid;
1886                         break;
1887                 case CFSDCMD_GETATTRNAME:
1888                         dcmd_routine = cachefs_io_getattrname;
1889                         break;
1890                 case CFSDCMD_GETSTATS:
1891                         dcmd_routine = cachefs_io_getstats;
1892                         break;
1893                 case CFSDCMD_ROOTFID:
1894                         dcmd_routine = cachefs_io_rootfid;
1895                         break;
1896                 case CFSDCMD_CREATE:
1897                         dcmd_routine = cachefs_io_create;
1898                         break;
1899                 case CFSDCMD_REMOVE:
1900                         dcmd_routine = cachefs_io_remove;
1901                         break;
1902                 case CFSDCMD_LINK:
1903                         dcmd_routine = cachefs_io_link;
1904                         break;
1905                 case CFSDCMD_RENAME:
1906                         dcmd_routine = cachefs_io_rename;
1907                         break;
1908                 case CFSDCMD_MKDIR:
1909                         dcmd_routine = cachefs_io_mkdir;
1910                         break;
1911                 case CFSDCMD_RMDIR:
1912                         dcmd_routine = cachefs_io_rmdir;
1913                         break;
1914                 case CFSDCMD_SYMLINK:
1915                         dcmd_routine = cachefs_io_symlink;
1916                         break;
1917                 case CFSDCMD_SETATTR:
1918                         dcmd_routine = cachefs_io_setattr;
1919                         break;
1920                 case CFSDCMD_SETSECATTR:
1921                         dcmd_routine = cachefs_io_setsecattr;
1922                         break;
1923                 case CFSDCMD_PUSHBACK:
1924                         dcmd_routine = cachefs_io_pushback;
1925                         break;
1926                 default:
1927                         error = ENOTTY;
1928                         break;
1929                 }
1930 
1931                 /* execute the routine */
1932                 if (dcmd_routine)
1933                         error = (*dcmd_routine)(vp, dinp, doutp);
1934 
1935                 /* copy out the result */
1936                 if ((error == 0) && doutp)
1937                         error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1938                             outlen);
1939 
1940                 /* free allocated memory */
1941                 if (dinp)
1942                         cachefs_kmem_free(dinp, inlen);
1943                 if (doutp)
1944                         cachefs_kmem_free(doutp, outlen);
1945 
1946                 break;
1947 
1948         case _FIOCOD:
1949                 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1950                         error = EPERM;
1951                         break;
1952                 }
1953 
1954                 error = EBUSY;
1955                 if (arg) {
1956                         /* non-zero arg means do all filesystems */
1957                         mutex_enter(&cachefs_cachelock);
1958                         for (cachep = cachefs_cachelist; cachep != NULL;
1959                             cachep = cachep->c_next) {
1960                                 mutex_enter(&cachep->c_fslistlock);
1961                                 for (fscp = cachep->c_fslist;
1962                                     fscp != NULL;
1963                                     fscp = fscp->fs_next) {
1964                                         if (CFS_ISFS_CODCONST(fscp)) {
1965                                                 gethrestime(&fscp->fs_cod_time);
1966                                                 error = 0;
1967                                         }
1968                                 }
1969                                 mutex_exit(&cachep->c_fslistlock);
1970                         }
1971                         mutex_exit(&cachefs_cachelock);
1972                 } else {
1973                         if (CFS_ISFS_CODCONST(fscp)) {
1974                                 gethrestime(&fscp->fs_cod_time);
1975                                 error = 0;
1976                         }
1977                 }
1978                 break;
1979 
1980         case _FIOSTOPCACHE:
1981                 error = cachefs_stop_cache(cp);
1982                 break;
1983 
1984         default:
1985                 error = ENOTTY;
1986                 break;
1987         }
1988 
1989         /* return the result */
1990         return (error);
1991 }
1992 
1993 ino64_t
1994 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1995 {
1996         ino64_t new;
1997 
1998         ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1999 
2000         for (;;) {
2001                 fscp->fs_info.fi_localfileno++;
2002                 if (fscp->fs_info.fi_localfileno == 0)
2003                         fscp->fs_info.fi_localfileno = 3;
2004                 fscp->fs_flags |= CFS_FS_DIRTYINFO;
2005 
2006                 new = fscp->fs_info.fi_localfileno;
2007                 if (! cachefs_fileno_inuse(fscp, new))
2008                         break;
2009         }
2010 
2011         cachefs_inum_register(fscp, old, new);
2012         cachefs_inum_register(fscp, new, 0);
2013         return (new);
2014 }
2015 
2016 /*ARGSUSED*/
2017 static int
2018 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2019         caller_context_t *ct)
2020 {
2021         struct cnode *cp = VTOC(vp);
2022         fscache_t *fscp = C_TO_FSCACHE(cp);
2023         int error = 0;
2024         int held = 0;
2025         int connected = 0;
2026 
2027 #ifdef CFSDEBUG
2028         CFS_DEBUG(CFSDEBUG_VOPS)
2029                 printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2030 #endif
2031 
2032         if (getzoneid() != GLOBAL_ZONEID)
2033                 return (EPERM);
2034 
2035         /* Call backfilesystem getattr if NFSv4 */
2036         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2037                 error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2038                 goto out;
2039         }
2040 
2041         /*
2042          * If it has been specified that the return value will
2043          * just be used as a hint, and we are only being asked
2044          * for size, fsid or rdevid, then return the client's
2045          * notion of these values without checking to make sure
2046          * that the attribute cache is up to date.
2047          * The whole point is to avoid an over the wire GETATTR
2048          * call.
2049          */
2050         if (flags & ATTR_HINT) {
2051                 if (vap->va_mask ==
2052                     (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2053                         if (vap->va_mask | AT_SIZE)
2054                                 vap->va_size = cp->c_size;
2055                         /*
2056                          * Return the FSID of the cachefs filesystem,
2057                          * not the back filesystem
2058                          */
2059                         if (vap->va_mask | AT_FSID)
2060                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
2061                         if (vap->va_mask | AT_RDEV)
2062                                 vap->va_rdev = cp->c_attr.va_rdev;
2063                         return (0);
2064                 }
2065         }
2066 
2067         /*
2068          * Only need to flush pages if asking for the mtime
2069          * and if there any dirty pages.
2070          */
2071         if (vap->va_mask & AT_MTIME) {
2072                 /*EMPTY*/
2073 #if 0
2074                 /*
2075                  * XXX bob: stolen from nfs code, need to do something similar
2076                  */
2077                 rp = VTOR(vp);
2078                 if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2079                         (void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2080 #endif
2081         }
2082 
2083         for (;;) {
2084                 /* get (or renew) access to the file system */
2085                 if (held) {
2086                         cachefs_cd_release(fscp);
2087                         held = 0;
2088                 }
2089                 error = cachefs_cd_access(fscp, connected, 0);
2090                 if (error)
2091                         goto out;
2092                 held = 1;
2093 
2094                 /*
2095                  * If it has been specified that the return value will
2096                  * just be used as a hint, and we are only being asked
2097                  * for size, fsid or rdevid, then return the client's
2098                  * notion of these values without checking to make sure
2099                  * that the attribute cache is up to date.
2100                  * The whole point is to avoid an over the wire GETATTR
2101                  * call.
2102                  */
2103                 if (flags & ATTR_HINT) {
2104                         if (vap->va_mask ==
2105                             (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2106                                 if (vap->va_mask | AT_SIZE)
2107                                         vap->va_size = cp->c_size;
2108                                 /*
2109                                  * Return the FSID of the cachefs filesystem,
2110                                  * not the back filesystem
2111                                  */
2112                                 if (vap->va_mask | AT_FSID)
2113                                         vap->va_fsid = vp->v_vfsp->vfs_dev;
2114                                 if (vap->va_mask | AT_RDEV)
2115                                         vap->va_rdev = cp->c_attr.va_rdev;
2116                                 goto out;
2117                         }
2118                 }
2119 
2120                 mutex_enter(&cp->c_statelock);
2121                 if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2122                     (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2123                         mutex_exit(&cp->c_statelock);
2124                         connected = 1;
2125                         continue;
2126                 }
2127 
2128                 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2129                 if (CFS_TIMEOUT(fscp, error)) {
2130                         mutex_exit(&cp->c_statelock);
2131                         cachefs_cd_release(fscp);
2132                         held = 0;
2133                         cachefs_cd_timedout(fscp);
2134                         continue;
2135                 }
2136                 if (error) {
2137                         mutex_exit(&cp->c_statelock);
2138                         break;
2139                 }
2140 
2141                 /* check for fileno conflict */
2142                 if ((fscp->fs_inum_size > 0) &&
2143                     ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2144                         ino64_t fakenum;
2145 
2146                         mutex_exit(&cp->c_statelock);
2147                         mutex_enter(&fscp->fs_fslock);
2148                         fakenum = cachefs_inum_real2fake(fscp,
2149                             cp->c_attr.va_nodeid);
2150                         if (fakenum == 0) {
2151                                 fakenum = cachefs_fileno_conflict(fscp,
2152                                     cp->c_attr.va_nodeid);
2153                         }
2154                         mutex_exit(&fscp->fs_fslock);
2155 
2156                         mutex_enter(&cp->c_statelock);
2157                         cp->c_metadata.md_flags |= MD_LOCALFILENO;
2158                         cp->c_metadata.md_localfileno = fakenum;
2159                         cp->c_flags |= CN_UPDATED;
2160                 }
2161 
2162                 /* copy out the attributes */
2163                 *vap = cp->c_attr;
2164 
2165                 /*
2166                  * return the FSID of the cachefs filesystem,
2167                  * not the back filesystem
2168                  */
2169                 vap->va_fsid = vp->v_vfsp->vfs_dev;
2170 
2171                 /* return our idea of the size */
2172                 if (cp->c_size > vap->va_size)
2173                         vap->va_size = cp->c_size;
2174 
2175                 /* overwrite with our version of fileno and timestamps */
2176                 vap->va_nodeid = cp->c_metadata.md_localfileno;
2177                 vap->va_mtime = cp->c_metadata.md_localmtime;
2178                 vap->va_ctime = cp->c_metadata.md_localctime;
2179 
2180                 mutex_exit(&cp->c_statelock);
2181                 break;
2182         }
2183 out:
2184         if (held)
2185                 cachefs_cd_release(fscp);
2186 #ifdef CFS_CD_DEBUG
2187         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2188 #endif
2189 
2190 #ifdef CFSDEBUG
2191         CFS_DEBUG(CFSDEBUG_VOPS)
2192                 printf("cachefs_getattr: EXIT error = %d\n", error);
2193 #endif
2194         return (error);
2195 }
2196 
2197 /*
2198  * cachefs_getattr_backfs_nfsv4
2199  *
2200  * Call NFSv4 back filesystem to handle the getattr (cachefs
2201  * pass-through support for NFSv4).
2202  */
2203 static int
2204 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2205     int flags, cred_t *cr, caller_context_t *ct)
2206 {
2207         cnode_t *cp = VTOC(vp);
2208         fscache_t *fscp = C_TO_FSCACHE(cp);
2209         vnode_t *backvp;
2210         int error;
2211 
2212         /*
2213          * For NFSv4 pass-through to work, only connected operation
2214          * is supported, the cnode backvp must exist, and cachefs
2215          * optional (eg., disconnectable) flags are turned off. Assert
2216          * these conditions for the getattr operation.
2217          */
2218         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2219         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2220 
2221         /* Call backfs vnode op after extracting backvp */
2222         mutex_enter(&cp->c_statelock);
2223         backvp = cp->c_backvp;
2224         mutex_exit(&cp->c_statelock);
2225 
2226         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2227             " backvp %p\n", cp, backvp));
2228         error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2229 
2230         /* Update attributes */
2231         cp->c_attr = *vap;
2232 
2233         /*
2234          * return the FSID of the cachefs filesystem,
2235          * not the back filesystem
2236          */
2237         vap->va_fsid = vp->v_vfsp->vfs_dev;
2238 
2239         return (error);
2240 }
2241 
2242 /*ARGSUSED4*/
2243 static int
2244 cachefs_setattr(
2245         vnode_t *vp,
2246         vattr_t *vap,
2247         int flags,
2248         cred_t *cr,
2249         caller_context_t *ct)
2250 {
2251         cnode_t *cp = VTOC(vp);
2252         fscache_t *fscp = C_TO_FSCACHE(cp);
2253         int error;
2254         int connected;
2255         int held = 0;
2256 
2257         if (getzoneid() != GLOBAL_ZONEID)
2258                 return (EPERM);
2259 
2260         /*
2261          * Cachefs only provides pass-through support for NFSv4,
2262          * and all vnode operations are passed through to the
2263          * back file system. For NFSv4 pass-through to work, only
2264          * connected operation is supported, the cnode backvp must
2265          * exist, and cachefs optional (eg., disconnectable) flags
2266          * are turned off. Assert these conditions to ensure that
2267          * the backfilesystem is called for the setattr operation.
2268          */
2269         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2270         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2271 
2272         connected = 0;
2273         for (;;) {
2274                 /* drop hold on file system */
2275                 if (held) {
2276                         /* Won't loop with NFSv4 connected behavior */
2277                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2278                         cachefs_cd_release(fscp);
2279                         held = 0;
2280                 }
2281 
2282                 /* acquire access to the file system */
2283                 error = cachefs_cd_access(fscp, connected, 1);
2284                 if (error)
2285                         break;
2286                 held = 1;
2287 
2288                 /* perform the setattr */
2289                 error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2290                 if (error) {
2291                         /* if connected */
2292                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2293                                 if (CFS_TIMEOUT(fscp, error)) {
2294                                         cachefs_cd_release(fscp);
2295                                         held = 0;
2296                                         cachefs_cd_timedout(fscp);
2297                                         connected = 0;
2298                                         continue;
2299                                 }
2300                         }
2301 
2302                         /* else must be disconnected */
2303                         else {
2304                                 if (CFS_TIMEOUT(fscp, error)) {
2305                                         connected = 1;
2306                                         continue;
2307                                 }
2308                         }
2309                 }
2310                 break;
2311         }
2312 
2313         if (held) {
2314                 cachefs_cd_release(fscp);
2315         }
2316 #ifdef CFS_CD_DEBUG
2317         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2318 #endif
2319         return (error);
2320 }
2321 
2322 static int
2323 cachefs_setattr_common(
2324         vnode_t *vp,
2325         vattr_t *vap,
2326         int flags,
2327         cred_t *cr,
2328         caller_context_t *ct)
2329 {
2330         cnode_t *cp = VTOC(vp);
2331         fscache_t *fscp = C_TO_FSCACHE(cp);
2332         cachefscache_t *cachep = fscp->fs_cache;
2333         uint_t mask = vap->va_mask;
2334         int error = 0;
2335         uint_t bcnt;
2336 
2337         /* Cannot set these attributes. */
2338         if (mask & AT_NOSET)
2339                 return (EINVAL);
2340 
2341         /*
2342          * Truncate file.  Must have write permission and not be a directory.
2343          */
2344         if (mask & AT_SIZE) {
2345                 if (vp->v_type == VDIR) {
2346                         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2347                                 cachefs_log_truncate(cachep, EISDIR,
2348                                     fscp->fs_cfsvfsp,
2349                                     &cp->c_metadata.md_cookie,
2350                                     cp->c_id.cid_fileno,
2351                                     crgetuid(cr), vap->va_size);
2352                         return (EISDIR);
2353                 }
2354         }
2355 
2356         /*
2357          * Gotta deal with one special case here, where we're setting the
2358          * size of the file. First, we zero out part of the page after the
2359          * new size of the file. Then we toss (not write) all pages after
2360          * page in which the new offset occurs. Note that the NULL passed
2361          * in instead of a putapage() fn parameter is correct, since
2362          * no dirty pages will be found (B_TRUNC | B_INVAL).
2363          */
2364 
2365         rw_enter(&cp->c_rwlock, RW_WRITER);
2366 
2367         /* sync dirty pages */
2368         if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2369                 error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2370                 if (error == EINTR)
2371                         goto out;
2372         }
2373         error = 0;
2374 
2375         /* if connected */
2376         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2377                 error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2378         }
2379         /* else must be disconnected */
2380         else {
2381                 error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2382         }
2383         if (error)
2384                 goto out;
2385 
2386         /*
2387          * If the file size has been changed then
2388          * toss whole pages beyond the end of the file and zero
2389          * the portion of the last page that is beyond the end of the file.
2390          */
2391         if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2392                 bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2393                 if (bcnt)
2394                         pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2395                 (void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2396                     B_TRUNC | B_INVAL, cr);
2397         }
2398 
2399 out:
2400         rw_exit(&cp->c_rwlock);
2401 
2402         if ((mask & AT_SIZE) &&
2403             (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2404                 cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2405                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2406                     crgetuid(cr), vap->va_size);
2407 
2408         return (error);
2409 }
2410 
2411 static int
2412 cachefs_setattr_connected(
2413         vnode_t *vp,
2414         vattr_t *vap,
2415         int flags,
2416         cred_t *cr,
2417         caller_context_t *ct)
2418 {
2419         cnode_t *cp = VTOC(vp);
2420         fscache_t *fscp = C_TO_FSCACHE(cp);
2421         uint_t mask = vap->va_mask;
2422         int error = 0;
2423         int setsize;
2424 
2425         mutex_enter(&cp->c_statelock);
2426 
2427         if (cp->c_backvp == NULL) {
2428                 error = cachefs_getbackvp(fscp, cp);
2429                 if (error)
2430                         goto out;
2431         }
2432 
2433         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2434         if (error)
2435                 goto out;
2436 
2437         CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2438             "backvp %p\n", cp, cp->c_backvp));
2439         error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2440         if (error) {
2441                 goto out;
2442         }
2443 
2444         /* if the size of the file is being changed */
2445         if (mask & AT_SIZE) {
2446                 cp->c_size = vap->va_size;
2447                 error = 0;
2448                 setsize = 0;
2449 
2450                 /* see if okay to try to set the file size */
2451                 if (((cp->c_flags & CN_NOCACHE) == 0) &&
2452                     CFS_ISFS_NONSHARED(fscp)) {
2453                         /* okay to set size if file is populated */
2454                         if (cp->c_metadata.md_flags & MD_POPULATED)
2455                                 setsize = 1;
2456 
2457                         /*
2458                          * Okay to set size if front file exists and setting
2459                          * file size to zero.
2460                          */
2461                         if ((cp->c_metadata.md_flags & MD_FILE) &&
2462                             (vap->va_size == 0))
2463                                 setsize = 1;
2464                 }
2465 
2466                 /* if okay to try to set the file size */
2467                 if (setsize) {
2468                         error = 0;
2469                         if (cp->c_frontvp == NULL)
2470                                 error = cachefs_getfrontfile(cp);
2471                         if (error == 0)
2472                                 error = cachefs_frontfile_size(cp, cp->c_size);
2473                 } else if (cp->c_metadata.md_flags & MD_FILE) {
2474                         /* make sure file gets nocached */
2475                         error = EEXIST;
2476                 }
2477 
2478                 /* if we have to nocache the file */
2479                 if (error) {
2480                         if ((cp->c_flags & CN_NOCACHE) == 0 &&
2481                             !CFS_ISFS_BACKFS_NFSV4(fscp))
2482                                 cachefs_nocache(cp);
2483                         error = 0;
2484                 }
2485         }
2486 
2487         cp->c_flags |= CN_UPDATED;
2488 
2489         /* XXX bob: given what modify_cobject does this seems unnecessary */
2490         cp->c_attr.va_mask = AT_ALL;
2491         error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2492         if (error)
2493                 goto out;
2494 
2495         cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2496         cp->c_size = cp->c_attr.va_size;
2497 
2498         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2499 out:
2500         mutex_exit(&cp->c_statelock);
2501         return (error);
2502 }
2503 
2504 /*
2505  * perform the setattr on the local file system
2506  */
2507 /*ARGSUSED4*/
2508 static int
2509 cachefs_setattr_disconnected(
2510         vnode_t *vp,
2511         vattr_t *vap,
2512         int flags,
2513         cred_t *cr,
2514         caller_context_t *ct)
2515 {
2516         cnode_t *cp = VTOC(vp);
2517         fscache_t *fscp = C_TO_FSCACHE(cp);
2518         int mask;
2519         int error;
2520         int newfile;
2521         off_t commit = 0;
2522 
2523         if (CFS_ISFS_WRITE_AROUND(fscp))
2524                 return (ETIMEDOUT);
2525 
2526         /* if we do not have good attributes */
2527         if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2528                 return (ETIMEDOUT);
2529 
2530         /* primary concern is to keep this routine as much like ufs_setattr */
2531 
2532         mutex_enter(&cp->c_statelock);
2533 
2534         error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2535             cachefs_access_local, cp);
2536 
2537         if (error)
2538                 goto out;
2539 
2540         mask = vap->va_mask;
2541 
2542         /* if changing the size of the file */
2543         if (mask & AT_SIZE) {
2544                 if (vp->v_type == VDIR) {
2545                         error = EISDIR;
2546                         goto out;
2547                 }
2548 
2549                 if (vp->v_type == VFIFO) {
2550                         error = 0;
2551                         goto out;
2552                 }
2553 
2554                 if ((vp->v_type != VREG) &&
2555                     !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2556                         error = EINVAL;
2557                         goto out;
2558                 }
2559 
2560                 if (vap->va_size > fscp->fs_offmax) {
2561                         error = EFBIG;
2562                         goto out;
2563                 }
2564 
2565                 /* if the file is not populated and we are not truncating it */
2566                 if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2567                     (vap->va_size != 0)) {
2568                         error = ETIMEDOUT;
2569                         goto out;
2570                 }
2571 
2572                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2573                         error = cachefs_dlog_cidmap(fscp);
2574                         if (error) {
2575                                 error = ENOSPC;
2576                                 goto out;
2577                         }
2578                         cp->c_metadata.md_flags |= MD_MAPPING;
2579                 }
2580 
2581                 /* log the operation */
2582                 commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2583                 if (commit == 0) {
2584                         error = ENOSPC;
2585                         goto out;
2586                 }
2587                 cp->c_flags &= ~CN_NOCACHE;
2588 
2589                 /* special case truncating fast sym links */
2590                 if ((vp->v_type == VLNK) &&
2591                     (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2592                         /* XXX how can we get here */
2593                         /* XXX should update mtime */
2594                         cp->c_size = 0;
2595                         error = 0;
2596                         goto out;
2597                 }
2598 
2599                 /* get the front file, this may create one */
2600                 newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2601                 if (cp->c_frontvp == NULL) {
2602                         error = cachefs_getfrontfile(cp);
2603                         if (error)
2604                                 goto out;
2605                 }
2606                 ASSERT(cp->c_frontvp);
2607                 if (newfile && (cp->c_flags & CN_UPDATED)) {
2608                         /* allocate space for the metadata */
2609                         ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2610                         ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2611                             == 0);
2612                         error = filegrp_write_metadata(cp->c_filegrp,
2613                             &cp->c_id, &cp->c_metadata);
2614                         if (error)
2615                                 goto out;
2616                 }
2617 
2618                 /* change the size of the front file */
2619                 error = cachefs_frontfile_size(cp, vap->va_size);
2620                 if (error)
2621                         goto out;
2622                 cp->c_attr.va_size = cp->c_size = vap->va_size;
2623                 gethrestime(&cp->c_metadata.md_localmtime);
2624                 cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2625                 cachefs_modified(cp);
2626                 cp->c_flags |= CN_UPDATED;
2627         }
2628 
2629         if (mask & AT_MODE) {
2630                 /* mark as modified */
2631                 if (cachefs_modified_alloc(cp)) {
2632                         error = ENOSPC;
2633                         goto out;
2634                 }
2635 
2636                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2637                         error = cachefs_dlog_cidmap(fscp);
2638                         if (error) {
2639                                 error = ENOSPC;
2640                                 goto out;
2641                         }
2642                         cp->c_metadata.md_flags |= MD_MAPPING;
2643                 }
2644 
2645                 /* log the operation if not already logged */
2646                 if (commit == 0) {
2647                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2648                         if (commit == 0) {
2649                                 error = ENOSPC;
2650                                 goto out;
2651                         }
2652                 }
2653 
2654                 cp->c_attr.va_mode &= S_IFMT;
2655                 cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2656                 gethrestime(&cp->c_metadata.md_localctime);
2657                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2658                 cp->c_flags |= CN_UPDATED;
2659         }
2660 
2661         if (mask & (AT_UID|AT_GID)) {
2662 
2663                 /* mark as modified */
2664                 if (cachefs_modified_alloc(cp)) {
2665                         error = ENOSPC;
2666                         goto out;
2667                 }
2668 
2669                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2670                         error = cachefs_dlog_cidmap(fscp);
2671                         if (error) {
2672                                 error = ENOSPC;
2673                                 goto out;
2674                         }
2675                         cp->c_metadata.md_flags |= MD_MAPPING;
2676                 }
2677 
2678                 /* log the operation if not already logged */
2679                 if (commit == 0) {
2680                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2681                         if (commit == 0) {
2682                                 error = ENOSPC;
2683                                 goto out;
2684                         }
2685                 }
2686 
2687                 if (mask & AT_UID)
2688                         cp->c_attr.va_uid = vap->va_uid;
2689 
2690                 if (mask & AT_GID)
2691                         cp->c_attr.va_gid = vap->va_gid;
2692                 gethrestime(&cp->c_metadata.md_localctime);
2693                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2694                 cp->c_flags |= CN_UPDATED;
2695         }
2696 
2697 
2698         if (mask & (AT_MTIME|AT_ATIME)) {
2699                 /* mark as modified */
2700                 if (cachefs_modified_alloc(cp)) {
2701                         error = ENOSPC;
2702                         goto out;
2703                 }
2704 
2705                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2706                         error = cachefs_dlog_cidmap(fscp);
2707                         if (error) {
2708                                 error = ENOSPC;
2709                                 goto out;
2710                         }
2711                         cp->c_metadata.md_flags |= MD_MAPPING;
2712                 }
2713 
2714                 /* log the operation if not already logged */
2715                 if (commit == 0) {
2716                         commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2717                         if (commit == 0) {
2718                                 error = ENOSPC;
2719                                 goto out;
2720                         }
2721                 }
2722 
2723                 if (mask & AT_MTIME) {
2724                         cp->c_metadata.md_localmtime = vap->va_mtime;
2725                         cp->c_metadata.md_flags |= MD_LOCALMTIME;
2726                 }
2727                 if (mask & AT_ATIME)
2728                         cp->c_attr.va_atime = vap->va_atime;
2729                 gethrestime(&cp->c_metadata.md_localctime);
2730                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
2731                 cp->c_flags |= CN_UPDATED;
2732         }
2733 
2734 out:
2735         mutex_exit(&cp->c_statelock);
2736 
2737         /* commit the log entry */
2738         if (commit) {
2739                 if (cachefs_dlog_commit(fscp, commit, error)) {
2740                         /*EMPTY*/
2741                         /* XXX bob: fix on panic */
2742                 }
2743         }
2744         return (error);
2745 }
2746 
2747 /* ARGSUSED */
2748 static int
2749 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2750         caller_context_t *ct)
2751 {
2752         cnode_t *cp = VTOC(vp);
2753         fscache_t *fscp = C_TO_FSCACHE(cp);
2754         int error;
2755         int held = 0;
2756         int connected = 0;
2757 
2758 #ifdef CFSDEBUG
2759         CFS_DEBUG(CFSDEBUG_VOPS)
2760                 printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2761 #endif
2762         if (getzoneid() != GLOBAL_ZONEID) {
2763                 error = EPERM;
2764                 goto out;
2765         }
2766 
2767         /*
2768          * Cachefs only provides pass-through support for NFSv4,
2769          * and all vnode operations are passed through to the
2770          * back file system. For NFSv4 pass-through to work, only
2771          * connected operation is supported, the cnode backvp must
2772          * exist, and cachefs optional (eg., disconnectable) flags
2773          * are turned off. Assert these conditions to ensure that
2774          * the backfilesystem is called for the access operation.
2775          */
2776         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2777         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2778 
2779         for (;;) {
2780                 /* get (or renew) access to the file system */
2781                 if (held) {
2782                         /* Won't loop with NFSv4 connected behavior */
2783                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2784                         cachefs_cd_release(fscp);
2785                         held = 0;
2786                 }
2787                 error = cachefs_cd_access(fscp, connected, 0);
2788                 if (error)
2789                         break;
2790                 held = 1;
2791 
2792                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2793                         error = cachefs_access_connected(vp, mode, flags,
2794                             cr);
2795                         if (CFS_TIMEOUT(fscp, error)) {
2796                                 cachefs_cd_release(fscp);
2797                                 held = 0;
2798                                 cachefs_cd_timedout(fscp);
2799                                 connected = 0;
2800                                 continue;
2801                         }
2802                 } else {
2803                         mutex_enter(&cp->c_statelock);
2804                         error = cachefs_access_local(cp, mode, cr);
2805                         mutex_exit(&cp->c_statelock);
2806                         if (CFS_TIMEOUT(fscp, error)) {
2807                                 if (cachefs_cd_access_miss(fscp)) {
2808                                         mutex_enter(&cp->c_statelock);
2809                                         if (cp->c_backvp == NULL) {
2810                                                 (void) cachefs_getbackvp(fscp,
2811                                                     cp);
2812                                         }
2813                                         mutex_exit(&cp->c_statelock);
2814                                         error = cachefs_access_connected(vp,
2815                                             mode, flags, cr);
2816                                         if (!CFS_TIMEOUT(fscp, error))
2817                                                 break;
2818                                         delay(5*hz);
2819                                         connected = 0;
2820                                         continue;
2821                                 }
2822                                 connected = 1;
2823                                 continue;
2824                         }
2825                 }
2826                 break;
2827         }
2828         if (held)
2829                 cachefs_cd_release(fscp);
2830 #ifdef CFS_CD_DEBUG
2831         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2832 #endif
2833 out:
2834 #ifdef CFSDEBUG
2835         CFS_DEBUG(CFSDEBUG_VOPS)
2836                 printf("cachefs_access: EXIT error = %d\n", error);
2837 #endif
2838         return (error);
2839 }
2840 
2841 static int
2842 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2843 {
2844         cnode_t *cp = VTOC(vp);
2845         fscache_t *fscp = C_TO_FSCACHE(cp);
2846         int error = 0;
2847 
2848         mutex_enter(&cp->c_statelock);
2849 
2850         /* Make sure the cnode attrs are valid first. */
2851         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2852         if (error)
2853                 goto out;
2854 
2855         /* see if can do a local file system check */
2856         if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2857             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2858                 error = cachefs_access_local(cp, mode, cr);
2859                 goto out;
2860         }
2861 
2862         /* else do a remote file system check */
2863         else {
2864                 if (cp->c_backvp == NULL) {
2865                         error = cachefs_getbackvp(fscp, cp);
2866                         if (error)
2867                                 goto out;
2868                 }
2869 
2870                 CFS_DPRINT_BACKFS_NFSV4(fscp,
2871                     ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2872                     cp, cp->c_backvp));
2873                 error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2874 
2875                 /*
2876                  * even though we don't `need' the ACL to do access
2877                  * via the backvp, we should cache it here to make our
2878                  * behavior more reasonable if we go disconnected.
2879                  */
2880 
2881                 if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2882                     (cachefs_vtype_aclok(vp)) &&
2883                     ((cp->c_flags & CN_NOCACHE) == 0) &&
2884                     (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2885                     ((cp->c_metadata.md_flags & MD_ACL) == 0))
2886                         (void) cachefs_cacheacl(cp, NULL);
2887         }
2888 out:
2889         /*
2890          * If NFS returned ESTALE, mark this cnode as stale, so that
2891          * the vn_open retry will read the file anew from backfs
2892          */
2893         if (error == ESTALE)
2894                 cachefs_cnode_stale(cp);
2895 
2896         mutex_exit(&cp->c_statelock);
2897         return (error);
2898 }
2899 
2900 /*
2901  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2902  * the link is placed in the metadata itself (no front file is allocated).
      *
      * cachefs_readlink is the readlink vnode operation.  It returns EPERM
      * when called outside the global zone and EINVAL when vp is not a VLNK.
      * Otherwise it loops: access is obtained with cachefs_cd_access() and,
      * depending on fscp->fs_cdconnected, the work is done by
      * cachefs_readlink_connected() (with cp->c_rwlock held as writer) or by
      * cachefs_readlink_disconnected().  An error for which CFS_TIMEOUT() is
      * true restarts the loop; anything else ends it.  The request is logged
      * when CACHEFS_LOG_READLINK logging is enabled, and a final ENXIO is
      * returned to the caller as EINVAL.
2903  */
2904 /*ARGSUSED*/
2905 static int
2906 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2907 {
2908         int error = 0;
2909         cnode_t *cp = VTOC(vp);
2910         fscache_t *fscp = C_TO_FSCACHE(cp);
2911         cachefscache_t *cachep = fscp->fs_cache;
2912         int held = 0;           /* nonzero while cd access is held */
2913         int connected = 0;      /* 2nd argument to cachefs_cd_access() */
2914 
2915         if (getzoneid() != GLOBAL_ZONEID)
2916                 return (EPERM);
2917 
2918         if (vp->v_type != VLNK)
2919                 return (EINVAL);
2920 
2921         /*
2922          * Cachefs only provides pass-through support for NFSv4,
2923          * and all vnode operations are passed through to the
2924          * back file system. For NFSv4 pass-through to work, only
2925          * connected operation is supported, the cnode backvp must
2926          * exist, and cachefs optional (eg., disconnectable) flags
2927          * are turned off. Assert these conditions to ensure that
2928          * the backfilesystem is called for the readlink operation.
2929          */
2930         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2931         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2932 
2933         for (;;) {
2934                 /* get (or renew) access to the file system */
2935                 if (held) {
2936                         /* Won't loop with NFSv4 connected behavior */
2937                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2938                         cachefs_cd_release(fscp);
2939                         held = 0;
2940                 }
2941                 error = cachefs_cd_access(fscp, connected, 0);
2942                 if (error)
2943                         break;
2944                 held = 1;
2945 
2946                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2947                         /*
2948                          * since readlink_connected will call stuffsymlink
2949                          * on success, have to serialize access
2950                          */
2951                         if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
                                     /*
                                      * The lock is contended: give up cd
                                      * access before blocking on it, then
                                      * obtain access again once the lock is
                                      * owned.
                                      */
2952                                 cachefs_cd_release(fscp);
2953                                 rw_enter(&cp->c_rwlock, RW_WRITER);
2954                                 error = cachefs_cd_access(fscp, connected, 0);
2955                                 if (error) {
                                             /* access was released above */
2956                                         held = 0;
2957                                         rw_exit(&cp->c_rwlock);
2958                                         break;
2959                                 }
2960                         }
2961                         error = cachefs_readlink_connected(vp, uiop, cr);
2962                         rw_exit(&cp->c_rwlock);
2963                         if (CFS_TIMEOUT(fscp, error)) {
                                     /*
                                      * drop access, call cachefs_cd_timedout()
                                      * and go around again
                                      */
2964                                 cachefs_cd_release(fscp);
2965                                 held = 0;
2966                                 cachefs_cd_timedout(fscp);
2967                                 connected = 0;
2968                                 continue;
2969                         }
2970                 } else {
2971                         error = cachefs_readlink_disconnected(vp, uiop);
2972                         if (CFS_TIMEOUT(fscp, error)) {
                                     /*
                                      * The disconnected path could not supply
                                      * the link (presumably the ETIMEDOUT it
                                      * returns when nothing is cached).  If
                                      * cachefs_cd_access_miss() is nonzero,
                                      * try the connected path right here;
                                      * otherwise loop again with
                                      * connected = 1.
                                      */
2973                                 if (cachefs_cd_access_miss(fscp)) {
2974                                         /* as above */
2975                                         if (!rw_tryenter(&cp->c_rwlock,
2976                                             RW_WRITER)) {
2977                                                 cachefs_cd_release(fscp);
2978                                                 rw_enter(&cp->c_rwlock,
2979                                                     RW_WRITER);
2980                                                 error = cachefs_cd_access(fscp,
2981                                                     connected, 0);
2982                                                 if (error) {
2983                                                         held = 0;
2984                                                         rw_exit(&cp->c_rwlock);
2985                                                         break;
2986                                                 }
2987                                         }
2988                                         error = cachefs_readlink_connected(vp,
2989                                             uiop, cr);
2990                                         rw_exit(&cp->c_rwlock);
2991                                         if (!CFS_TIMEOUT(fscp, error))
2992                                                 break;
                                             /* wait 5*hz ticks, then retry */
2993                                         delay(5*hz);
2994                                         connected = 0;
2995                                         continue;
2996                                 }
2997                                 connected = 1;
2998                                 continue;
2999                         }
3000                 }
3001                 break;
3002         }
3003         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
3004                 cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
3005                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
3006                     crgetuid(cr), cp->c_size);
3007 
             /* release whatever access is still held from the loop */
3008         if (held)
3009                 cachefs_cd_release(fscp);
3010 #ifdef CFS_CD_DEBUG
3011         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3012 #endif
3013 
3014         /*
3015          * The over the wire error for attempting to readlink something
3016          * other than a symbolic link is ENXIO.  However, we need to
3017          * return EINVAL instead of ENXIO, so we map it here.
3018          */
3019         return (error == ENXIO ? EINVAL : error);
3020 }
3021 
3022 static int
3023 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3024 {
3025         int error;
3026         cnode_t *cp = VTOC(vp);
3027         fscache_t *fscp = C_TO_FSCACHE(cp);
3028         caddr_t buf;
3029         int buflen;
3030         int readcache = 0;
3031 
3032         mutex_enter(&cp->c_statelock);
3033 
3034         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3035         if (error)
3036                 goto out;
3037 
3038         /* if the sym link is cached as a fast sym link */
3039         if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3040                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3041                 error = uiomove(cp->c_metadata.md_allocinfo,
3042                     MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3043 #ifdef CFSDEBUG
3044                 readcache = 1;
3045                 goto out;
3046 #else /* CFSDEBUG */
3047                 /* XXX KLUDGE! correct for insidious 0-len symlink */
3048                 if (cp->c_size != 0) {
3049                         readcache = 1;
3050                         goto out;
3051                 }
3052 #endif /* CFSDEBUG */
3053         }
3054 
3055         /* if the sym link is cached in a front file */
3056         if (cp->c_metadata.md_flags & MD_POPULATED) {
3057                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3058                 ASSERT(cp->c_metadata.md_flags & MD_FILE);
3059                 if (cp->c_frontvp == NULL) {
3060                         (void) cachefs_getfrontfile(cp);
3061                 }
3062                 if (cp->c_metadata.md_flags & MD_POPULATED) {
3063                         /* read symlink data from frontfile */
3064                         uiop->uio_offset = 0;
3065                         (void) VOP_RWLOCK(cp->c_frontvp,
3066                             V_WRITELOCK_FALSE, NULL);
3067                         error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3068                         VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3069 
3070                         /* XXX KLUDGE! correct for insidious 0-len symlink */
3071                         if (cp->c_size != 0) {
3072                                 readcache = 1;
3073                                 goto out;
3074                         }
3075                 }
3076         }
3077 
3078         /* get the sym link contents from the back fs */
3079         error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3080         if (error)
3081                 goto out;
3082 
3083         /* copy the contents out to the user */
3084         error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3085 
3086         /*
3087          * try to cache the sym link, note that its a noop if NOCACHE is set
3088          * or if NFSv4 pass-through is enabled.
3089          */
3090         if (cachefs_stuffsymlink(cp, buf, buflen)) {
3091                 cachefs_nocache(cp);
3092         }
3093 
3094         cachefs_kmem_free(buf, MAXPATHLEN);
3095 
3096 out:
3097         mutex_exit(&cp->c_statelock);
3098         if (error == 0) {
3099                 if (readcache)
3100                         fscp->fs_stats.st_hits++;
3101                 else
3102                         fscp->fs_stats.st_misses++;
3103         }
3104         return (error);
3105 }
3106 
3107 static int
3108 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3109 {
3110         int error;
3111         cnode_t *cp = VTOC(vp);
3112         fscache_t *fscp = C_TO_FSCACHE(cp);
3113         int readcache = 0;
3114 
3115         mutex_enter(&cp->c_statelock);
3116 
3117         /* if the sym link is cached as a fast sym link */
3118         if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3119                 error = uiomove(cp->c_metadata.md_allocinfo,
3120                     MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3121                 readcache = 1;
3122                 goto out;
3123         }
3124 
3125         /* if the sym link is cached in a front file */
3126         if (cp->c_metadata.md_flags & MD_POPULATED) {
3127                 ASSERT(cp->c_metadata.md_flags & MD_FILE);
3128                 if (cp->c_frontvp == NULL) {
3129                         (void) cachefs_getfrontfile(cp);
3130                 }
3131                 if (cp->c_metadata.md_flags & MD_POPULATED) {
3132                         /* read symlink data from frontfile */
3133                         uiop->uio_offset = 0;
3134                         (void) VOP_RWLOCK(cp->c_frontvp,
3135                             V_WRITELOCK_FALSE, NULL);
3136                         error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3137                         VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3138                         readcache = 1;
3139                         goto out;
3140                 }
3141         }
3142         error = ETIMEDOUT;
3143 
3144 out:
3145         mutex_exit(&cp->c_statelock);
3146         if (error == 0) {
3147                 if (readcache)
3148                         fscp->fs_stats.st_hits++;
3149                 else
3150                         fscp->fs_stats.st_misses++;
3151         }
3152         return (error);
3153 }
3154 
3155 /*ARGSUSED*/
     /*
      * fsync vnode operation.  Outside the global zone it returns EPERM, and
      * it returns 0 without doing any work when the back file system has
      * VFS_RDONLY set.  Otherwise, with access held via cachefs_cd_access(),
      * it:
      *   - pushes the cached pages of a regular file with
      *     cachefs_putpage_common() (skipped for NFSv4 back file systems),
      *   - when connected and a backvp exists, calls VOP_FSYNC() on it,
      *   - syncs the metadata with cachefs_sync_metadata() (also skipped
      *     for NFSv4).
      * A step failing with a CFS_TIMEOUT() error restarts the loop.  When
      * the loop ends with error == 0, cp->c_error is returned instead.
      */
3156 static int
3157 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3158 {
3159         cnode_t *cp = VTOC(vp);
3160         int error = 0;
3161         fscache_t *fscp = C_TO_FSCACHE(cp);
3162         int held = 0;           /* nonzero while cd access is held */
3163         int connected = 0;      /* 2nd argument to cachefs_cd_access() */
3164 
3165 #ifdef CFSDEBUG
3166         CFS_DEBUG(CFSDEBUG_VOPS)
3167                 printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3168 #endif
3169 
3170         if (getzoneid() != GLOBAL_ZONEID) {
3171                 error = EPERM;
3172                 goto out;
3173         }
3174 
             /* read-only back file system: leave with error still 0 */
3175         if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3176                 goto out;
3177 
3178         /*
3179          * Cachefs only provides pass-through support for NFSv4,
3180          * and all vnode operations are passed through to the
3181          * back file system. For NFSv4 pass-through to work, only
3182          * connected operation is supported, the cnode backvp must
3183          * exist, and cachefs optional (eg., disconnectable) flags
3184          * are turned off. Assert these conditions to ensure that
3185          * the backfilesystem is called for the fsync operation.
3186          */
3187         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3188         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3189 
3190         for (;;) {
3191                 /* get (or renew) access to the file system */
3192                 if (held) {
3193                         /* Won't loop with NFSv4 connected behavior */
3194                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3195                         cachefs_cd_release(fscp);
3196                         held = 0;
3197                 }
                     /*
                      * NOTE(review): the third argument is 1 here while the
                      * readlink and lookup paths pass 0; presumably it marks
                      * a modifying operation -- confirm against
                      * cachefs_cd_access().
                      */
3198                 error = cachefs_cd_access(fscp, connected, 1);
3199                 if (error)
3200                         break;
3201                 held = 1;
                     /* reset; set to 1 again below only when a retry asks */
3202                 connected = 0;
3203 
3204                 /* if a regular file, write out the pages */
3205                 if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3206                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3207                         error = cachefs_putpage_common(vp, (offset_t)0,
3208                             0, 0, cr);
3209                         if (CFS_TIMEOUT(fscp, error)) {
3210                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3211                                         cachefs_cd_release(fscp);
3212                                         held = 0;
3213                                         cachefs_cd_timedout(fscp);
3214                                         continue;
3215                                 } else {
3216                                         connected = 1;
3217                                         continue;
3218                                 }
3219                         }
3220 
3221                         /* if no space left in cache, wait until connected */
3222                         if ((error == ENOSPC) &&
3223                             (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3224                                 connected = 1;
3225                                 continue;
3226                         }
3227 
3228                         /* clear the cnode error if putpage worked */
3229                         if ((error == 0) && cp->c_error) {
3230                                 mutex_enter(&cp->c_statelock);
3231                                 cp->c_error = 0;
3232                                 mutex_exit(&cp->c_statelock);
3233                         }
3234 
3235                         if (error)
3236                                 break;
3237                 }
3238 
3239                 /* if connected, sync the backvp */
3240                 if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3241                     cp->c_backvp) {
3242                         mutex_enter(&cp->c_statelock);
                             /* c_backvp is tested again now that the lock is held */
3243                         if (cp->c_backvp) {
3244                                 CFS_DPRINT_BACKFS_NFSV4(fscp,
3245                                     ("cachefs_fsync (nfsv4): cnode %p, "
3246                                     "backvp %p\n", cp, cp->c_backvp));
3247                                 error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3248                                     ct);
3249                                 if (CFS_TIMEOUT(fscp, error)) {
3250                                         mutex_exit(&cp->c_statelock);
3251                                         cachefs_cd_release(fscp);
3252                                         held = 0;
3253                                         cachefs_cd_timedout(fscp);
3254                                         continue;
3255                                 } else if (error && (error != EINTR))
                                             /* EINTR is not recorded in c_error */
3256                                         cp->c_error = error;
3257                         }
3258                         mutex_exit(&cp->c_statelock);
3259                 }
3260 
3261                 /* sync the metadata and the front file to the front fs */
3262                 if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
                             /*
                              * NOTE(review): this assignment replaces whatever
                              * VOP_FSYNC() left in error.  A back fs error
                              * other than EINTR is still reported through
                              * cp->c_error after the loop; an EINTR is not.
                              */
3263                         error = cachefs_sync_metadata(cp);
                             /* a metadata sync failure is ignored while connected */
3264                         if (error &&
3265                             (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3266                                 error = 0;
3267                 }
3268                 break;
3269         }
3270 
             /* report an error recorded on the cnode if nothing else failed */
3271         if (error == 0)
3272                 error = cp->c_error;
3273 
3274         if (held)
3275                 cachefs_cd_release(fscp);
3276 
3277 out:
3278 #ifdef CFS_CD_DEBUG
3279         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3280 #endif
3281 
3282 #ifdef CFSDEBUG
3283         CFS_DEBUG(CFSDEBUG_VOPS)
3284                 printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3285 #endif
3286         return (error);
3287 }
3288 
3289 /*
3290  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
      * cachefs_fsync() also calls it for back file systems other than NFSv4.
      *
      * With cp->c_statelock held, writes the cnode's metadata to its file
      * group.  Nothing is written unless CN_UPDATED is set, and the cnode is
      * skipped when it is CN_STALE or CN_DESTROY, when the file group lacks
      * CFS_FG_WRITE, or when the back file system is NFSv4.  Along the way it
      * allocates the attribute storage and the metadata slot if they are
      * still pending and syncs the front file if CN_NEED_FRONT_SYNC is set.
      *
      * On failure the cached state is discarded: the front file and its
      * resource list entry are released, the metadata slot is destroyed and
      * the cnode is passed to cachefs_nocache().  CN_UPDATED is cleared on
      * every path.  Returns 0 or the errno of the failing step.
3291  */
3292 int
3293 cachefs_sync_metadata(cnode_t *cp)
3294 {
3295         int error = 0;
3296         struct filegrp *fgp;
3297         struct vattr va;
3298         fscache_t *fscp = C_TO_FSCACHE(cp);
3299 
3300 #ifdef CFSDEBUG
3301         CFS_DEBUG(CFSDEBUG_VOPS)
3302                 printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3303                     (void *)cp, cp->c_flags);
3304 #endif
3305 
3306         mutex_enter(&cp->c_statelock);
             /*
              * fgp is not assigned yet on the next two exits.  That is safe
              * because error is still 0 there, so the error handling at
              * "out", which uses fgp, is not entered.
              */
3307         if ((cp->c_flags & CN_UPDATED) == 0)
3308                 goto out;
3309         if (cp->c_flags & (CN_STALE | CN_DESTROY))
3310                 goto out;
3311         fgp = cp->c_filegrp;
3312         if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3313                 goto out;
3314         if (CFS_ISFS_BACKFS_NFSV4(fscp))
3315                 goto out;
3316 
3317         if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
                     /* the statelock is dropped around filegrp_allocattr() */
3318                 mutex_exit(&cp->c_statelock);
3319                 error = filegrp_allocattr(fgp);
3320                 mutex_enter(&cp->c_statelock);
3321                 if (error) {
                             /* give up quietly: no error, no cleanup at "out" */
3322                         error = 0;
3323                         goto out;
3324                 }
3325         }
3326 
3327         if (cp->c_flags & CN_ALLOC_PENDING) {
3328                 error = filegrp_create_metadata(fgp, &cp->c_metadata,
3329                     &cp->c_id);
3330                 if (error)
3331                         goto out;
3332                 cp->c_flags &= ~CN_ALLOC_PENDING;
3333         }
3334 
3335         if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3336                 if (cp->c_frontvp != NULL) {
3337                         error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3338                         if (error) {
                                     /*
                                      * Zero the recorded timestamp and carry
                                      * on.  NOTE(review): error stays set
                                      * here but is overwritten if
                                      * filegrp_write_metadata() runs below.
                                      */
3339                                 cp->c_metadata.md_timestamp.tv_sec = 0;
3340                         } else {
                                     /* record the front file's mtime */
3341                                 va.va_mask = AT_MTIME;
3342                                 error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3343                                     kcred, NULL);
3344                                 if (error)
3345                                         goto out;
3346                                 cp->c_metadata.md_timestamp = va.va_mtime;
3347                                 cp->c_flags &=
3348                                     ~(CN_NEED_FRONT_SYNC |
3349                                     CN_POPULATION_PENDING);
3350                         }
3351                 } else {
                             /* no front vnode: nothing to sync, just clear flags */
3352                         cp->c_flags &=
3353                             ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3354                 }
3355         }
3356 
3357         /*
3358          * XXX tony: How can CN_ALLOC_PENDING still be set??
3359          * XXX tony: How can CN_UPDATED not be set?????
3360          */
3361         if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3362             (cp->c_flags & CN_UPDATED)) {
3363                 error = filegrp_write_metadata(fgp, &cp->c_id,
3364                     &cp->c_metadata);
3365                 if (error)
3366                         goto out;
3367         }
3368 out:
3369         if (error) {
3370                 /* XXX modified files? */
                     /*
                      * Drop the front file and move its resource list entry
                      * to CACHEFS_RL_FREE.
                      */
3371                 if (cp->c_metadata.md_rlno) {
3372                         cachefs_removefrontfile(&cp->c_metadata,
3373                             &cp->c_id, fgp);
3374                         cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3375                             CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3376                         cp->c_metadata.md_rlno = 0;
3377                         cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3378                         if (cp->c_frontvp) {
3379                                 VN_RELE(cp->c_frontvp);
3380                                 cp->c_frontvp = NULL;
3381                         }
3382                 }
                     /* destroy the metadata slot if one had been allocated */
3383                 if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3384                         (void) filegrp_destroy_metadata(fgp, &cp->c_id);
3385                 cp->c_flags |= CN_ALLOC_PENDING;
3386                 cachefs_nocache(cp);
3387         }
3388         /*
3389          * we clear the updated bit even on errors because a retry
3390          * will probably fail also.
3391          */
3392         cp->c_flags &= ~CN_UPDATED;
3393         mutex_exit(&cp->c_statelock);
3394 
3395 #ifdef CFSDEBUG
3396         CFS_DEBUG(CFSDEBUG_VOPS)
3397                 printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3398                     (void *)cp, cp->c_flags);
3399 #endif
3400 
3401         return (error);
3402 }
3403 
3404 /*
3405  * This is the vop entry point for inactivating a vnode.
3406  * It just queues the request for the async thread which
3407  * calls cachefs_inactive.
3408  * Because of the dnlc, it is not safe to grab most locks here.
3409  */
3410 /*ARGSUSED*/
3411 static void
3412 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3413 {
3414         cnode_t *cp;
3415         struct cachefs_req *rp;
3416         fscache_t *fscp;
3417 
3418 #ifdef CFSDEBUG
3419         CFS_DEBUG(CFSDEBUG_VOPS)
3420                 printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3421 #endif
3422 
3423         cp = VTOC(vp);
3424         fscp = C_TO_FSCACHE(cp);
3425 
3426         ASSERT((cp->c_flags & CN_IDLE) == 0);
3427 
3428         /*
3429          * Cachefs only provides pass-through support for NFSv4,
3430          * and all vnode operations are passed through to the
3431          * back file system. For NFSv4 pass-through to work, only
3432          * connected operation is supported, the cnode backvp must
3433          * exist, and cachefs optional (eg., disconnectable) flags
3434          * are turned off. Assert these conditions to ensure that
3435          * the backfilesystem is called for the inactive operation.
3436          */
3437         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3438         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3439 
3440         /* vn_rele() set the v_count == 1 */
3441 
3442         cp->c_ipending = 1;
3443 
3444         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3445         rp->cfs_cmd = CFS_IDLE;
3446         rp->cfs_cr = cr;
3447         crhold(rp->cfs_cr);
3448         rp->cfs_req_u.cu_idle.ci_vp = vp;
3449         cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3450 
3451 #ifdef CFSDEBUG
3452         CFS_DEBUG(CFSDEBUG_VOPS)
3453                 printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3454 #endif
3455 }
3456 
3457 /* ARGSUSED */
3458 static int
3459 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3460     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3461     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3462 
3463 {
3464         int error = 0;
3465         cnode_t *dcp = VTOC(dvp);
3466         fscache_t *fscp = C_TO_FSCACHE(dcp);
3467         int held = 0;
3468         int connected = 0;
3469 
3470 #ifdef CFSDEBUG
3471         CFS_DEBUG(CFSDEBUG_VOPS)
3472                 printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3473 #endif
3474 
3475         if (getzoneid() != GLOBAL_ZONEID) {
3476                 error = EPERM;
3477                 goto out;
3478         }
3479 
3480         /*
3481          * Cachefs only provides pass-through support for NFSv4,
3482          * and all vnode operations are passed through to the
3483          * back file system. For NFSv4 pass-through to work, only
3484          * connected operation is supported, the cnode backvp must
3485          * exist, and cachefs optional (eg., disconnectable) flags
3486          * are turned off. Assert these conditions to ensure that
3487          * the backfilesystem is called for the lookup operation.
3488          */
3489         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3490         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3491 
3492         for (;;) {
3493                 /* get (or renew) access to the file system */
3494                 if (held) {
3495                         /* Won't loop with NFSv4 connected behavior */
3496                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3497                         cachefs_cd_release(fscp);
3498                         held = 0;
3499                 }
3500                 error = cachefs_cd_access(fscp, connected, 0);
3501                 if (error)
3502                         break;
3503                 held = 1;
3504 
3505                 error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3506                         flags, rdir, cr);
3507                 if (CFS_TIMEOUT(fscp, error)) {
3508                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3509                                 cachefs_cd_release(fscp);
3510                                 held = 0;
3511                                 cachefs_cd_timedout(fscp);
3512                                 connected = 0;
3513                                 continue;
3514                         } else {
3515                                 if (cachefs_cd_access_miss(fscp)) {
3516                                         rw_enter(&dcp->c_rwlock, RW_READER);
3517                                         error = cachefs_lookup_back(dvp, nm,
3518                                             vpp, cr);
3519                                         rw_exit(&dcp->c_rwlock);
3520                                         if (!CFS_TIMEOUT(fscp, error))
3521                                                 break;
3522                                         delay(5*hz);
3523                                         connected = 0;
3524                                         continue;
3525                                 }
3526                                 connected = 1;
3527                                 continue;
3528                         }
3529                 }
3530                 break;
3531         }
3532         if (held)
3533                 cachefs_cd_release(fscp);
3534 
3535         if (error == 0 && IS_DEVVP(*vpp)) {
3536                 struct vnode *newvp;
3537                 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3538                 VN_RELE(*vpp);
3539                 if (newvp == NULL) {
3540                         error = ENOSYS;
3541                 } else {
3542                         *vpp = newvp;
3543                 }
3544         }
3545 
3546 #ifdef CFS_CD_DEBUG
3547         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3548 #endif
3549 out:
3550 #ifdef CFSDEBUG
3551         CFS_DEBUG(CFSDEBUG_VOPS)
3552                 printf("cachefs_lookup: EXIT error = %d\n", error);
3553 #endif
3554 
3555         return (error);
3556 }
3557 
3558 /* ARGSUSED */
3559 int
3560 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3561     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3562 {
3563         int error = 0;
3564         cnode_t *cp, *dcp = VTOC(dvp);
3565         fscache_t *fscp = C_TO_FSCACHE(dcp);
3566         struct fid cookie;
3567         u_offset_t d_offset;
3568         struct cachefs_req *rp;
3569         cfs_cid_t cid, dircid;
3570         uint_t flag;
3571         uint_t uncached = 0;
3572 
3573         *vpp = NULL;
3574 
3575         /*
3576          * If lookup is for "", just return dvp.  Don't need
3577          * to send it over the wire, look it up in the dnlc,
3578          * or perform any access checks.
3579          */
3580         if (*nm == '\0') {
3581                 VN_HOLD(dvp);
3582                 *vpp = dvp;
3583                 return (0);
3584         }
3585 
3586         /* can't do lookups in non-directories */
3587         if (dvp->v_type != VDIR)
3588                 return (ENOTDIR);
3589 
3590         /* perform access check, also does consistency check if connected */
3591         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3592                 error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3593         } else {
3594                 mutex_enter(&dcp->c_statelock);
3595                 error = cachefs_access_local(dcp, VEXEC, cr);
3596                 mutex_exit(&dcp->c_statelock);
3597         }
3598         if (error)
3599                 return (error);
3600 
3601         /*
3602          * If lookup is for ".", just return dvp.  Don't need
3603          * to send it over the wire or look it up in the dnlc,
3604          * just need to check access.
3605          */
3606         if (strcmp(nm, ".") == 0) {
3607                 VN_HOLD(dvp);
3608                 *vpp = dvp;
3609                 return (0);
3610         }
3611 
3612         /* check the dnlc */
3613         *vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3614         if (*vpp)
3615                 return (0);
3616 
3617         /* read lock the dir before starting the search */
3618         rw_enter(&dcp->c_rwlock, RW_READER);
3619 
3620         mutex_enter(&dcp->c_statelock);
3621         dircid = dcp->c_id;
3622 
3623         dcp->c_usage++;
3624 
3625         /* if front file is not usable, lookup on the back fs */
3626         if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3627             CFS_ISFS_BACKFS_NFSV4(fscp) ||
3628             ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3629                 mutex_exit(&dcp->c_statelock);
3630                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3631                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3632                 else
3633                         error = ETIMEDOUT;
3634                 goto out;
3635         }
3636 
3637         /* if the front file is not populated, try to populate it */
3638         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3639                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3640                         error = ETIMEDOUT;
3641                         mutex_exit(&dcp->c_statelock);
3642                         goto out;
3643                 }
3644 
3645                 if (cachefs_async_okay()) {
3646                         /* cannot populate if cache is not writable */
3647                         ASSERT((dcp->c_flags &
3648                             (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3649                         dcp->c_flags |= CN_ASYNC_POPULATE;
3650 
3651                         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3652                         rp->cfs_cmd = CFS_POPULATE;
3653                         rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3654                         rp->cfs_cr = cr;
3655 
3656                         crhold(cr);
3657                         VN_HOLD(dvp);
3658 
3659                         cachefs_addqueue(rp, &fscp->fs_workq);
3660                 } else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3661                         error = cachefs_dir_fill(dcp, cr);
3662                         if (error != 0) {
3663                                 mutex_exit(&dcp->c_statelock);
3664                                 goto out;
3665                         }
3666                 }
3667                 /* no populate if too many asyncs and we have to cache ACLs */
3668 
3669                 mutex_exit(&dcp->c_statelock);
3670 
3671                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3672                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3673                 else
3674                         error = ETIMEDOUT;
3675                 goto out;
3676         }
3677 
3678         /* by now we have a valid cached front file that we can search */
3679 
3680         ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3681         error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3682             &d_offset, &cid);
3683         mutex_exit(&dcp->c_statelock);
3684 
3685         if (error) {
3686                 /* if the entry does not have the fid, go get it */
3687                 if (error == EINVAL) {
3688                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3689                                 error = cachefs_lookup_back(dvp, nm, vpp, cr);
3690                         else
3691                                 error = ETIMEDOUT;
3692                 }
3693 
3694                 /* errors other than does not exist */
3695                 else if (error != ENOENT) {
3696                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3697                                 error = cachefs_lookup_back(dvp, nm, vpp, cr);
3698                         else
3699                                 error = ETIMEDOUT;
3700                 }
3701                 goto out;
3702         }
3703 
3704         /*
3705          * Else we found the entry in the cached directory.
3706          * Make a cnode for it.
3707          */
3708         error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3709             cr, 0, &cp);
3710         if (error == ESTALE) {
3711                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3712                 mutex_enter(&dcp->c_statelock);
3713                 cachefs_nocache(dcp);
3714                 mutex_exit(&dcp->c_statelock);
3715                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3716                         error = cachefs_lookup_back(dvp, nm, vpp, cr);
3717                         uncached = 1;
3718                 } else
3719                         error = ETIMEDOUT;
3720         } else if (error == 0) {
3721                 *vpp = CTOV(cp);
3722         }
3723 
3724 out:
3725         if (error == 0) {
3726                 /* put the entry in the dnlc */
3727                 if (cachefs_dnlc)
3728                         dnlc_enter(dvp, nm, *vpp);
3729 
3730                 /* save the cid of the parent so can find the name */
3731                 cp = VTOC(*vpp);
3732                 if (bcmp(&cp->c_metadata.md_parent, &dircid,
3733                     sizeof (cfs_cid_t)) != 0) {
3734                         mutex_enter(&cp->c_statelock);
3735                         cp->c_metadata.md_parent = dircid;
3736                         cp->c_flags |= CN_UPDATED;
3737                         mutex_exit(&cp->c_statelock);
3738                 }
3739         }
3740 
3741         rw_exit(&dcp->c_rwlock);
3742         if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3743                 (void) cachefs_pack_common(dvp, cr);
3744         return (error);
3745 }
3746 
3747 /*
3748  * Called from cachefs_lookup_common when the back file system needs to be
3749  * examined to perform the lookup.
3750  */
3751 static int
3752 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3753     cred_t *cr)
3754 {
3755         int error = 0;
3756         cnode_t *cp, *dcp = VTOC(dvp);
3757         fscache_t *fscp = C_TO_FSCACHE(dcp);
3758         vnode_t *backvp = NULL;
3759         struct vattr va;
3760         struct fid cookie;
3761         cfs_cid_t cid;
3762         uint32_t valid_fid;
3763 
3764         mutex_enter(&dcp->c_statelock);
3765 
3766         /* do a lookup on the back FS to get the back vnode */
3767         if (dcp->c_backvp == NULL) {
3768                 error = cachefs_getbackvp(fscp, dcp);
3769                 if (error)
3770                         goto out;
3771         }
3772 
3773         CFS_DPRINT_BACKFS_NFSV4(fscp,
3774             ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3775             dcp, dcp->c_backvp, nm));
3776         error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3777             0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3778         if (error)
3779                 goto out;
3780         if (IS_DEVVP(backvp)) {
3781                 struct vnode *devvp = backvp;
3782 
3783                 if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3784                         VN_HOLD(backvp);
3785                         VN_RELE(devvp);
3786                 }
3787         }
3788 
3789         /* get the fid and attrs from the back fs */
3790         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3791         error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3792         if (error)
3793                 goto out;
3794 
3795         cid.cid_fileno = va.va_nodeid;
3796         cid.cid_flags = 0;
3797 
3798 #if 0
3799         /* XXX bob: this is probably no longer necessary */
3800         /* if the directory entry was incomplete, we can complete it now */
3801         if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3802             ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3803             (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3804                 cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3805         }
3806 #endif
3807 
3808 out:
3809         mutex_exit(&dcp->c_statelock);
3810 
3811         /* create the cnode */
3812         if (error == 0) {
3813                 error = cachefs_cnode_make(&cid, fscp,
3814                     (valid_fid ? &cookie : NULL),
3815                     &va, backvp, cr, 0, &cp);
3816                 if (error == 0) {
3817                         *vpp = CTOV(cp);
3818                 }
3819         }
3820 
3821         if (backvp)
3822                 VN_RELE(backvp);
3823 
3824         return (error);
3825 }
3826 
3827 /*ARGSUSED7*/
3828 static int
3829 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3830     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3831     caller_context_t *ct, vsecattr_t *vsecp)
3832 
3833 {
3834         cnode_t *dcp = VTOC(dvp);
3835         fscache_t *fscp = C_TO_FSCACHE(dcp);
3836         cachefscache_t *cachep = fscp->fs_cache;
3837         int error;
3838         int connected = 0;
3839         int held = 0;
3840 
3841 #ifdef CFSDEBUG
3842         CFS_DEBUG(CFSDEBUG_VOPS)
3843                 printf("cachefs_create: ENTER dvp %p, nm %s\n",
3844                     (void *)dvp, nm);
3845 #endif
3846         if (getzoneid() != GLOBAL_ZONEID) {
3847                 error = EPERM;
3848                 goto out;
3849         }
3850 
3851         /*
3852          * Cachefs only provides pass-through support for NFSv4,
3853          * and all vnode operations are passed through to the
3854          * back file system. For NFSv4 pass-through to work, only
3855          * connected operation is supported, the cnode backvp must
3856          * exist, and cachefs optional (eg., disconnectable) flags
3857          * are turned off. Assert these conditions to ensure that
3858          * the backfilesystem is called for the create operation.
3859          */
3860         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3861         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3862 
3863         for (;;) {
3864                 /* get (or renew) access to the file system */
3865                 if (held) {
3866                         /* Won't loop with NFSv4 connected behavior */
3867                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3868                         cachefs_cd_release(fscp);
3869                         held = 0;
3870                 }
3871                 error = cachefs_cd_access(fscp, connected, 1);
3872                 if (error)
3873                         break;
3874                 held = 1;
3875 
3876                 /*
3877                  * if we are connected, perform the remote portion of the
3878                  * create.
3879                  */
3880                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3881                         error = cachefs_create_connected(dvp, nm, vap,
3882                             exclusive, mode, vpp, cr);
3883                         if (CFS_TIMEOUT(fscp, error)) {
3884                                 cachefs_cd_release(fscp);
3885                                 held = 0;
3886                                 cachefs_cd_timedout(fscp);
3887                                 connected = 0;
3888                                 continue;
3889                         } else if (error) {
3890                                 break;
3891                         }
3892                 }
3893 
3894                 /* else we must be disconnected */
3895                 else {
3896                         error = cachefs_create_disconnected(dvp, nm, vap,
3897                             exclusive, mode, vpp, cr);
3898                         if (CFS_TIMEOUT(fscp, error)) {
3899                                 connected = 1;
3900                                 continue;
3901                         } else if (error) {
3902                                 break;
3903                         }
3904                 }
3905                 break;
3906         }
3907 
3908         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3909                 fid_t *fidp = NULL;
3910                 ino64_t fileno = 0;
3911                 cnode_t *cp = NULL;
3912                 if (error == 0)
3913                         cp = VTOC(*vpp);
3914 
3915                 if (cp != NULL) {
3916                         fidp = &cp->c_metadata.md_cookie;
3917                         fileno = cp->c_id.cid_fileno;
3918                 }
3919                 cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3920                     fidp, fileno, crgetuid(cr));
3921         }
3922 
3923         if (held)
3924                 cachefs_cd_release(fscp);
3925 
3926         if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3927                 (void) cachefs_pack(dvp, nm, cr);
3928         if (error == 0 && IS_DEVVP(*vpp)) {
3929                 struct vnode *spcvp;
3930 
3931                 spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3932                 VN_RELE(*vpp);
3933                 if (spcvp == NULL) {
3934                         error = ENOSYS;
3935                 } else {
3936                         *vpp = spcvp;
3937                 }
3938         }
3939 
3940 #ifdef CFS_CD_DEBUG
3941         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3942 #endif
3943 out:
3944 #ifdef CFSDEBUG
3945         CFS_DEBUG(CFSDEBUG_VOPS)
3946                 printf("cachefs_create: EXIT error %d\n", error);
3947 #endif
3948         return (error);
3949 }
3950 
3951 
3952 static int
3953 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3954     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3955 {
3956         cnode_t *dcp = VTOC(dvp);
3957         fscache_t *fscp = C_TO_FSCACHE(dcp);
3958         int error;
3959         vnode_t *tvp = NULL;
3960         vnode_t *devvp;
3961         fid_t cookie;
3962         vattr_t va;
3963         cnode_t *ncp;
3964         cfs_cid_t cid;
3965         vnode_t *vp;
3966         uint32_t valid_fid;
3967 
3968         /* special case if file already exists */
3969         error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3970         if (CFS_TIMEOUT(fscp, error))
3971                 return (error);
3972         if (error == 0) {
3973                 if (exclusive == EXCL)
3974                         error = EEXIST;
3975                 else if (vp->v_type == VDIR && (mode & VWRITE))
3976                         error = EISDIR;
3977                 else if ((error =
3978                     cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3979                         if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3980                                 vap->va_mask = AT_SIZE;
3981                                 error = cachefs_setattr_common(vp, vap, 0,
3982                                     cr, NULL);
3983                         }
3984                 }
3985                 if (error) {
3986                         VN_RELE(vp);
3987                 } else
3988                         *vpp = vp;
3989                 return (error);
3990         }
3991 
3992         rw_enter(&dcp->c_rwlock, RW_WRITER);
3993         mutex_enter(&dcp->c_statelock);
3994 
3995         /* consistency check the directory */
3996         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3997         if (error) {
3998                 mutex_exit(&dcp->c_statelock);
3999                 goto out;
4000         }
4001 
4002         /* get the backvp if necessary */
4003         if (dcp->c_backvp == NULL) {
4004                 error = cachefs_getbackvp(fscp, dcp);
4005                 if (error) {
4006                         mutex_exit(&dcp->c_statelock);
4007                         goto out;
4008                 }
4009         }
4010 
4011         /* create the file on the back fs */
4012         CFS_DPRINT_BACKFS_NFSV4(fscp,
4013             ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4014             "name %s\n", dcp, dcp->c_backvp, nm));
4015         error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4016             &devvp, cr, 0, NULL, NULL);
4017         mutex_exit(&dcp->c_statelock);
4018         if (error)
4019                 goto out;
4020         if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4021                 VN_HOLD(tvp);
4022                 VN_RELE(devvp);
4023         } else {
4024                 tvp = devvp;
4025         }
4026 
4027         /* get the fid and attrs from the back fs */
4028         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4029         error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4030         if (error)
4031                 goto out;
4032 
4033         /* make the cnode */
4034         cid.cid_fileno = va.va_nodeid;
4035         cid.cid_flags = 0;
4036         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4037             &va, tvp, cr, 0, &ncp);
4038         if (error)
4039                 goto out;
4040 
4041         *vpp = CTOV(ncp);
4042 
4043         /* enter it in the parent directory */
4044         mutex_enter(&dcp->c_statelock);
4045         if (CFS_ISFS_NONSHARED(fscp) &&
4046             (dcp->c_metadata.md_flags & MD_POPULATED)) {
4047                 /* see if entry already exists */
4048                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4049                 error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4050                 if (error == ENOENT) {
4051                         /* entry, does not exist, add the new file */
4052                         error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4053                             &ncp->c_id, SM_ASYNC);
4054                         if (error) {
4055                                 cachefs_nocache(dcp);
4056                                 error = 0;
4057                         }
4058                         /* XXX should this be done elsewhere, too? */
4059                         dnlc_enter(dvp, nm, *vpp);
4060                 } else {
4061                         /* entry exists or some other problem */
4062                         cachefs_nocache(dcp);
4063                         error = 0;
4064                 }
4065         }
4066         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4067         mutex_exit(&dcp->c_statelock);
4068 
4069 out:
4070         rw_exit(&dcp->c_rwlock);
4071         if (tvp)
4072                 VN_RELE(tvp);
4073 
4074         return (error);
4075 }
4076 
4077 static int
4078 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4079         enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4080 {
4081         cnode_t *dcp = VTOC(dvp);
4082         cnode_t *cp;
4083         cnode_t *ncp = NULL;
4084         vnode_t *vp;
4085         fscache_t *fscp = C_TO_FSCACHE(dcp);
4086         int error = 0;
4087         struct vattr va;
4088         timestruc_t current_time;
4089         off_t commit = 0;
4090         fid_t cookie;
4091         cfs_cid_t cid;
4092 
4093         rw_enter(&dcp->c_rwlock, RW_WRITER);
4094         mutex_enter(&dcp->c_statelock);
4095 
4096         /* give up if the directory is not populated */
4097         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4098                 mutex_exit(&dcp->c_statelock);
4099                 rw_exit(&dcp->c_rwlock);
4100                 return (ETIMEDOUT);
4101         }
4102 
4103         /* special case if file already exists */
4104         error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4105         if (error == EINVAL) {
4106                 mutex_exit(&dcp->c_statelock);
4107                 rw_exit(&dcp->c_rwlock);
4108                 return (ETIMEDOUT);
4109         }
4110         if (error == 0) {
4111                 mutex_exit(&dcp->c_statelock);
4112                 rw_exit(&dcp->c_rwlock);
4113                 error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4114                     cr, 0, &cp);
4115                 if (error) {
4116                         return (error);
4117                 }
4118                 vp = CTOV(cp);
4119 
4120                 if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4121                         error = ETIMEDOUT;
4122                 else if (exclusive == EXCL)
4123                         error = EEXIST;
4124                 else if (vp->v_type == VDIR && (mode & VWRITE))
4125                         error = EISDIR;
4126                 else {
4127                         mutex_enter(&cp->c_statelock);
4128                         error = cachefs_access_local(cp, mode, cr);
4129                         mutex_exit(&cp->c_statelock);
4130                         if (!error) {
4131                                 if ((vap->va_mask & AT_SIZE) &&
4132                                     (vp->v_type == VREG)) {
4133                                         vap->va_mask = AT_SIZE;
4134                                         error = cachefs_setattr_common(vp,
4135                                             vap, 0, cr, NULL);
4136                                 }
4137                         }
4138                 }
4139                 if (error) {
4140                         VN_RELE(vp);
4141                 } else
4142                         *vpp = vp;
4143                 return (error);
4144         }
4145 
4146         /* give up if cannot modify the cache */
4147         if (CFS_ISFS_WRITE_AROUND(fscp)) {
4148                 mutex_exit(&dcp->c_statelock);
4149                 error = ETIMEDOUT;
4150                 goto out;
4151         }
4152 
4153         /* check access */
4154         if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4155                 mutex_exit(&dcp->c_statelock);
4156                 goto out;
4157         }
4158 
4159         /* mark dir as modified */
4160         cachefs_modified(dcp);
4161         mutex_exit(&dcp->c_statelock);
4162 
4163         /* must be privileged to set sticky bit */
4164         if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4165                 vap->va_mode &= ~VSVTX;
4166 
4167         /* make up a reasonable set of attributes */
4168         cachefs_attr_setup(vap, &va, dcp, cr);
4169 
4170         /* create the cnode */
4171         error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4172         if (error)
4173                 goto out;
4174 
4175         mutex_enter(&ncp->c_statelock);
4176 
4177         /* get the front file now instead of later */
4178         if (vap->va_type == VREG) {
4179                 error = cachefs_getfrontfile(ncp);
4180                 if (error) {
4181                         mutex_exit(&ncp->c_statelock);
4182                         goto out;
4183                 }
4184                 ASSERT(ncp->c_frontvp != NULL);
4185                 ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4186                 ncp->c_metadata.md_flags |= MD_POPULATED;
4187         } else {
4188                 ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4189                 if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4190                         (void) filegrp_allocattr(ncp->c_filegrp);
4191                 }
4192                 error = filegrp_create_metadata(ncp->c_filegrp,
4193                     &ncp->c_metadata, &ncp->c_id);
4194                 if (error) {
4195                         mutex_exit(&ncp->c_statelock);
4196                         goto out;
4197                 }
4198                 ncp->c_flags &= ~CN_ALLOC_PENDING;
4199         }
4200         mutex_enter(&dcp->c_statelock);
4201         cachefs_creategid(dcp, ncp, vap, cr);
4202         cachefs_createacl(dcp, ncp);
4203         mutex_exit(&dcp->c_statelock);
4204 
4205         /* set times on the file */
4206         gethrestime(&current_time);
4207         ncp->c_metadata.md_vattr.va_atime = current_time;
4208         ncp->c_metadata.md_localctime = current_time;
4209         ncp->c_metadata.md_localmtime = current_time;
4210         ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4211 
4212         /* reserve space for the daemon cid mapping */
4213         error = cachefs_dlog_cidmap(fscp);
4214         if (error) {
4215                 mutex_exit(&ncp->c_statelock);
4216                 goto out;
4217         }
4218         ncp->c_metadata.md_flags |= MD_MAPPING;
4219 
4220         /* mark the new file as modified */
4221         if (cachefs_modified_alloc(ncp)) {
4222                 mutex_exit(&ncp->c_statelock);
4223                 error = ENOSPC;
4224                 goto out;
4225         }
4226         ncp->c_flags |= CN_UPDATED;
4227 
4228         /*
4229          * write the metadata now rather than waiting until
4230          * inactive so that if there's no space we can let
4231          * the caller know.
4232          */
4233         ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4234         ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4235         error = filegrp_write_metadata(ncp->c_filegrp,
4236             &ncp->c_id, &ncp->c_metadata);
4237         if (error) {
4238                 mutex_exit(&ncp->c_statelock);
4239                 goto out;
4240         }
4241 
4242         /* log the operation */
4243         commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4244             mode, ncp, 0, cr);
4245         if (commit == 0) {
4246                 mutex_exit(&ncp->c_statelock);
4247                 error = ENOSPC;
4248                 goto out;
4249         }
4250 
4251         mutex_exit(&ncp->c_statelock);
4252 
4253         mutex_enter(&dcp->c_statelock);
4254 
4255         /* update parent dir times */
4256         dcp->c_metadata.md_localmtime = current_time;
4257         dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4258         dcp->c_flags |= CN_UPDATED;
4259 
4260         /* enter new file name in the parent directory */
4261         if (dcp->c_metadata.md_flags & MD_POPULATED) {
4262                 error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4263                     &ncp->c_id, 0);
4264                 if (error) {
4265                         cachefs_nocache(dcp);
4266                         mutex_exit(&dcp->c_statelock);
4267                         error = ETIMEDOUT;
4268                         goto out;
4269                 }
4270                 dnlc_enter(dvp, nm, CTOV(ncp));
4271         } else {
4272                 mutex_exit(&dcp->c_statelock);
4273                 error = ETIMEDOUT;
4274                 goto out;
4275         }
4276         mutex_exit(&dcp->c_statelock);
4277 
4278 out:
4279         rw_exit(&dcp->c_rwlock);
4280 
4281         if (commit) {
4282                 if (cachefs_dlog_commit(fscp, commit, error)) {
4283                         /*EMPTY*/
4284                         /* XXX bob: fix on panic */
4285                 }
4286         }
4287         if (error) {
4288                 /* destroy the cnode we created */
4289                 if (ncp) {
4290                         mutex_enter(&ncp->c_statelock);
4291                         ncp->c_flags |= CN_DESTROY;
4292                         mutex_exit(&ncp->c_statelock);
4293                         VN_RELE(CTOV(ncp));
4294                 }
4295         } else {
4296                 *vpp = CTOV(ncp);
4297         }
4298         return (error);
4299 }
4300 
4301 /*ARGSUSED*/
4302 static int
4303 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
4304     int flags)
4305 {
4306         cnode_t *dcp = VTOC(dvp);
4307         fscache_t *fscp = C_TO_FSCACHE(dcp);
4308         cachefscache_t *cachep = fscp->fs_cache;
4309         int error = 0;
4310         int held = 0;
4311         int connected = 0;
4312         size_t namlen;
4313         vnode_t *vp = NULL;
4314         int vfslock = 0;
4315 
4316 #ifdef CFSDEBUG
4317         CFS_DEBUG(CFSDEBUG_VOPS)
4318                 printf("cachefs_remove: ENTER dvp %p name %s\n",
4319                     (void *)dvp, nm);
4320 #endif
4321         if (getzoneid() != GLOBAL_ZONEID) {
4322                 error = EPERM;
4323                 goto out;
4324         }
4325 
4326         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4327                 ASSERT(dcp->c_flags & CN_NOCACHE);
4328 
4329         /*
4330          * Cachefs only provides pass-through support for NFSv4,
4331          * and all vnode operations are passed through to the
4332          * back file system. For NFSv4 pass-through to work, only
4333          * connected operation is supported, the cnode backvp must
4334          * exist, and cachefs optional (eg., disconnectable) flags
4335          * are turned off. Assert these conditions to ensure that
4336          * the backfilesystem is called for the remove operation.
4337          */
4338         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4339         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4340 
4341         for (;;) {
4342                 if (vfslock) {
4343                         vn_vfsunlock(vp);
4344                         vfslock = 0;
4345                 }
4346                 if (vp) {
4347                         VN_RELE(vp);
4348                         vp = NULL;
4349                 }
4350 
4351                 /* get (or renew) access to the file system */
4352                 if (held) {
4353                         /* Won't loop with NFSv4 connected behavior */
4354                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4355                         cachefs_cd_release(fscp);
4356                         held = 0;
4357                 }
4358                 error = cachefs_cd_access(fscp, connected, 1);
4359                 if (error)
4360                         break;
4361                 held = 1;
4362 
4363                 /* if disconnected, do some extra error checking */
4364                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4365                         /* check permissions */
4366                         mutex_enter(&dcp->c_statelock);
4367                         error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4368                         mutex_exit(&dcp->c_statelock);
4369                         if (CFS_TIMEOUT(fscp, error)) {
4370                                 connected = 1;
4371                                 continue;
4372                         }
4373                         if (error)
4374                                 break;
4375 
4376                         namlen = strlen(nm);
4377                         if (namlen == 0) {
4378                                 error = EINVAL;
4379                                 break;
4380                         }
4381 
4382                         /* cannot remove . and .. */
4383                         if (nm[0] == '.') {
4384                                 if (namlen == 1) {
4385                                         error = EINVAL;
4386                                         break;
4387                                 } else if (namlen == 2 && nm[1] == '.') {
4388                                         error = EEXIST;
4389                                         break;
4390                                 }
4391                         }
4392 
4393                 }
4394 
4395                 /* get the cnode of the file to delete */
4396                 error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4397                 if (error) {
4398                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4399                                 if (CFS_TIMEOUT(fscp, error)) {
4400                                         cachefs_cd_release(fscp);
4401                                         held = 0;
4402                                         cachefs_cd_timedout(fscp);
4403                                         connected = 0;
4404                                         continue;
4405                                 }
4406                         } else {
4407                                 if (CFS_TIMEOUT(fscp, error)) {
4408                                         connected = 1;
4409                                         continue;
4410                                 }
4411                         }
4412                         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4413                                 struct fid foo;
4414 
4415                                 bzero(&foo, sizeof (foo));
4416                                 cachefs_log_remove(cachep, error,
4417                                     fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4418                         }
4419                         break;
4420                 }
4421 
4422                 if (vp->v_type == VDIR) {
4423                         /* must be privileged to remove dirs with unlink() */
4424                         if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4425                                 break;
4426 
4427                         /* see ufs_dirremove for why this is done, mount race */
4428                         if (vn_vfswlock(vp)) {
4429                                 error = EBUSY;
4430                                 break;
4431                         }
4432                         vfslock = 1;
4433                         if (vn_mountedvfs(vp) != NULL) {
4434                                 error = EBUSY;
4435                                 break;
4436                         }
4437                 }
4438 
4439                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4440                         error = cachefs_remove_connected(dvp, nm, cr, vp);
4441                         if (CFS_TIMEOUT(fscp, error)) {
4442                                 cachefs_cd_release(fscp);
4443                                 held = 0;
4444                                 cachefs_cd_timedout(fscp);
4445                                 connected = 0;
4446                                 continue;
4447                         }
4448                 } else {
4449                         error = cachefs_remove_disconnected(dvp, nm, cr,
4450                             vp);
4451                         if (CFS_TIMEOUT(fscp, error)) {
4452                                 connected = 1;
4453                                 continue;
4454                         }
4455                 }
4456                 break;
4457         }
4458 
4459 #if 0
4460         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4461                 cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4462                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4463                     crgetuid(cr));
4464 #endif
4465 
4466         if (held)
4467                 cachefs_cd_release(fscp);
4468 
4469         if (vfslock)
4470                 vn_vfsunlock(vp);
4471 
4472         if (vp)
4473                 VN_RELE(vp);
4474 
4475 #ifdef CFS_CD_DEBUG
4476         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4477 #endif
4478 out:
4479 #ifdef CFSDEBUG
4480         CFS_DEBUG(CFSDEBUG_VOPS)
4481                 printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4482 #endif
4483 
4484         return (error);
4485 }
4486 
4487 int
4488 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4489 {
4490         cnode_t *dcp = VTOC(dvp);
4491         cnode_t *cp = VTOC(vp);
4492         fscache_t *fscp = C_TO_FSCACHE(dcp);
4493         int error = 0;
4494 
4495         /*
4496          * Acquire the rwlock (WRITER) on the directory to prevent other
4497          * activity on the directory.
4498          */
4499         rw_enter(&dcp->c_rwlock, RW_WRITER);
4500 
4501         /* purge dnlc of this entry so can get accurate vnode count */
4502         dnlc_purge_vp(vp);
4503 
4504         /*
4505          * If the cnode is active, make a link to the file
4506          * so operations on the file will continue.
4507          */
4508         if ((vp->v_type != VDIR) &&
4509             !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4510                 error = cachefs_remove_dolink(dvp, vp, nm, cr);
4511                 if (error)
4512                         goto out;
4513         }
4514 
4515         /* else call backfs NFSv4 handler if NFSv4 */
4516         else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4517                 error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4518                 goto out;
4519         }
4520 
4521         /* else drop the backvp so nfs does not do rename */
4522         else if (cp->c_backvp) {
4523                 mutex_enter(&cp->c_statelock);
4524                 if (cp->c_backvp) {
4525                         VN_RELE(cp->c_backvp);
4526                         cp->c_backvp = NULL;
4527                 }
4528                 mutex_exit(&cp->c_statelock);
4529         }
4530 
4531         mutex_enter(&dcp->c_statelock);
4532 
4533         /* get the backvp */
4534         if (dcp->c_backvp == NULL) {
4535                 error = cachefs_getbackvp(fscp, dcp);
4536                 if (error) {
4537                         mutex_exit(&dcp->c_statelock);
4538                         goto out;
4539                 }
4540         }
4541 
4542         /* check directory consistency */
4543         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4544         if (error) {
4545                 mutex_exit(&dcp->c_statelock);
4546                 goto out;
4547         }
4548 
4549         /* perform the remove on the back fs */
4550         error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4551         if (error) {
4552                 mutex_exit(&dcp->c_statelock);
4553                 goto out;
4554         }
4555 
4556         /* the dir has been modified */
4557         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4558 
4559         /* remove the entry from the populated directory */
4560         if (CFS_ISFS_NONSHARED(fscp) &&
4561             (dcp->c_metadata.md_flags & MD_POPULATED)) {
4562                 error = cachefs_dir_rmentry(dcp, nm);
4563                 if (error) {
4564                         cachefs_nocache(dcp);
4565                         error = 0;
4566                 }
4567         }
4568         mutex_exit(&dcp->c_statelock);
4569 
4570         /* fix up the file we deleted */
4571         mutex_enter(&cp->c_statelock);
4572         if (cp->c_attr.va_nlink == 1)
4573                 cp->c_flags |= CN_DESTROY;
4574         else
4575                 cp->c_flags |= CN_UPDATED;
4576 
4577         cp->c_attr.va_nlink--;
4578         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4579         mutex_exit(&cp->c_statelock);
4580 
4581 out:
4582         rw_exit(&dcp->c_rwlock);
4583         return (error);
4584 }
4585 
4586 /*
4587  * cachefs_remove_backfs_nfsv4
4588  *
4589  * Call NFSv4 back filesystem to handle the remove (cachefs
4590  * pass-through support for NFSv4).
4591  */
4592 int
4593 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4594 {
4595         cnode_t *dcp = VTOC(dvp);
4596         cnode_t *cp = VTOC(vp);
4597         vnode_t *dbackvp;
4598         fscache_t *fscp = C_TO_FSCACHE(dcp);
4599         int error = 0;
4600 
4601         /*
4602          * For NFSv4 pass-through to work, only connected operation
4603          * is supported, the cnode backvp must exist, and cachefs
4604          * optional (eg., disconnectable) flags are turned off. Assert
4605          * these conditions for the getattr operation.
4606          */
4607         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4608         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4609 
4610         /* Should hold the directory readwrite lock to update directory */
4611         ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4612 
4613         /*
4614          * Update attributes for directory. Note that
4615          * CFSOP_CHECK_COBJECT asserts for c_statelock being
4616          * held, so grab it before calling the routine.
4617          */
4618         mutex_enter(&dcp->c_statelock);
4619         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4620         mutex_exit(&dcp->c_statelock);
4621         if (error)
4622                 goto out;
4623 
4624         /*
4625          * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4626          * asserts for c_statelock being held, so grab it before
4627          * calling the routine.
4628          */
4629         mutex_enter(&cp->c_statelock);
4630         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4631         if (error) {
4632                 mutex_exit(&cp->c_statelock);
4633                 goto out;
4634         }
4635 
4636         /*
4637          * Drop the backvp so nfs if the link count is 1 so that
4638          * nfs does not do rename. Ensure that we will destroy the cnode
4639          * since this cnode no longer contains the backvp. Note that we
4640          * maintain lock on this cnode to prevent change till the remove
4641          * completes, otherwise other operations will encounter an ESTALE
4642          * if they try to use the cnode with CN_DESTROY set (see
4643          * cachefs_get_backvp()), or change the state of the cnode
4644          * while we're removing it.
4645          */
4646         if (cp->c_attr.va_nlink == 1) {
4647                 /*
4648                  * The unldvp information is created for the case
4649                  * when there is more than one reference on the
4650                  * vnode when a remove operation is called. If the
4651                  * remove itself was holding a reference to the
4652                  * vnode, then a subsequent remove will remove the
4653                  * backvp, so we need to get rid of the unldvp
4654                  * before removing the backvp. An alternate would
4655                  * be to simply ignore the remove and let the
4656                  * inactivation routine do the deletion of the
4657                  * unldvp.
4658                  */
4659                 if (cp->c_unldvp) {
4660                         VN_RELE(cp->c_unldvp);
4661                         cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4662                         crfree(cp->c_unlcred);
4663                         cp->c_unldvp = NULL;
4664                         cp->c_unlcred = NULL;
4665                 }
4666                 cp->c_flags |= CN_DESTROY;
4667                 cp->c_attr.va_nlink = 0;
4668                 VN_RELE(cp->c_backvp);
4669                 cp->c_backvp = NULL;
4670         }
4671 
4672         /* perform the remove on back fs after extracting directory backvp */
4673         mutex_enter(&dcp->c_statelock);
4674         dbackvp = dcp->c_backvp;
4675         mutex_exit(&dcp->c_statelock);
4676 
4677         CFS_DPRINT_BACKFS_NFSV4(fscp,
4678             ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4679             dcp, dbackvp, nm));
4680         error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4681         if (error) {
4682                 mutex_exit(&cp->c_statelock);
4683                 goto out;
4684         }
4685 
4686         /* fix up the file we deleted, if not destroying the cnode */
4687         if ((cp->c_flags & CN_DESTROY) == 0) {
4688                 cp->c_attr.va_nlink--;
4689                 cp->c_flags |= CN_UPDATED;
4690         }
4691 
4692         mutex_exit(&cp->c_statelock);
4693 
4694 out:
4695         return (error);
4696 }
4697 
4698 int
4699 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4700     vnode_t *vp)
4701 {
4702         cnode_t *dcp = VTOC(dvp);
4703         cnode_t *cp = VTOC(vp);
4704         fscache_t *fscp = C_TO_FSCACHE(dcp);
4705         int error = 0;
4706         off_t commit = 0;
4707         timestruc_t current_time;
4708 
4709         if (CFS_ISFS_WRITE_AROUND(fscp))
4710                 return (ETIMEDOUT);
4711 
4712         if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4713                 return (ETIMEDOUT);
4714 
4715         /*
4716          * Acquire the rwlock (WRITER) on the directory to prevent other
4717          * activity on the directory.
4718          */
4719         rw_enter(&dcp->c_rwlock, RW_WRITER);
4720 
4721         /* dir must be populated */
4722         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4723                 error = ETIMEDOUT;
4724                 goto out;
4725         }
4726 
4727         mutex_enter(&dcp->c_statelock);
4728         mutex_enter(&cp->c_statelock);
4729 
4730         error = cachefs_stickyrmchk(dcp, cp, cr);
4731 
4732         mutex_exit(&cp->c_statelock);
4733         mutex_exit(&dcp->c_statelock);
4734         if (error)
4735                 goto out;
4736 
4737         /* purge dnlc of this entry so can get accurate vnode count */
4738         dnlc_purge_vp(vp);
4739 
4740         /*
4741          * If the cnode is active, make a link to the file
4742          * so operations on the file will continue.
4743          */
4744         if ((vp->v_type != VDIR) &&
4745             !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4746                 error = cachefs_remove_dolink(dvp, vp, nm, cr);
4747                 if (error)
4748                         goto out;
4749         }
4750 
4751         if (cp->c_attr.va_nlink > 1) {
4752                 mutex_enter(&cp->c_statelock);
4753                 if (cachefs_modified_alloc(cp)) {
4754                         mutex_exit(&cp->c_statelock);
4755                         error = ENOSPC;
4756                         goto out;
4757                 }
4758                 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4759                         error = cachefs_dlog_cidmap(fscp);
4760                         if (error) {
4761                                 mutex_exit(&cp->c_statelock);
4762                                 error = ENOSPC;
4763                                 goto out;
4764                         }
4765                         cp->c_metadata.md_flags |= MD_MAPPING;
4766                         cp->c_flags |= CN_UPDATED;
4767                 }
4768                 mutex_exit(&cp->c_statelock);
4769         }
4770 
4771         /* log the remove */
4772         commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4773         if (commit == 0) {
4774                 error = ENOSPC;
4775                 goto out;
4776         }
4777 
4778         /* remove the file from the dir */
4779         mutex_enter(&dcp->c_statelock);
4780         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4781                 mutex_exit(&dcp->c_statelock);
4782                 error = ETIMEDOUT;
4783                 goto out;
4784 
4785         }
4786         cachefs_modified(dcp);
4787         error = cachefs_dir_rmentry(dcp, nm);
4788         if (error) {
4789                 mutex_exit(&dcp->c_statelock);
4790                 if (error == ENOTDIR)
4791                         error = ETIMEDOUT;
4792                 goto out;
4793         }
4794 
4795         /* update parent dir times */
4796         gethrestime(&current_time);
4797         dcp->c_metadata.md_localctime = current_time;
4798         dcp->c_metadata.md_localmtime = current_time;
4799         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4800         dcp->c_flags |= CN_UPDATED;
4801         mutex_exit(&dcp->c_statelock);
4802 
4803         /* adjust file we are deleting */
4804         mutex_enter(&cp->c_statelock);
4805         cp->c_attr.va_nlink--;
4806         cp->c_metadata.md_localctime = current_time;
4807         cp->c_metadata.md_flags |= MD_LOCALCTIME;
4808         if (cp->c_attr.va_nlink == 0) {
4809                 cp->c_flags |= CN_DESTROY;
4810         } else {
4811                 cp->c_flags |= CN_UPDATED;
4812         }
4813         mutex_exit(&cp->c_statelock);
4814 
4815 out:
4816         if (commit) {
4817                 /* commit the log entry */
4818                 if (cachefs_dlog_commit(fscp, commit, error)) {
4819                         /*EMPTY*/
4820                         /* XXX bob: fix on panic */
4821                 }
4822         }
4823 
4824         rw_exit(&dcp->c_rwlock);
4825         return (error);
4826 }
4827 
4828 /*ARGSUSED*/
4829 static int
4830 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4831     caller_context_t *ct, int flags)
4832 {
4833         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4834         cnode_t *tdcp = VTOC(tdvp);
4835         struct vnode *realvp;
4836         int error = 0;
4837         int held = 0;
4838         int connected = 0;
4839 
4840 #ifdef CFSDEBUG
4841         CFS_DEBUG(CFSDEBUG_VOPS)
4842                 printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4843                     (void *)fvp, (void *)tdvp, tnm);
4844 #endif
4845 
4846         if (getzoneid() != GLOBAL_ZONEID) {
4847                 error = EPERM;
4848                 goto out;
4849         }
4850 
4851         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4852                 ASSERT(tdcp->c_flags & CN_NOCACHE);
4853 
4854         if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4855                 fvp = realvp;
4856         }
4857 
4858         /*
4859          * Cachefs only provides pass-through support for NFSv4,
4860          * and all vnode operations are passed through to the
4861          * back file system. For NFSv4 pass-through to work, only
4862          * connected operation is supported, the cnode backvp must
4863          * exist, and cachefs optional (eg., disconnectable) flags
4864          * are turned off. Assert these conditions to ensure that
4865          * the backfilesystem is called for the link operation.
4866          */
4867 
4868         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4869         CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4870 
4871         for (;;) {
4872                 /* get (or renew) access to the file system */
4873                 if (held) {
4874                         /* Won't loop with NFSv4 connected behavior */
4875                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4876                         rw_exit(&tdcp->c_rwlock);
4877                         cachefs_cd_release(fscp);
4878                         held = 0;
4879                 }
4880                 error = cachefs_cd_access(fscp, connected, 1);
4881                 if (error)
4882                         break;
4883                 rw_enter(&tdcp->c_rwlock, RW_WRITER);
4884                 held = 1;
4885 
4886                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4887                         error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4888                         if (CFS_TIMEOUT(fscp, error)) {
4889                                 rw_exit(&tdcp->c_rwlock);
4890                                 cachefs_cd_release(fscp);
4891                                 held = 0;
4892                                 cachefs_cd_timedout(fscp);
4893                                 connected = 0;
4894                                 continue;
4895                         }
4896                 } else {
4897                         error = cachefs_link_disconnected(tdvp, fvp, tnm,
4898                             cr);
4899                         if (CFS_TIMEOUT(fscp, error)) {
4900                                 connected = 1;
4901                                 continue;
4902                         }
4903                 }
4904                 break;
4905         }
4906 
4907         if (held) {
4908                 rw_exit(&tdcp->c_rwlock);
4909                 cachefs_cd_release(fscp);
4910         }
4911 
4912 #ifdef CFS_CD_DEBUG
4913         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4914 #endif
4915 out:
4916 #ifdef CFSDEBUG
4917         CFS_DEBUG(CFSDEBUG_VOPS)
4918                 printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4919                     (void *)fvp, (void *)tdvp, tnm);
4920 #endif
4921         return (error);
4922 }
4923 
4924 static int
4925 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4926 {
4927         cnode_t *tdcp = VTOC(tdvp);
4928         cnode_t *fcp = VTOC(fvp);
4929         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4930         int error = 0;
4931         vnode_t *backvp = NULL;
4932 
4933         if (tdcp != fcp) {
4934                 mutex_enter(&fcp->c_statelock);
4935 
4936                 if (fcp->c_backvp == NULL) {
4937                         error = cachefs_getbackvp(fscp, fcp);
4938                         if (error) {
4939                                 mutex_exit(&fcp->c_statelock);
4940                                 goto out;
4941                         }
4942                 }
4943 
4944                 error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4945                 if (error) {
4946                         mutex_exit(&fcp->c_statelock);
4947                         goto out;
4948                 }
4949                 backvp = fcp->c_backvp;
4950                 VN_HOLD(backvp);
4951                 mutex_exit(&fcp->c_statelock);
4952         }
4953 
4954         mutex_enter(&tdcp->c_statelock);
4955 
4956         /* get backvp of target directory */
4957         if (tdcp->c_backvp == NULL) {
4958                 error = cachefs_getbackvp(fscp, tdcp);
4959                 if (error) {
4960                         mutex_exit(&tdcp->c_statelock);
4961                         goto out;
4962                 }
4963         }
4964 
4965         /* consistency check target directory */
4966         error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4967         if (error) {
4968                 mutex_exit(&tdcp->c_statelock);
4969                 goto out;
4970         }
4971         if (backvp == NULL) {
4972                 backvp = tdcp->c_backvp;
4973                 VN_HOLD(backvp);
4974         }
4975 
4976         /* perform the link on the back fs */
4977         CFS_DPRINT_BACKFS_NFSV4(fscp,
4978             ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4979             "name %s\n", tdcp, tdcp->c_backvp, tnm));
4980         error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4981         if (error) {
4982                 mutex_exit(&tdcp->c_statelock);
4983                 goto out;
4984         }
4985 
4986         CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4987 
4988         /* if the dir is populated, add the new link */
4989         if (CFS_ISFS_NONSHARED(fscp) &&
4990             (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4991                 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4992                     &fcp->c_id, SM_ASYNC);
4993                 if (error) {
4994                         cachefs_nocache(tdcp);
4995                         error = 0;
4996                 }
4997         }
4998         mutex_exit(&tdcp->c_statelock);
4999 
5000         /* get the new link count on the file */
5001         mutex_enter(&fcp->c_statelock);
5002         fcp->c_flags |= CN_UPDATED;
5003         CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
5004         if (fcp->c_backvp == NULL) {
5005                 error = cachefs_getbackvp(fscp, fcp);
5006                 if (error) {
5007                         mutex_exit(&fcp->c_statelock);
5008                         goto out;
5009                 }
5010         }
5011 
5012         /* XXX bob: given what modify_cobject does this seems unnecessary */
5013         fcp->c_attr.va_mask = AT_ALL;
5014         error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5015         mutex_exit(&fcp->c_statelock);
5016 out:
5017         if (backvp)
5018                 VN_RELE(backvp);
5019 
5020         return (error);
5021 }
5022 
5023 static int
5024 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5025     cred_t *cr)
5026 {
5027         cnode_t *tdcp = VTOC(tdvp);
5028         cnode_t *fcp = VTOC(fvp);
5029         fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5030         int error = 0;
5031         timestruc_t current_time;
5032         off_t commit = 0;
5033 
5034         if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5035             fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5036                 return (EPERM);
5037 
5038         if (CFS_ISFS_WRITE_AROUND(fscp))
5039                 return (ETIMEDOUT);
5040 
5041         if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5042                 return (ETIMEDOUT);
5043 
5044         mutex_enter(&tdcp->c_statelock);
5045 
5046         /* check permissions */
5047         if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5048                 mutex_exit(&tdcp->c_statelock);
5049                 goto out;
5050         }
5051 
5052         /* the directory front file must be populated */
5053         if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5054                 error = ETIMEDOUT;
5055                 mutex_exit(&tdcp->c_statelock);
5056                 goto out;
5057         }
5058 
5059         /* make sure tnm does not already exist in the directory */
5060         error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5061         if (error == ENOTDIR) {
5062                 error = ETIMEDOUT;
5063                 mutex_exit(&tdcp->c_statelock);
5064                 goto out;
5065         }
5066         if (error != ENOENT) {
5067                 error = EEXIST;
5068                 mutex_exit(&tdcp->c_statelock);
5069                 goto out;
5070         }
5071 
5072         mutex_enter(&fcp->c_statelock);
5073 
5074         /* create a mapping for the file if necessary */
5075         if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5076                 error = cachefs_dlog_cidmap(fscp);
5077                 if (error) {
5078                         mutex_exit(&fcp->c_statelock);
5079                         mutex_exit(&tdcp->c_statelock);
5080                         error = ENOSPC;
5081                         goto out;
5082                 }
5083                 fcp->c_metadata.md_flags |= MD_MAPPING;
5084                 fcp->c_flags |= CN_UPDATED;
5085         }
5086 
5087         /* mark file as modified */
5088         if (cachefs_modified_alloc(fcp)) {
5089                 mutex_exit(&fcp->c_statelock);
5090                 mutex_exit(&tdcp->c_statelock);
5091                 error = ENOSPC;
5092                 goto out;
5093         }
5094         mutex_exit(&fcp->c_statelock);
5095 
5096         /* log the operation */
5097         commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5098         if (commit == 0) {
5099                 mutex_exit(&tdcp->c_statelock);
5100                 error = ENOSPC;
5101                 goto out;
5102         }
5103 
5104         gethrestime(&current_time);
5105 
5106         /* make the new link */
5107         cachefs_modified(tdcp);
5108         error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5109             &fcp->c_id, SM_ASYNC);
5110         if (error) {
5111                 error = 0;
5112                 mutex_exit(&tdcp->c_statelock);
5113                 goto out;
5114         }
5115 
5116         /* Update mtime/ctime of parent dir */
5117         tdcp->c_metadata.md_localmtime = current_time;
5118         tdcp->c_metadata.md_localctime = current_time;
5119         tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5120         tdcp->c_flags |= CN_UPDATED;
5121         mutex_exit(&tdcp->c_statelock);
5122 
5123         /* update the file we linked to */
5124         mutex_enter(&fcp->c_statelock);
5125         fcp->c_attr.va_nlink++;
5126         fcp->c_metadata.md_localctime = current_time;
5127         fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5128         fcp->c_flags |= CN_UPDATED;
5129         mutex_exit(&fcp->c_statelock);
5130 
5131 out:
5132         if (commit) {
5133                 /* commit the log entry */
5134                 if (cachefs_dlog_commit(fscp, commit, error)) {
5135                         /*EMPTY*/
5136                         /* XXX bob: fix on panic */
5137                 }
5138         }
5139 
5140         return (error);
5141 }
5142 
/*
 * Serialize all renames in CFS to avoid deadlocks: a rename has to hold
 * two cnodes atomically.
 */
kmutex_t cachefs_rename_lock;
5148 
5149 /*ARGSUSED*/
5150 static int
5151 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5152     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5153 {
5154         fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5155         cachefscache_t *cachep = fscp->fs_cache;
5156         int error = 0;
5157         int held = 0;
5158         int connected = 0;
5159         vnode_t *delvp = NULL;
5160         vnode_t *tvp = NULL;
5161         int vfslock = 0;
5162         struct vnode *realvp;
5163 
5164         if (getzoneid() != GLOBAL_ZONEID)
5165                 return (EPERM);
5166 
5167         if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5168                 ndvp = realvp;
5169 
5170         /*
5171          * if the fs NOFILL or NOCACHE flags are on, then the old and new
5172          * directory cnodes better indicate NOCACHE mode as well.
5173          */
5174         ASSERT(
5175             (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5176             ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5177             (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5178 
5179         /*
5180          * Cachefs only provides pass-through support for NFSv4,
5181          * and all vnode operations are passed through to the
5182          * back file system. For NFSv4 pass-through to work, only
5183          * connected operation is supported, the cnode backvp must
5184          * exist, and cachefs optional (eg., disconnectable) flags
5185          * are turned off. Assert these conditions to ensure that
5186          * the backfilesystem is called for the rename operation.
5187          */
5188         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5189         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5190         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5191 
5192         for (;;) {
5193                 if (vfslock) {
5194                         vn_vfsunlock(delvp);
5195                         vfslock = 0;
5196                 }
5197                 if (delvp) {
5198                         VN_RELE(delvp);
5199                         delvp = NULL;
5200                 }
5201 
5202                 /* get (or renew) access to the file system */
5203                 if (held) {
5204                         /* Won't loop for NFSv4 connected support */
5205                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5206                         cachefs_cd_release(fscp);
5207                         held = 0;
5208                 }
5209                 error = cachefs_cd_access(fscp, connected, 1);
5210                 if (error)
5211                         break;
5212                 held = 1;
5213 
5214                 /* sanity check */
5215                 if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5216                         error = EINVAL;
5217                         break;
5218                 }
5219 
5220                 /* cannot rename from or to . or .. */
5221                 if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5222                     strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5223                         error = EINVAL;
5224                         break;
5225                 }
5226 
5227                 if (odvp != ndvp) {
5228                         /*
5229                          * if moving a directory, its notion
5230                          * of ".." will change
5231                          */
5232                         error = cachefs_lookup_common(odvp, onm, &tvp,
5233                             NULL, 0, NULL, cr);
5234                         if (error == 0) {
5235                                 ASSERT(tvp != NULL);
5236                                 if (tvp->v_type == VDIR) {
5237                                         cnode_t *cp = VTOC(tvp);
5238 
5239                                         dnlc_remove(tvp, "..");
5240 
5241                                         mutex_enter(&cp->c_statelock);
5242                                         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5243                                         mutex_exit(&cp->c_statelock);
5244                                 }
5245                         } else {
5246                                 tvp = NULL;
5247                                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5248                                         if (CFS_TIMEOUT(fscp, error)) {
5249                                                 cachefs_cd_release(fscp);
5250                                                 held = 0;
5251                                                 cachefs_cd_timedout(fscp);
5252                                                 connected = 0;
5253                                                 continue;
5254                                         }
5255                                 } else {
5256                                         if (CFS_TIMEOUT(fscp, error)) {
5257                                                 connected = 1;
5258                                                 continue;
5259                                         }
5260                                 }
5261                                 break;
5262                         }
5263                 }
5264 
5265                 /* get the cnode if file being deleted */
5266                 error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5267                     NULL, cr);
5268                 if (error) {
5269                         delvp = NULL;
5270                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5271                                 if (CFS_TIMEOUT(fscp, error)) {
5272                                         cachefs_cd_release(fscp);
5273                                         held = 0;
5274                                         cachefs_cd_timedout(fscp);
5275                                         connected = 0;
5276                                         continue;
5277                                 }
5278                         } else {
5279                                 if (CFS_TIMEOUT(fscp, error)) {
5280                                         connected = 1;
5281                                         continue;
5282                                 }
5283                         }
5284                         if (error != ENOENT)
5285                                 break;
5286                 }
5287 
5288                 if (delvp && delvp->v_type == VDIR) {
5289                         /* see ufs_dirremove for why this is done, mount race */
5290                         if (vn_vfswlock(delvp)) {
5291                                 error = EBUSY;
5292                                 break;
5293                         }
5294                         vfslock = 1;
5295                         if (vn_mountedvfs(delvp) != NULL) {
5296                                 error = EBUSY;
5297                                 break;
5298                         }
5299                 }
5300 
5301                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5302                         error = cachefs_rename_connected(odvp, onm,
5303                             ndvp, nnm, cr, delvp);
5304                         if (CFS_TIMEOUT(fscp, error)) {
5305                                 cachefs_cd_release(fscp);
5306                                 held = 0;
5307                                 cachefs_cd_timedout(fscp);
5308                                 connected = 0;
5309                                 continue;
5310                         }
5311                 } else {
5312                         error = cachefs_rename_disconnected(odvp, onm,
5313                             ndvp, nnm, cr, delvp);
5314                         if (CFS_TIMEOUT(fscp, error)) {
5315                                 connected = 1;
5316                                 continue;
5317                         }
5318                 }
5319                 break;
5320         }
5321 
5322         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5323                 struct fid gone;
5324 
5325                 bzero(&gone, sizeof (gone));
5326                 gone.fid_len = MAXFIDSZ;
5327                 if (delvp != NULL)
5328                         (void) VOP_FID(delvp, &gone, ct);
5329 
5330                 cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5331                     &gone, 0, (delvp != NULL), crgetuid(cr));
5332         }
5333 
5334         if (held)
5335                 cachefs_cd_release(fscp);
5336 
5337         if (vfslock)
5338                 vn_vfsunlock(delvp);
5339 
5340         if (delvp)
5341                 VN_RELE(delvp);
5342         if (tvp)
5343                 VN_RELE(tvp);
5344 
5345 #ifdef CFS_CD_DEBUG
5346         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5347 #endif
5348         return (error);
5349 }
5350 
5351 static int
5352 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5353     char *nnm, cred_t *cr, vnode_t *delvp)
5354 {
5355         cnode_t *odcp = VTOC(odvp);
5356         cnode_t *ndcp = VTOC(ndvp);
5357         vnode_t *revp = NULL;
5358         cnode_t *recp;
5359         cnode_t *delcp;
5360         fscache_t *fscp = C_TO_FSCACHE(odcp);
5361         int error = 0;
5362         struct fid cookie;
5363         struct fid *cookiep;
5364         cfs_cid_t cid;
5365         int gotdirent;
5366 
5367         /* find the file we are renaming */
5368         error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5369         if (error)
5370                 return (error);
5371         recp = VTOC(revp);
5372 
5373         /*
5374          * To avoid deadlock, we acquire this global rename lock before
5375          * we try to get the locks for the source and target directories.
5376          */
5377         mutex_enter(&cachefs_rename_lock);
5378         rw_enter(&odcp->c_rwlock, RW_WRITER);
5379         if (odcp != ndcp) {
5380                 rw_enter(&ndcp->c_rwlock, RW_WRITER);
5381         }
5382         mutex_exit(&cachefs_rename_lock);
5383 
5384         ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5385         ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5386 
5387         mutex_enter(&odcp->c_statelock);
5388         if (odcp->c_backvp == NULL) {
5389                 error = cachefs_getbackvp(fscp, odcp);
5390                 if (error) {
5391                         mutex_exit(&odcp->c_statelock);
5392                         goto out;
5393                 }
5394         }
5395 
5396         error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5397         if (error) {
5398                 mutex_exit(&odcp->c_statelock);
5399                 goto out;
5400         }
5401         mutex_exit(&odcp->c_statelock);
5402 
5403         if (odcp != ndcp) {
5404                 mutex_enter(&ndcp->c_statelock);
5405                 if (ndcp->c_backvp == NULL) {
5406                         error = cachefs_getbackvp(fscp, ndcp);
5407                         if (error) {
5408                                 mutex_exit(&ndcp->c_statelock);
5409                                 goto out;
5410                         }
5411                 }
5412 
5413                 error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5414                 if (error) {
5415                         mutex_exit(&ndcp->c_statelock);
5416                         goto out;
5417                 }
5418                 mutex_exit(&ndcp->c_statelock);
5419         }
5420 
5421         /* if a file is being deleted because of this rename */
5422         if (delvp) {
5423                 /* if src and dest file are same */
5424                 if (delvp == revp) {
5425                         error = 0;
5426                         goto out;
5427                 }
5428 
5429                 /*
5430                  * If the cnode is active, make a link to the file
5431                  * so operations on the file will continue.
5432                  */
5433                 dnlc_purge_vp(delvp);
5434                 delcp = VTOC(delvp);
5435                 if ((delvp->v_type != VDIR) &&
5436                     !((delvp->v_count == 1) ||
5437                     ((delvp->v_count == 2) && delcp->c_ipending))) {
5438                         error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5439                         if (error)
5440                                 goto out;
5441                 }
5442         }
5443 
5444         /* do the rename on the back fs */
5445         CFS_DPRINT_BACKFS_NFSV4(fscp,
5446             ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5447             " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5448             odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5449         error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5450             0);
5451         if (error)
5452                 goto out;
5453 
5454         /* purge mappings to file in the old directory */
5455         dnlc_purge_vp(odvp);
5456 
5457         /* purge mappings in the new dir if we deleted a file */
5458         if (delvp && (odvp != ndvp))
5459                 dnlc_purge_vp(ndvp);
5460 
5461         /* update the file we just deleted */
5462         if (delvp) {
5463                 mutex_enter(&delcp->c_statelock);
5464                 if (delcp->c_attr.va_nlink == 1) {
5465                         delcp->c_flags |= CN_DESTROY;
5466                 } else {
5467                         delcp->c_flags |= CN_UPDATED;
5468                 }
5469                 delcp->c_attr.va_nlink--;
5470                 CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5471                 mutex_exit(&delcp->c_statelock);
5472         }
5473 
5474         /* find the entry in the old directory */
5475         mutex_enter(&odcp->c_statelock);
5476         gotdirent = 0;
5477         cookiep = NULL;
5478         if (CFS_ISFS_NONSHARED(fscp) &&
5479             (odcp->c_metadata.md_flags & MD_POPULATED)) {
5480                 error = cachefs_dir_look(odcp, onm, &cookie,
5481                     NULL, NULL, &cid);
5482                 if (error == 0 || error == EINVAL) {
5483                         gotdirent = 1;
5484                         if (error == 0)
5485                                 cookiep = &cookie;
5486                 } else {
5487                         cachefs_inval_object(odcp);
5488                 }
5489         }
5490         error = 0;
5491 
5492         /* remove the directory entry from the old directory */
5493         if (gotdirent) {
5494                 error = cachefs_dir_rmentry(odcp, onm);
5495                 if (error) {
5496                         cachefs_nocache(odcp);
5497                         error = 0;
5498                 }
5499         }
5500         CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5501         mutex_exit(&odcp->c_statelock);
5502 
5503         /* install the directory entry in the new directory */
5504         mutex_enter(&ndcp->c_statelock);
5505         if (CFS_ISFS_NONSHARED(fscp) &&
5506             (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5507                 error = 1;
5508                 if (gotdirent) {
5509                         ASSERT(cid.cid_fileno != 0);
5510                         error = 0;
5511                         if (delvp) {
5512                                 error = cachefs_dir_rmentry(ndcp, nnm);
5513                         }
5514                         if (error == 0) {
5515                                 error = cachefs_dir_enter(ndcp, nnm, cookiep,
5516                                     &cid, SM_ASYNC);
5517                         }
5518                 }
5519                 if (error) {
5520                         cachefs_nocache(ndcp);
5521                         error = 0;
5522                 }
5523         }
5524         if (odcp != ndcp)
5525                 CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5526         mutex_exit(&ndcp->c_statelock);
5527 
5528         /* ctime of renamed file has changed */
5529         mutex_enter(&recp->c_statelock);
5530         CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5531         mutex_exit(&recp->c_statelock);
5532 
5533 out:
5534         if (odcp != ndcp)
5535                 rw_exit(&ndcp->c_rwlock);
5536         rw_exit(&odcp->c_rwlock);
5537 
5538         VN_RELE(revp);
5539 
5540         return (error);
5541 }
5542 
5543 static int
5544 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5545     char *nnm, cred_t *cr, vnode_t *delvp)
5546 {
5547         cnode_t *odcp = VTOC(odvp);
5548         cnode_t *ndcp = VTOC(ndvp);
5549         cnode_t *delcp = NULL;
5550         vnode_t *revp = NULL;
5551         cnode_t *recp;
5552         fscache_t *fscp = C_TO_FSCACHE(odcp);
5553         int error = 0;
5554         struct fid cookie;
5555         struct fid *cookiep;
5556         cfs_cid_t cid;
5557         off_t commit = 0;
5558         timestruc_t current_time;
5559 
5560         if (CFS_ISFS_WRITE_AROUND(fscp))
5561                 return (ETIMEDOUT);
5562 
5563         /* find the file we are renaming */
5564         error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5565         if (error)
5566                 return (error);
5567         recp = VTOC(revp);
5568 
5569         /*
5570          * To avoid deadlock, we acquire this global rename lock before
5571          * we try to get the locks for the source and target directories.
5572          */
5573         mutex_enter(&cachefs_rename_lock);
5574         rw_enter(&odcp->c_rwlock, RW_WRITER);
5575         if (odcp != ndcp) {
5576                 rw_enter(&ndcp->c_rwlock, RW_WRITER);
5577         }
5578         mutex_exit(&cachefs_rename_lock);
5579 
5580         if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5581                 error = ETIMEDOUT;
5582                 goto out;
5583         }
5584 
5585         if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5586                 mutex_enter(&recp->c_statelock);
5587                 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5588                         error = cachefs_dlog_cidmap(fscp);
5589                         if (error) {
5590                                 mutex_exit(&recp->c_statelock);
5591                                 error = ENOSPC;
5592                                 goto out;
5593                         }
5594                         recp->c_metadata.md_flags |= MD_MAPPING;
5595                         recp->c_flags |= CN_UPDATED;
5596                 }
5597                 mutex_exit(&recp->c_statelock);
5598         }
5599 
5600         /* check permissions */
5601         /* XXX clean up this mutex junk sometime */
5602         mutex_enter(&odcp->c_statelock);
5603         error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5604         mutex_exit(&odcp->c_statelock);
5605         if (error != 0)
5606                 goto out;
5607         mutex_enter(&ndcp->c_statelock);
5608         error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5609         mutex_exit(&ndcp->c_statelock);
5610         if (error != 0)
5611                 goto out;
5612         mutex_enter(&odcp->c_statelock);
5613         error = cachefs_stickyrmchk(odcp, recp, cr);
5614         mutex_exit(&odcp->c_statelock);
5615         if (error != 0)
5616                 goto out;
5617 
5618         /* dirs must be populated */
5619         if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5620             ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5621                 error = ETIMEDOUT;
5622                 goto out;
5623         }
5624 
5625         /* for now do not allow moving dirs because could cause cycles */
5626         if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5627             (revp == odvp)) {
5628                 error = ETIMEDOUT;
5629                 goto out;
5630         }
5631 
5632         /* if a file is being deleted because of this rename */
5633         if (delvp) {
5634                 delcp = VTOC(delvp);
5635 
5636                 /* if src and dest file are the same */
5637                 if (delvp == revp) {
5638                         error = 0;
5639                         goto out;
5640                 }
5641 
5642                 if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5643                         error = ETIMEDOUT;
5644                         goto out;
5645                 }
5646 
5647                 /* if there are hard links to this file */
5648                 if (delcp->c_attr.va_nlink > 1) {
5649                         mutex_enter(&delcp->c_statelock);
5650                         if (cachefs_modified_alloc(delcp)) {
5651                                 mutex_exit(&delcp->c_statelock);
5652                                 error = ENOSPC;
5653                                 goto out;
5654                         }
5655 
5656                         if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5657                                 error = cachefs_dlog_cidmap(fscp);
5658                                 if (error) {
5659                                         mutex_exit(&delcp->c_statelock);
5660                                         error = ENOSPC;
5661                                         goto out;
5662                                 }
5663                                 delcp->c_metadata.md_flags |= MD_MAPPING;
5664                                 delcp->c_flags |= CN_UPDATED;
5665                         }
5666                         mutex_exit(&delcp->c_statelock);
5667                 }
5668 
5669                 /* make sure we can delete file */
5670                 mutex_enter(&ndcp->c_statelock);
5671                 error = cachefs_stickyrmchk(ndcp, delcp, cr);
5672                 mutex_exit(&ndcp->c_statelock);
5673                 if (error != 0)
5674                         goto out;
5675 
5676                 /*
5677                  * If the cnode is active, make a link to the file
5678                  * so operations on the file will continue.
5679                  */
5680                 dnlc_purge_vp(delvp);
5681                 if ((delvp->v_type != VDIR) &&
5682                     !((delvp->v_count == 1) ||
5683                     ((delvp->v_count == 2) && delcp->c_ipending))) {
5684                         error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5685                         if (error)
5686                                 goto out;
5687                 }
5688         }
5689 
5690         /* purge mappings to file in the old directory */
5691         dnlc_purge_vp(odvp);
5692 
5693         /* purge mappings in the new dir if we deleted a file */
5694         if (delvp && (odvp != ndvp))
5695                 dnlc_purge_vp(ndvp);
5696 
5697         /* find the entry in the old directory */
5698         mutex_enter(&odcp->c_statelock);
5699         if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5700                 mutex_exit(&odcp->c_statelock);
5701                 error = ETIMEDOUT;
5702                 goto out;
5703         }
5704         cookiep = NULL;
5705         error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5706         if (error == 0 || error == EINVAL) {
5707                 if (error == 0)
5708                         cookiep = &cookie;
5709         } else {
5710                 mutex_exit(&odcp->c_statelock);
5711                 if (error == ENOTDIR)
5712                         error = ETIMEDOUT;
5713                 goto out;
5714         }
5715         error = 0;
5716 
5717         /* write the log entry */
5718         commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5719             recp, delcp);
5720         if (commit == 0) {
5721                 mutex_exit(&odcp->c_statelock);
5722                 error = ENOSPC;
5723                 goto out;
5724         }
5725 
5726         /* remove the directory entry from the old directory */
5727         cachefs_modified(odcp);
5728         error = cachefs_dir_rmentry(odcp, onm);
5729         if (error) {
5730                 mutex_exit(&odcp->c_statelock);
5731                 if (error == ENOTDIR)
5732                         error = ETIMEDOUT;
5733                 goto out;
5734         }
5735         mutex_exit(&odcp->c_statelock);
5736 
5737         /* install the directory entry in the new directory */
5738         mutex_enter(&ndcp->c_statelock);
5739         error = ENOTDIR;
5740         if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5741                 ASSERT(cid.cid_fileno != 0);
5742                 cachefs_modified(ndcp);
5743                 error = 0;
5744                 if (delvp) {
5745                         error = cachefs_dir_rmentry(ndcp, nnm);
5746                 }
5747                 if (error == 0) {
5748                         error = cachefs_dir_enter(ndcp, nnm, cookiep,
5749                             &cid, SM_ASYNC);
5750                 }
5751         }
5752         if (error) {
5753                 cachefs_nocache(ndcp);
5754                 mutex_exit(&ndcp->c_statelock);
5755                 mutex_enter(&odcp->c_statelock);
5756                 cachefs_nocache(odcp);
5757                 mutex_exit(&odcp->c_statelock);
5758                 if (error == ENOTDIR)
5759                         error = ETIMEDOUT;
5760                 goto out;
5761         }
5762         mutex_exit(&ndcp->c_statelock);
5763 
5764         gethrestime(&current_time);
5765 
5766         /* update the file we just deleted */
5767         if (delvp) {
5768                 mutex_enter(&delcp->c_statelock);
5769                 delcp->c_attr.va_nlink--;
5770                 delcp->c_metadata.md_localctime = current_time;
5771                 delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5772                 if (delcp->c_attr.va_nlink == 0) {
5773                         delcp->c_flags |= CN_DESTROY;
5774                 } else {
5775                         delcp->c_flags |= CN_UPDATED;
5776                 }
5777                 mutex_exit(&delcp->c_statelock);
5778         }
5779 
5780         /* update the file we renamed */
5781         mutex_enter(&recp->c_statelock);
5782         recp->c_metadata.md_localctime = current_time;
5783         recp->c_metadata.md_flags |= MD_LOCALCTIME;
5784         recp->c_flags |= CN_UPDATED;
5785         mutex_exit(&recp->c_statelock);
5786 
5787         /* update the source directory */
5788         mutex_enter(&odcp->c_statelock);
5789         odcp->c_metadata.md_localctime = current_time;
5790         odcp->c_metadata.md_localmtime = current_time;
5791         odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5792         odcp->c_flags |= CN_UPDATED;
5793         mutex_exit(&odcp->c_statelock);
5794 
5795         /* update the destination directory */
5796         if (odcp != ndcp) {
5797                 mutex_enter(&ndcp->c_statelock);
5798                 ndcp->c_metadata.md_localctime = current_time;
5799                 ndcp->c_metadata.md_localmtime = current_time;
5800                 ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5801                 ndcp->c_flags |= CN_UPDATED;
5802                 mutex_exit(&ndcp->c_statelock);
5803         }
5804 
5805 out:
5806         if (commit) {
5807                 /* commit the log entry */
5808                 if (cachefs_dlog_commit(fscp, commit, error)) {
5809                         /*EMPTY*/
5810                         /* XXX bob: fix on panic */
5811                 }
5812         }
5813 
5814         if (odcp != ndcp)
5815                 rw_exit(&ndcp->c_rwlock);
5816         rw_exit(&odcp->c_rwlock);
5817 
5818         VN_RELE(revp);
5819 
5820         return (error);
5821 }
5822 
5823 /*ARGSUSED*/
5824 static int
5825 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5826     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5827 {
5828         cnode_t *dcp = VTOC(dvp);
5829         fscache_t *fscp = C_TO_FSCACHE(dcp);
5830         cachefscache_t *cachep = fscp->fs_cache;
5831         int error = 0;
5832         int held = 0;
5833         int connected = 0;
5834 
5835 #ifdef CFSDEBUG
5836         CFS_DEBUG(CFSDEBUG_VOPS)
5837                 printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5838 #endif
5839 
5840         if (getzoneid() != GLOBAL_ZONEID) {
5841                 error = EPERM;
5842                 goto out;
5843         }
5844 
5845         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5846                 ASSERT(dcp->c_flags & CN_NOCACHE);
5847 
5848         /*
5849          * Cachefs only provides pass-through support for NFSv4,
5850          * and all vnode operations are passed through to the
5851          * back file system. For NFSv4 pass-through to work, only
5852          * connected operation is supported, the cnode backvp must
5853          * exist, and cachefs optional (eg., disconnectable) flags
5854          * are turned off. Assert these conditions to ensure that
5855          * the backfilesystem is called for the mkdir operation.
5856          */
5857         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5858         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5859 
5860         for (;;) {
5861                 /* get (or renew) access to the file system */
5862                 if (held) {
5863                         /* Won't loop with NFSv4 connected behavior */
5864                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5865                         rw_exit(&dcp->c_rwlock);
5866                         cachefs_cd_release(fscp);
5867                         held = 0;
5868                 }
5869                 error = cachefs_cd_access(fscp, connected, 1);
5870                 if (error)
5871                         break;
5872                 rw_enter(&dcp->c_rwlock, RW_WRITER);
5873                 held = 1;
5874 
5875                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5876                         error = cachefs_mkdir_connected(dvp, nm, vap,
5877                             vpp, cr);
5878                         if (CFS_TIMEOUT(fscp, error)) {
5879                                 rw_exit(&dcp->c_rwlock);
5880                                 cachefs_cd_release(fscp);
5881                                 held = 0;
5882                                 cachefs_cd_timedout(fscp);
5883                                 connected = 0;
5884                                 continue;
5885                         }
5886                 } else {
5887                         error = cachefs_mkdir_disconnected(dvp, nm, vap,
5888                             vpp, cr);
5889                         if (CFS_TIMEOUT(fscp, error)) {
5890                                 connected = 1;
5891                                 continue;
5892                         }
5893                 }
5894                 break;
5895         }
5896 
5897         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5898                 fid_t *fidp = NULL;
5899                 ino64_t fileno = 0;
5900                 cnode_t *cp = NULL;
5901                 if (error == 0)
5902                         cp = VTOC(*vpp);
5903 
5904                 if (cp != NULL) {
5905                         fidp = &cp->c_metadata.md_cookie;
5906                         fileno = cp->c_id.cid_fileno;
5907                 }
5908 
5909                 cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5910                     fidp, fileno, crgetuid(cr));
5911         }
5912 
5913         if (held) {
5914                 rw_exit(&dcp->c_rwlock);
5915                 cachefs_cd_release(fscp);
5916         }
5917         if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5918                 (void) cachefs_pack(dvp, nm, cr);
5919 
5920 #ifdef CFS_CD_DEBUG
5921         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5922 #endif
5923 out:
5924 #ifdef CFSDEBUG
5925         CFS_DEBUG(CFSDEBUG_VOPS)
5926                 printf("cachefs_mkdir: EXIT error = %d\n", error);
5927 #endif
5928         return (error);
5929 }
5930 
5931 static int
5932 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5933     vnode_t **vpp, cred_t *cr)
5934 {
5935         cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5936         struct vnode *vp = NULL;
5937         int error = 0;
5938         fscache_t *fscp = C_TO_FSCACHE(dcp);
5939         struct fid cookie;
5940         struct vattr attr;
5941         cfs_cid_t cid, dircid;
5942         uint32_t valid_fid;
5943 
5944         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5945                 ASSERT(dcp->c_flags & CN_NOCACHE);
5946 
5947         mutex_enter(&dcp->c_statelock);
5948 
5949         /* get backvp of dir */
5950         if (dcp->c_backvp == NULL) {
5951                 error = cachefs_getbackvp(fscp, dcp);
5952                 if (error) {
5953                         mutex_exit(&dcp->c_statelock);
5954                         goto out;
5955                 }
5956         }
5957 
5958         /* consistency check the directory */
5959         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5960         if (error) {
5961                 mutex_exit(&dcp->c_statelock);
5962                 goto out;
5963         }
5964         dircid = dcp->c_id;
5965 
5966         /* make the dir on the back fs */
5967         CFS_DPRINT_BACKFS_NFSV4(fscp,
5968             ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5969             "name %s\n", dcp, dcp->c_backvp, nm));
5970         error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5971         mutex_exit(&dcp->c_statelock);
5972         if (error) {
5973                 goto out;
5974         }
5975 
5976         /* get the cookie and make the cnode */
5977         attr.va_mask = AT_ALL;
5978         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5979         error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5980         if (error) {
5981                 goto out;
5982         }
5983         cid.cid_flags = 0;
5984         cid.cid_fileno = attr.va_nodeid;
5985         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5986             &attr, vp, cr, 0, &newcp);
5987         if (error) {
5988                 goto out;
5989         }
5990         ASSERT(CTOV(newcp)->v_type == VDIR);
5991         *vpp = CTOV(newcp);
5992 
5993         /* if the dir is populated, add the new entry */
5994         mutex_enter(&dcp->c_statelock);
5995         if (CFS_ISFS_NONSHARED(fscp) &&
5996             (dcp->c_metadata.md_flags & MD_POPULATED)) {
5997                 error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5998                     SM_ASYNC);
5999                 if (error) {
6000                         cachefs_nocache(dcp);
6001                         error = 0;
6002                 }
6003         }
6004         dcp->c_attr.va_nlink++;
6005         dcp->c_flags |= CN_UPDATED;
6006         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6007         mutex_exit(&dcp->c_statelock);
6008 
6009         /* XXX bob: should we do a filldir here? or just add . and .. */
6010         /* maybe should kick off an async filldir so caller does not wait */
6011 
6012         /* put the entry in the dnlc */
6013         if (cachefs_dnlc)
6014                 dnlc_enter(dvp, nm, *vpp);
6015 
6016         /* save the fileno of the parent so can find the name */
6017         if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6018             sizeof (cfs_cid_t)) != 0) {
6019                 mutex_enter(&newcp->c_statelock);
6020                 newcp->c_metadata.md_parent = dircid;
6021                 newcp->c_flags |= CN_UPDATED;
6022                 mutex_exit(&newcp->c_statelock);
6023         }
6024 out:
6025         if (vp)
6026                 VN_RELE(vp);
6027 
6028         return (error);
6029 }
6030 
6031 static int
6032 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6033     vnode_t **vpp, cred_t *cr)
6034 {
6035         cnode_t *dcp = VTOC(dvp);
6036         fscache_t *fscp = C_TO_FSCACHE(dcp);
6037         int error;
6038         cnode_t *newcp = NULL;
6039         struct vattr va;
6040         timestruc_t current_time;
6041         off_t commit = 0;
6042         char *s;
6043         int namlen;
6044 
6045         /* don't allow '/' characters in pathname component */
6046         for (s = nm, namlen = 0; *s; s++, namlen++)
6047                 if (*s == '/')
6048                         return (EACCES);
6049         if (namlen == 0)
6050                 return (EINVAL);
6051 
6052         if (CFS_ISFS_WRITE_AROUND(fscp))
6053                 return (ETIMEDOUT);
6054 
6055         mutex_enter(&dcp->c_statelock);
6056 
6057         /* check permissions */
6058         if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6059                 mutex_exit(&dcp->c_statelock);
6060                 goto out;
6061         }
6062 
6063         /* the directory front file must be populated */
6064         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6065                 error = ETIMEDOUT;
6066                 mutex_exit(&dcp->c_statelock);
6067                 goto out;
6068         }
6069 
6070         /* make sure nm does not already exist in the directory */
6071         error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6072         if (error == ENOTDIR) {
6073                 error = ETIMEDOUT;
6074                 mutex_exit(&dcp->c_statelock);
6075                 goto out;
6076         }
6077         if (error != ENOENT) {
6078                 error = EEXIST;
6079                 mutex_exit(&dcp->c_statelock);
6080                 goto out;
6081         }
6082 
6083         /* make up a reasonable set of attributes */
6084         cachefs_attr_setup(vap, &va, dcp, cr);
6085         va.va_type = VDIR;
6086         va.va_mode |= S_IFDIR;
6087         va.va_nlink = 2;
6088 
6089         mutex_exit(&dcp->c_statelock);
6090 
6091         /* create the cnode */
6092         error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6093         if (error)
6094                 goto out;
6095 
6096         mutex_enter(&newcp->c_statelock);
6097 
6098         error = cachefs_dlog_cidmap(fscp);
6099         if (error) {
6100                 mutex_exit(&newcp->c_statelock);
6101                 goto out;
6102         }
6103 
6104         cachefs_creategid(dcp, newcp, vap, cr);
6105         mutex_enter(&dcp->c_statelock);
6106         cachefs_createacl(dcp, newcp);
6107         mutex_exit(&dcp->c_statelock);
6108         gethrestime(&current_time);
6109         newcp->c_metadata.md_vattr.va_atime = current_time;
6110         newcp->c_metadata.md_localctime = current_time;
6111         newcp->c_metadata.md_localmtime = current_time;
6112         newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6113             MD_LOCALCTIME;
6114         newcp->c_flags |= CN_UPDATED;
6115 
6116         /* make a front file for the new directory, add . and .. */
6117         error = cachefs_dir_new(dcp, newcp);
6118         if (error) {
6119                 mutex_exit(&newcp->c_statelock);
6120                 goto out;
6121         }
6122         cachefs_modified(newcp);
6123 
6124         /*
6125          * write the metadata now rather than waiting until
6126          * inactive so that if there's no space we can let
6127          * the caller know.
6128          */
6129         ASSERT(newcp->c_frontvp);
6130         ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6131         ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6132         error = filegrp_write_metadata(newcp->c_filegrp,
6133             &newcp->c_id, &newcp->c_metadata);
6134         if (error) {
6135                 mutex_exit(&newcp->c_statelock);
6136                 goto out;
6137         }
6138         mutex_exit(&newcp->c_statelock);
6139 
6140         /* log the operation */
6141         commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6142         if (commit == 0) {
6143                 error = ENOSPC;
6144                 goto out;
6145         }
6146 
6147         mutex_enter(&dcp->c_statelock);
6148 
6149         /* make sure directory is still populated */
6150         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6151                 mutex_exit(&dcp->c_statelock);
6152                 error = ETIMEDOUT;
6153                 goto out;
6154         }
6155         cachefs_modified(dcp);
6156 
6157         /* enter the new file in the directory */
6158         error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6159             &newcp->c_id, SM_ASYNC);
6160         if (error) {
6161                 mutex_exit(&dcp->c_statelock);
6162                 goto out;
6163         }
6164 
6165         /* update parent dir times */
6166         dcp->c_metadata.md_localctime = current_time;
6167         dcp->c_metadata.md_localmtime = current_time;
6168         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6169         dcp->c_attr.va_nlink++;
6170         dcp->c_flags |= CN_UPDATED;
6171         mutex_exit(&dcp->c_statelock);
6172 
6173 out:
6174         if (commit) {
6175                 /* commit the log entry */
6176                 if (cachefs_dlog_commit(fscp, commit, error)) {
6177                         /*EMPTY*/
6178                         /* XXX bob: fix on panic */
6179                 }
6180         }
6181         if (error) {
6182                 if (newcp) {
6183                         mutex_enter(&newcp->c_statelock);
6184                         newcp->c_flags |= CN_DESTROY;
6185                         mutex_exit(&newcp->c_statelock);
6186                         VN_RELE(CTOV(newcp));
6187                 }
6188         } else {
6189                 *vpp = CTOV(newcp);
6190         }
6191         return (error);
6192 }
6193 
/*
 * cachefs_rmdir - remove the directory named nm from the directory dvp.
 *
 * Fails with EPERM when called outside the global zone.  Otherwise the
 * function loops: it obtains access to the file system through
 * cachefs_cd_access(), looks up nm, checks that the result is a directory,
 * is not cdir (per VOP_CMP) and has no vfs mounted on it (per
 * vn_mountedvfs), and then hands the removal to cachefs_rmdir_connected()
 * or cachefs_rmdir_disconnected() depending on fscp->fs_cdconnected.
 * Whenever CFS_TIMEOUT() is true for an error the loop is restarted, with
 * `connected' set to 0 if the timeout happened in connected mode and to 1
 * if it happened in disconnected mode.
 *
 * Returns 0 on success or an errno value.  The vnode returned by the
 * lookup, its vfs write lock and the cd access are all dropped before
 * returning.
 */
6194 /*ARGSUSED*/
6195 static int
6196 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6197     caller_context_t *ct, int flags)
6198 {
6199         cnode_t *dcp = VTOC(dvp);
6200         fscache_t *fscp = C_TO_FSCACHE(dcp);
6201         cachefscache_t *cachep = fscp->fs_cache;
6202         int error = 0;
6203         int held = 0;           /* set while cachefs_cd_access() is in effect */
6204         int connected = 0;      /* second argument to cachefs_cd_access() */
6205         size_t namlen;
6206         vnode_t *vp = NULL;     /* directory being removed, held if non-NULL */
6207         int vfslock = 0;        /* set while vn_vfswlock(vp) is held */
6208 
6209 #ifdef CFSDEBUG
6210         CFS_DEBUG(CFSDEBUG_VOPS)
6211                 printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6212 #endif
6213 
             /* nothing has been acquired yet, so skip straight to the exit */
6214         if (getzoneid() != GLOBAL_ZONEID) {
6215                 error = EPERM;
6216                 goto out;
6217         }
6218 
6219         if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6220                 ASSERT(dcp->c_flags & CN_NOCACHE);
6221 
6222         /*
6223          * Cachefs only provides pass-through support for NFSv4,
6224          * and all vnode operations are passed through to the
6225          * back file system. For NFSv4 pass-through to work, only
6226          * connected operation is supported, the cnode backvp must
6227          * exist, and cachefs optional (eg., disconnectable) flags
6228          * are turned off. Assert these conditions to ensure that
6229          * the backfilesystem is called for the rmdir operation.
6230          */
6231         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6232         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6233 
6234         for (;;) {
                     /*
                      * On a retry, drop the vfs lock and the vnode hold
                      * left over from the previous pass before starting
                      * again.
                      */
6235                 if (vfslock) {
6236                         vn_vfsunlock(vp);
6237                         vfslock = 0;
6238                 }
6239                 if (vp) {
6240                         VN_RELE(vp);
6241                         vp = NULL;
6242                 }
6243 
6244                 /* get (or renew) access to the file system */
6245                 if (held) {
6246                         /* Won't loop with NFSv4 connected behavior */
6247                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6248                         cachefs_cd_release(fscp);
6249                         held = 0;
6250                 }
6251                 error = cachefs_cd_access(fscp, connected, 1);
6252                 if (error)
6253                         break;
6254                 held = 1;
6255 
6256                 /* if disconnected, do some extra error checking */
6257                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6258                         /* check permissions */
6259                         mutex_enter(&dcp->c_statelock);
6260                         error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6261                         mutex_exit(&dcp->c_statelock);
6262                         if (CFS_TIMEOUT(fscp, error)) {
6263                                 connected = 1;
6264                                 continue;
6265                         }
6266                         if (error)
6267                                 break;
6268 
6269                         namlen = strlen(nm);
6270                         if (namlen == 0) {
6271                                 error = EINVAL;
6272                                 break;
6273                         }
6274 
6275                         /* cannot remove . (EINVAL) or .. (EEXIST) */
6276                         if (nm[0] == '.') {
6277                                 if (namlen == 1) {
6278                                         error = EINVAL;
6279                                         break;
6280                                 } else if (namlen == 2 && nm[1] == '.') {
6281                                         error = EEXIST;
6282                                         break;
6283                                 }
6284                         }
6285 
6286                 }
6287 
6288                 /* get the cnode of the dir to remove */
6289                 error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6290                 if (error) {
                             /*
                              * A timeout restarts the loop; in connected
                              * mode the cd access is released and
                              * cachefs_cd_timedout() is called first.  Any
                              * other error ends the loop.
                              */
6291                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6292                                 if (CFS_TIMEOUT(fscp, error)) {
6293                                         cachefs_cd_release(fscp);
6294                                         held = 0;
6295                                         cachefs_cd_timedout(fscp);
6296                                         connected = 0;
6297                                         continue;
6298                                 }
6299                         } else {
6300                                 if (CFS_TIMEOUT(fscp, error)) {
6301                                         connected = 1;
6302                                         continue;
6303                                 }
6304                         }
6305                         break;
6306                 }
6307 
6308                 /* must be a dir */
6309                 if (vp->v_type != VDIR) {
6310                         error = ENOTDIR;
6311                         break;
6312                 }
6313 
6314                 /* must not be current dir */
6315                 if (VOP_CMP(vp, cdir, ct)) {
6316                         error = EINVAL;
6317                         break;
6318                 }
6319 
6320                 /* see ufs_dirremove for why this is done, mount race */
6321                 if (vn_vfswlock(vp)) {
6322                         error = EBUSY;
6323                         break;
6324                 }
6325                 vfslock = 1;
6326                 if (vn_mountedvfs(vp) != NULL) {
6327                         error = EBUSY;
6328                         break;
6329                 }
6330 
                     /* do the removal in whichever mode we are currently in */
6331                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6332                         error = cachefs_rmdir_connected(dvp, nm, cdir,
6333                             cr, vp);
6334                         if (CFS_TIMEOUT(fscp, error)) {
6335                                 cachefs_cd_release(fscp);
6336                                 held = 0;
6337                                 cachefs_cd_timedout(fscp);
6338                                 connected = 0;
6339                                 continue;
6340                         }
6341                 } else {
6342                         error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6343                             cr, vp);
6344                         if (CFS_TIMEOUT(fscp, error)) {
6345                                 connected = 1;
6346                                 continue;
6347                         }
6348                 }
6349                 break;
6350         }
6351 
             /*
              * Log the outcome.  vp is NULL here when the loop ended before
              * or during the lookup, in which case a NULL fid and a fileno
              * of 0 are logged.
              */
6352         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6353                 ino64_t fileno = 0;
6354                 fid_t *fidp = NULL;
6355                 cnode_t *cp = NULL;
6356                 if (vp)
6357                         cp = VTOC(vp);
6358 
6359                 if (cp != NULL) {
6360                         fidp = &cp->c_metadata.md_cookie;
6361                         fileno = cp->c_id.cid_fileno;
6362                 }
6363 
6364                 cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6365                     fidp, fileno, crgetuid(cr));
6366         }
6367 
             /* release whatever the final pass through the loop still holds */
6368         if (held) {
6369                 cachefs_cd_release(fscp);
6370         }
6371 
6372         if (vfslock)
6373                 vn_vfsunlock(vp);
6374 
6375         if (vp)
6376                 VN_RELE(vp);
6377 
6378 #ifdef CFS_CD_DEBUG
6379         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6380 #endif
6381 out:
6382 #ifdef CFSDEBUG
6383         CFS_DEBUG(CFSDEBUG_VOPS)
6384                 printf("cachefs_rmdir: EXIT error = %d\n", error);
6385 #endif
6386 
6387         return (error);
6388 }
6389 
/*
 * cachefs_rmdir_connected - remove directory nm from dvp by calling
 * VOP_RMDIR() on the parent's back vnode.
 *
 * vp is the vnode of the directory being removed; cachefs_rmdir() looks it
 * up before calling here.  dcp->c_rwlock is held as writer, and the
 * c_statelock of both the parent and the victim cnode is held, for the
 * whole operation.
 *
 * Returns 0, or the error from cachefs_getbackvp(), CFSOP_CHECK_COBJECT()
 * or VOP_RMDIR().  A failure of cachefs_dir_rmentry() is not returned: the
 * parent is handed to cachefs_nocache() and the error is cleared.
 */
6390 static int
6391 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6392     vnode_t *vp)
6393 {
6394         cnode_t *dcp = VTOC(dvp);       /* parent directory */
6395         cnode_t *cp = VTOC(vp);         /* directory being removed */
6396         int error = 0;
6397         fscache_t *fscp = C_TO_FSCACHE(dcp);
6398 
             /* lock order: parent rwlock, parent statelock, child statelock */
6399         rw_enter(&dcp->c_rwlock, RW_WRITER);
6400         mutex_enter(&dcp->c_statelock);
6401         mutex_enter(&cp->c_statelock);
6402 
             /* make sure the parent has a back vnode to operate on */
6403         if (dcp->c_backvp == NULL) {
6404                 error = cachefs_getbackvp(fscp, dcp);
6405                 if (error) {
6406                         goto out;
6407                 }
6408         }
6409 
6410         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6411         if (error)
6412                 goto out;
6413 
6414         /* rmdir on the back fs */
6415         CFS_DPRINT_BACKFS_NFSV4(fscp,
6416             ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6417             "name %s\n", dcp, dcp->c_backvp, nm));
6418         error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6419         if (error)
6420                 goto out;
6421 
6422         /* if the dir is populated, remove the entry from it */
6423         if (CFS_ISFS_NONSHARED(fscp) &&
6424             (dcp->c_metadata.md_flags & MD_POPULATED)) {
6425                 error = cachefs_dir_rmentry(dcp, nm);
6426                 if (error) {
                             /* back fs rmdir already succeeded; don't fail */
6427                         cachefs_nocache(dcp);
6428                         error = 0;
6429                 }
6430         }
6431 
6432         /*
6433          * *if* the (hard) link count goes to 0, then we set the CN_DESTROY
6434          * flag on the cnode. The cached object will then be destroyed
6435          * at inactive time where the chickens come home to roost :-)
6436          * The link cnt for directories is bumped down by 2 'cause the "."
6437          * entry has to be elided too ! The link cnt for the parent goes down
6438          * by 1 (because of "..").
6439          */
6440         cp->c_attr.va_nlink -= 2;
6441         dcp->c_attr.va_nlink--;
6442         if (cp->c_attr.va_nlink == 0) {
6443                 cp->c_flags |= CN_DESTROY;
6444         } else {
6445                 cp->c_flags |= CN_UPDATED;
6446         }
6447         dcp->c_flags |= CN_UPDATED;
6448 
6449         dnlc_purge_vp(vp);
6450         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6451 
6452 out:
             /* release in the reverse of the order taken above */
6453         mutex_exit(&cp->c_statelock);
6454         mutex_exit(&dcp->c_statelock);
6455         rw_exit(&dcp->c_rwlock);
6456 
6457         return (error);
6458 }
6459 
/*
 * cachefs_rmdir_disconnected - remove directory nm from dvp using only the
 * cached copies, and record the operation with cachefs_dlog_rmdir().
 *
 * vp is the vnode of the directory being removed.  dcp->c_rwlock is held
 * as writer and both cnodes' c_statelock are held while the cache is
 * modified.
 *
 * Returns:
 *   ETIMEDOUT  the file system is write-around, either directory is not
 *              populated, or cachefs_dir_empty()/cachefs_dir_rmentry()
 *              returned ENOTDIR (presumably so that the caller's
 *              CFS_TIMEOUT() check retries in connected mode -- confirm
 *              against the macro)
 *   ENOSPC     cachefs_dlog_rmdir() returned 0 (no log entry)
 *   other      error from cachefs_stickyrmchk(), cachefs_dir_empty() or
 *              cachefs_dir_rmentry()
 *   0          success
 *
 * If a log entry was created it is committed with the final error value;
 * the result of cachefs_dlog_commit() is ignored.
 */
6460 static int
6461 /*ARGSUSED*/
6462 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6463     cred_t *cr, vnode_t *vp)
6464 {
6465         cnode_t *dcp = VTOC(dvp);       /* parent directory */
6466         cnode_t *cp = VTOC(vp);         /* directory being removed */
6467         fscache_t *fscp = C_TO_FSCACHE(dcp);
6468         int error = 0;
6469         off_t commit = 0;               /* nonzero once a log entry exists */
6470         timestruc_t current_time;
6471 
6472         if (CFS_ISFS_WRITE_AROUND(fscp))
6473                 return (ETIMEDOUT);
6474 
             /* lock order: parent rwlock, parent statelock, child statelock */
6475         rw_enter(&dcp->c_rwlock, RW_WRITER);
6476         mutex_enter(&dcp->c_statelock);
6477         mutex_enter(&cp->c_statelock);
6478 
6479         /* both directories must be populated */
6480         if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6481             ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6482                 error = ETIMEDOUT;
6483                 goto out;
6484         }
6485 
6486         /* if sticky bit set on the dir, more access checks to perform */
6487         if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6488                 goto out;
6489         }
6490 
6491         /*
              * make sure dir is empty
              *
              * NOTE(review): cachefs_dir_empty() is only consulted when the
              * link count exceeds 2; with nlink <= 2 the emptiness check
              * and cachefs_modified(cp) are both skipped -- confirm that
              * this is intended.
              */
6492         if (cp->c_attr.va_nlink > 2) {
6493                 error = cachefs_dir_empty(cp);
6494                 if (error) {
6495                         if (error == ENOTDIR)
6496                                 error = ETIMEDOUT;
6497                         goto out;
6498                 }
6499                 cachefs_modified(cp);
6500         }
6501         cachefs_modified(dcp);
6502 
6503         /* log the operation */
6504         commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6505         if (commit == 0) {
6506                 error = ENOSPC;
6507                 goto out;
6508         }
6509 
6510         /* remove name from parent dir */
6511         error = cachefs_dir_rmentry(dcp, nm);
6512         if (error == ENOTDIR) {
6513                 error = ETIMEDOUT;
6514                 goto out;
6515         }
6516         if (error)
6517                 goto out;
6518 
6519         gethrestime(&current_time);
6520 
6521         /*
              * update deleted dir values: drop the link count by 2 and
              * either mark the cnode CN_DESTROY (count reached 0) or stamp
              * a local ctime and mark it CN_UPDATED
              */
6522         cp->c_attr.va_nlink -= 2;
6523         if (cp->c_attr.va_nlink == 0)
6524                 cp->c_flags |= CN_DESTROY;
6525         else {
6526                 cp->c_metadata.md_localctime = current_time;
6527                 cp->c_metadata.md_flags |= MD_LOCALCTIME;
6528                 cp->c_flags |= CN_UPDATED;
6529         }
6530 
6531         /* update parent values */
6532         dcp->c_metadata.md_localctime = current_time;
6533         dcp->c_metadata.md_localmtime = current_time;
6534         dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6535         dcp->c_attr.va_nlink--;
6536         dcp->c_flags |= CN_UPDATED;
6537 
6538 out:
             /* release in the reverse of the order taken above */
6539         mutex_exit(&cp->c_statelock);
6540         mutex_exit(&dcp->c_statelock);
6541         rw_exit(&dcp->c_rwlock);
6542         if (commit) {
6543                 /* commit the log entry */
6544                 if (cachefs_dlog_commit(fscp, commit, error)) {
6545                         /*EMPTY*/
6546                         /* XXX bob: fix on panic */
6547                 }
                     /* done whenever a log entry was made, even on error */
6548                 dnlc_purge_vp(vp);
6549         }
6550         return (error);
6551 }
6552 
/*
 * cachefs_symlink - create a symbolic link named lnm in directory dvp whose
 * contents are tnm, with attributes taken from tva.
 *
 * Fails with EPERM when called outside the global zone.  Otherwise the
 * function loops: it obtains access to the file system through
 * cachefs_cd_access(), takes dcp->c_rwlock as writer, and calls
 * cachefs_symlink_connected() or cachefs_symlink_disconnected() depending
 * on fscp->fs_cdconnected.  Whenever CFS_TIMEOUT() is true for the result
 * the loop is restarted, with `connected' set to 0 after a timeout in
 * connected mode and to 1 after one in disconnected mode.
 *
 * Returns 0 on success or an errno value.  The rwlock and the cd access
 * are dropped before returning.
 */
6553 /*ARGSUSED*/
6554 static int
6555 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6556     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6557 {
6558         cnode_t *dcp = VTOC(dvp);
6559         fscache_t *fscp = C_TO_FSCACHE(dcp);
6560         cachefscache_t *cachep = fscp->fs_cache;
6561         int error = 0;
6562         int held = 0;           /* set while cd access AND c_rwlock are held */
6563         int connected = 0;      /* second argument to cachefs_cd_access() */
6564 
6565 #ifdef CFSDEBUG
6566         CFS_DEBUG(CFSDEBUG_VOPS)
6567                 printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6568                     (void *)dvp, lnm, tnm);
6569 #endif
6570 
             /* nothing has been acquired yet, so skip straight to the exit */
6571         if (getzoneid() != GLOBAL_ZONEID) {
6572                 error = EPERM;
6573                 goto out;
6574         }
6575 
6576         if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6577                 ASSERT(dcp->c_flags & CN_NOCACHE);
6578 
6579         /*
6580          * Cachefs only provides pass-through support for NFSv4,
6581          * and all vnode operations are passed through to the
6582          * back file system. For NFSv4 pass-through to work, only
6583          * connected operation is supported, the cnode backvp must
6584          * exist, and cachefs optional (eg., disconnectable) flags
6585          * are turned off. Assert these conditions to ensure that
6586          * the backfilesystem is called for the symlink operation.
6587          */
6588         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6589         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6590 
6591         for (;;) {
6592                 /* get (or renew) access to the file system */
6593                 if (held) {
6594                         /* Won't loop with NFSv4 connected behavior */
6595                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6596                         rw_exit(&dcp->c_rwlock);
6597                         cachefs_cd_release(fscp);
6598                         held = 0;
6599                 }
6600                 error = cachefs_cd_access(fscp, connected, 1);
6601                 if (error)
6602                         break;
6603                 rw_enter(&dcp->c_rwlock, RW_WRITER);
6604                 held = 1;
6605 
6606                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6607                         error = cachefs_symlink_connected(dvp, lnm, tva,
6608                             tnm, cr);
6609                         if (CFS_TIMEOUT(fscp, error)) {
                                     /*
                                      * drop the lock and the access before
                                      * reporting the timeout, then retry
                                      */
6610                                 rw_exit(&dcp->c_rwlock);
6611                                 cachefs_cd_release(fscp);
6612                                 held = 0;
6613                                 cachefs_cd_timedout(fscp);
6614                                 connected = 0;
6615                                 continue;
6616                         }
6617                 } else {
6618                         error = cachefs_symlink_disconnected(dvp, lnm, tva,
6619                             tnm, cr);
6620                         if (CFS_TIMEOUT(fscp, error)) {
                                     /* top of loop releases what is held */
6621                                 connected = 1;
6622                                 continue;
6623                         }
6624                 }
6625                 break;
6626         }
6627 
             /* log the outcome against the parent directory's fid/fileno */
6628         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6629                 cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6630                     &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6631                     crgetuid(cr), (uint_t)strlen(tnm));
6632 
6633         if (held) {
6634                 rw_exit(&dcp->c_rwlock);
6635                 cachefs_cd_release(fscp);
6636         }
6637 
6638 #ifdef CFS_CD_DEBUG
6639         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6640 #endif
6641 out:
6642 #ifdef CFSDEBUG
6643         CFS_DEBUG(CFSDEBUG_VOPS)
6644                 printf("cachefs_symlink: EXIT error = %d\n", error);
6645 #endif
6646         return (error);
6647 }
6648 
/*
 * cachefs_symlink_connected - create symlink lnm -> tnm in dvp by calling
 * VOP_SYMLINK() on the parent's back vnode, then try to bring the new link
 * into the cache.
 *
 * cachefs_symlink() calls this with dcp->c_rwlock held as writer.
 *
 * Returns the error from cachefs_getbackvp(), CFSOP_CHECK_COBJECT() or
 * VOP_SYMLINK() if any of those fail.  Once VOP_SYMLINK() has succeeded
 * the function always returns 0: every later failure (lookup of the new
 * link, cachefs_getcookie(), cachefs_dir_enter(), cachefs_cnode_make(),
 * cachefs_stuffsymlink()) only results in a cachefs_nocache() call on the
 * affected cnode.
 *
 * The back vnode and the new cnode obtained here are released before
 * returning; nothing is handed back to the caller.
 */
6649 static int
6650 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6651     char *tnm, cred_t *cr)
6652 {
6653         cnode_t *dcp = VTOC(dvp);
6654         fscache_t *fscp = C_TO_FSCACHE(dcp);
6655         int error = 0;
6656         vnode_t *backvp = NULL;         /* back vnode of the new symlink */
6657         cnode_t *newcp = NULL;          /* cnode made for the new symlink */
6658         struct vattr va;
6659         struct fid cookie;
6660         cfs_cid_t cid;
6661         uint32_t valid_fid;
6662 
6663         mutex_enter(&dcp->c_statelock);
6664 
             /* make sure the parent has a back vnode to operate on */
6665         if (dcp->c_backvp == NULL) {
6666                 error = cachefs_getbackvp(fscp, dcp);
6667                 if (error) {
6668                         cachefs_nocache(dcp);
6669                         mutex_exit(&dcp->c_statelock);
6670                         goto out;
6671                 }
6672         }
6673 
6674         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6675         if (error) {
6676                 mutex_exit(&dcp->c_statelock);
6677                 goto out;
6678         }
             /* create the symlink on the back fs */
6679         CFS_DPRINT_BACKFS_NFSV4(fscp,
6680             ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6681             "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6682         error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6683         if (error) {
6684                 mutex_exit(&dcp->c_statelock);
6685                 goto out;
6686         }
             /*
              * From here on the link exists on the back fs, so error is 0
              * on every exit.  If the file group lacks CFS_FG_WRITE (and
              * this is not NFSv4), stop caching the parent and return.
              */
6687         if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6688             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6689                 cachefs_nocache(dcp);
6690                 mutex_exit(&dcp->c_statelock);
6691                 goto out;
6692         }
6693 
6694         CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6695 
6696         /* lookup the symlink we just created and get its fid and attrs */
6697         (void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6698             NULL, NULL, NULL);
6699         if (backvp == NULL) {
6700                 if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6701                         cachefs_nocache(dcp);
6702                 mutex_exit(&dcp->c_statelock);
6703                 goto out;
6704         }
6705 
             /* no fid is requested from cachefs_getcookie() for NFSv4 */
6706         valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6707         error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6708         if (error) {
6709                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6710                 error = 0;
6711                 cachefs_nocache(dcp);
6712                 mutex_exit(&dcp->c_statelock);
6713                 goto out;
6714         }
6715         cid.cid_fileno = va.va_nodeid;
6716         cid.cid_flags = 0;
6717 
6718         /* if the dir is cached, add the symlink to it */
6719         if (CFS_ISFS_NONSHARED(fscp) &&
6720             (dcp->c_metadata.md_flags & MD_POPULATED)) {
6721                 error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6722                 if (error) {
6723                         cachefs_nocache(dcp);
6724                         error = 0;
6725                 }
6726         }
6727         mutex_exit(&dcp->c_statelock);
6728 
6729         /* make the cnode for the sym link */
6730         error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6731             &va, backvp, cr, 0, &newcp);
6732         if (error) {
6733                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
                     /*
                      * NOTE(review): dcp->c_statelock was dropped above, unlike
                      * the other cachefs_nocache(dcp) calls in this function --
                      * confirm whether the lock is required here.
                      */
6734                 cachefs_nocache(dcp);
6735                 error = 0;
6736                 goto out;
6737         }
6738 
6739         /* try to cache the symlink contents */
6740         rw_enter(&newcp->c_rwlock, RW_WRITER);
6741         mutex_enter(&newcp->c_statelock);
6742 
6743         /*
6744          * try to cache the sym link, note that it's a no-op if NOCACHE
6745          * or NFSv4 is set
6746          */
6747         error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6748         if (error) {
6749                 cachefs_nocache(newcp);
6750                 error = 0;
6751         }
6752         mutex_exit(&newcp->c_statelock);
6753         rw_exit(&newcp->c_rwlock);
6754 
6755 out:
             /* drop the holds taken above; neither is returned to the caller */
6756         if (backvp)
6757                 VN_RELE(backvp);
6758         if (newcp)
6759                 VN_RELE(CTOV(newcp));
6760         return (error);
6761 }
6762 
/*
 * cachefs_symlink_disconnected - create symlink lnm -> tnm in dvp using
 * only the cache, and record the operation with cachefs_dlog_symlink().
 *
 * cachefs_symlink() calls this with dcp->c_rwlock held as writer.
 *
 * Returns:
 *   ETIMEDOUT  the file system is write-around, the parent directory is
 *              not populated (checked both before and after the new cnode
 *              is built), or cachefs_dir_look() returned ENOTDIR
 *   EEXIST     cachefs_dir_look() returned anything other than ENOENT or
 *              ENOTDIR
 *   ENOSPC     cachefs_dlog_cidmap() failed, cachefs_dlog_symlink()
 *              returned 0, or cachefs_modified_alloc() failed
 *   other      error from cachefs_access_local(), cachefs_cnode_create(),
 *              cachefs_stuffsymlink(), filegrp_create_metadata(),
 *              filegrp_write_metadata() or cachefs_dir_enter()
 *   0          success
 *
 * If a log entry was created it is committed with the final error value.
 * On error the new cnode (if any) is flagged CN_DESTROY.  The new cnode is
 * always released before returning; it is not handed back to the caller.
 */
6763 static int
6764 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6765     char *tnm, cred_t *cr)
6766 {
6767         cnode_t *dcp = VTOC(dvp);
6768         fscache_t *fscp = C_TO_FSCACHE(dcp);
6769         int error;
6770         cnode_t *newcp = NULL;          /* cnode created for the symlink */
6771         struct vattr va;
6772         timestruc_t current_time;
6773         off_t commit = 0;               /* nonzero once a log entry exists */
6774 
6775         if (CFS_ISFS_WRITE_AROUND(fscp))
6776                 return (ETIMEDOUT);
6777 
6778         mutex_enter(&dcp->c_statelock);
6779 
6780         /* check permissions */
6781         if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6782                 mutex_exit(&dcp->c_statelock);
6783                 goto out;
6784         }
6785 
6786         /* the directory front file must be populated */
6787         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6788                 error = ETIMEDOUT;
6789                 mutex_exit(&dcp->c_statelock);
6790                 goto out;
6791         }
6792 
6793         /* make sure lnm does not already exist in the directory */
6794         error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6795         if (error == ENOTDIR) {
6796                 error = ETIMEDOUT;
6797                 mutex_exit(&dcp->c_statelock);
6798                 goto out;
6799         }
             /* any result other than ENOENT is reported as EEXIST */
6800         if (error != ENOENT) {
6801                 error = EEXIST;
6802                 mutex_exit(&dcp->c_statelock);
6803                 goto out;
6804         }
6805 
6806         /* make up a reasonable set of attributes */
6807         cachefs_attr_setup(tva, &va, dcp, cr);
6808         va.va_type = VLNK;
6809         va.va_mode |= S_IFLNK;
6810         va.va_size = strlen(tnm);
6811 
6812         mutex_exit(&dcp->c_statelock);
6813 
6814         /* create the cnode */
6815         error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6816         if (error)
6817                 goto out;
6818 
6819         rw_enter(&newcp->c_rwlock, RW_WRITER);
6820         mutex_enter(&newcp->c_statelock);
6821 
             /* whatever cachefs_dlog_cidmap() returns is reported as ENOSPC */
6822         error = cachefs_dlog_cidmap(fscp);
6823         if (error) {
6824                 mutex_exit(&newcp->c_statelock);
6825                 rw_exit(&newcp->c_rwlock);
6826                 error = ENOSPC;
6827                 goto out;
6828         }
6829 
             /* set group and ACL from the parent, then stamp local times */
6830         cachefs_creategid(dcp, newcp, tva, cr);
6831         mutex_enter(&dcp->c_statelock);
6832         cachefs_createacl(dcp, newcp);
6833         mutex_exit(&dcp->c_statelock);
6834         gethrestime(&current_time);
6835         newcp->c_metadata.md_vattr.va_atime = current_time;
6836         newcp->c_metadata.md_localctime = current_time;
6837         newcp->c_metadata.md_localmtime = current_time;
6838         newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6839             MD_LOCALCTIME;
6840         newcp->c_flags |= CN_UPDATED;
6841 
6842         /* log the operation */
6843         commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6844         if (commit == 0) {
6845                 mutex_exit(&newcp->c_statelock);
6846                 rw_exit(&newcp->c_rwlock);
6847                 error = ENOSPC;
6848                 goto out;
6849         }
6850 
6851         /* store the symlink contents */
6852         error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6853         if (error) {
6854                 mutex_exit(&newcp->c_statelock);
6855                 rw_exit(&newcp->c_rwlock);
6856                 goto out;
6857         }
6858         if (cachefs_modified_alloc(newcp)) {
6859                 mutex_exit(&newcp->c_statelock);
6860                 rw_exit(&newcp->c_rwlock);
6861                 error = ENOSPC;
6862                 goto out;
6863         }
6864 
6865         /*
6866          * write the metadata now rather than waiting until
6867          * inactive so that if there's no space we can let
6868          * the caller know.
6869          */
6870         if (newcp->c_flags & CN_ALLOC_PENDING) {
6871                 if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6872                         (void) filegrp_allocattr(newcp->c_filegrp);
6873                 }
6874                 error = filegrp_create_metadata(newcp->c_filegrp,
6875                     &newcp->c_metadata, &newcp->c_id);
6876                 if (error) {
6877                         mutex_exit(&newcp->c_statelock);
6878                         rw_exit(&newcp->c_rwlock);
6879                         goto out;
6880                 }
6881                 newcp->c_flags &= ~CN_ALLOC_PENDING;
6882         }
6883         error = filegrp_write_metadata(newcp->c_filegrp,
6884             &newcp->c_id, &newcp->c_metadata);
6885         if (error) {
6886                 mutex_exit(&newcp->c_statelock);
6887                 rw_exit(&newcp->c_rwlock);
6888                 goto out;
6889         }
6890         mutex_exit(&newcp->c_statelock);
6891         rw_exit(&newcp->c_rwlock);
6892 
6893         mutex_enter(&dcp->c_statelock);
6894 
6895         /*
              * enter the new file in the directory; the populated flag is
              * rechecked because c_statelock was dropped since the first
              * check
              */
6896         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6897                 error = ETIMEDOUT;
6898                 mutex_exit(&dcp->c_statelock);
6899                 goto out;
6900         }
6901         cachefs_modified(dcp);
6902         error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6903             &newcp->c_id, SM_ASYNC);
6904         if (error) {
6905                 mutex_exit(&dcp->c_statelock);
6906                 goto out;
6907         }
6908 
6909         /* update parent dir times */
6910         dcp->c_metadata.md_localctime = current_time;
6911         dcp->c_metadata.md_localmtime = current_time;
6912         dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6913         dcp->c_flags |= CN_UPDATED;
6914         mutex_exit(&dcp->c_statelock);
6915 
6916 out:
             /* every path reaches here with no locks held */
6917         if (commit) {
6918                 /* commit the log entry */
6919                 if (cachefs_dlog_commit(fscp, commit, error)) {
6920                         /*EMPTY*/
6921                         /* XXX bob: fix on panic */
6922                 }
6923         }
6924 
             /* on failure, flag the half-built cnode CN_DESTROY */
6925         if (error) {
6926                 if (newcp) {
6927                         mutex_enter(&newcp->c_statelock);
6928                         newcp->c_flags |= CN_DESTROY;
6929                         mutex_exit(&newcp->c_statelock);
6930                 }
6931         }
6932         if (newcp) {
6933                 VN_RELE(CTOV(newcp));
6934         }
6935 
6936         return (error);
6937 }
6938 
6939 /*ARGSUSED*/
6940 static int
6941 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6942     caller_context_t *ct, int flags)
6943 {
6944         cnode_t *dcp = VTOC(vp);
6945         fscache_t *fscp = C_TO_FSCACHE(dcp);
6946         cachefscache_t *cachep = fscp->fs_cache;
6947         int error = 0;
6948         int held = 0;
6949         int connected = 0;
6950 
6951 #ifdef CFSDEBUG
6952         CFS_DEBUG(CFSDEBUG_VOPS)
6953                 printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6954 #endif
6955         if (getzoneid() != GLOBAL_ZONEID) {
6956                 error = EPERM;
6957                 goto out;
6958         }
6959 
6960         /*
6961          * Cachefs only provides pass-through support for NFSv4,
6962          * and all vnode operations are passed through to the
6963          * back file system. For NFSv4 pass-through to work, only
6964          * connected operation is supported, the cnode backvp must
6965          * exist, and cachefs optional (eg., disconnectable) flags
6966          * are turned off. Assert these conditions to ensure that
6967          * the backfilesystem is called for the readdir operation.
6968          */
6969         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6970         CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6971 
6972         for (;;) {
6973                 /* get (or renew) access to the file system */
6974                 if (held) {
6975                         /* Won't loop with NFSv4 connected behavior */
6976                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6977                         rw_exit(&dcp->c_rwlock);
6978                         cachefs_cd_release(fscp);
6979                         held = 0;
6980                 }
6981                 error = cachefs_cd_access(fscp, connected, 0);
6982                 if (error)
6983                         break;
6984                 rw_enter(&dcp->c_rwlock, RW_READER);
6985                 held = 1;
6986 
6987                 /* quit if link count of zero (posix) */
6988                 if (dcp->c_attr.va_nlink == 0) {
6989                         if (eofp)
6990                                 *eofp = 1;
6991                         error = 0;
6992                         break;
6993                 }
6994 
6995                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6996                         error = cachefs_readdir_connected(vp, uiop, cr,
6997                             eofp);
6998                         if (CFS_TIMEOUT(fscp, error)) {
6999                                 rw_exit(&dcp->c_rwlock);
7000                                 cachefs_cd_release(fscp);
7001                                 held = 0;
7002                                 cachefs_cd_timedout(fscp);
7003                                 connected = 0;
7004                                 continue;
7005                         }
7006                 } else {
7007                         error = cachefs_readdir_disconnected(vp, uiop, cr,
7008                             eofp);
7009                         if (CFS_TIMEOUT(fscp, error)) {
7010                                 if (cachefs_cd_access_miss(fscp)) {
7011                                         error = cachefs_readdir_connected(vp,
7012                                             uiop, cr, eofp);
7013                                         if (!CFS_TIMEOUT(fscp, error))
7014                                                 break;
7015                                         delay(5*hz);
7016                                         connected = 0;
7017                                         continue;
7018                                 }
7019                                 connected = 1;
7020                                 continue;
7021                         }
7022                 }
7023                 break;
7024         }
7025 
7026         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7027                 cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7028                     &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7029                     crgetuid(cr), uiop->uio_loffset, *eofp);
7030 
7031         if (held) {
7032                 rw_exit(&dcp->c_rwlock);
7033                 cachefs_cd_release(fscp);
7034         }
7035 
7036 #ifdef CFS_CD_DEBUG
7037         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7038 #endif
7039 out:
7040 #ifdef CFSDEBUG
7041         CFS_DEBUG(CFSDEBUG_VOPS)
7042                 printf("cachefs_readdir: EXIT error = %d\n", error);
7043 #endif
7044 
7045         return (error);
7046 }
7047 
7048 static int
7049 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7050 {
7051         cnode_t *dcp = VTOC(vp);
7052         int error;
7053         fscache_t *fscp = C_TO_FSCACHE(dcp);
7054         struct cachefs_req *rp;
7055 
7056         mutex_enter(&dcp->c_statelock);
7057 
7058         /* check directory consistency */
7059         error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7060         if (error)
7061                 goto out;
7062         dcp->c_usage++;
7063 
7064         /* if dir was modified, toss old contents */
7065         if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7066                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7067                 cachefs_inval_object(dcp);
7068         }
7069 
7070         error = 0;
7071         if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7072             ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7073             !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7074             (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7075 
7076                 if (cachefs_async_okay()) {
7077 
7078                         /*
7079                          * Set up asynchronous request to fill this
7080                          * directory.
7081                          */
7082 
7083                         dcp->c_flags |= CN_ASYNC_POPULATE;
7084 
7085                         rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7086                         rp->cfs_cmd = CFS_POPULATE;
7087                         rp->cfs_req_u.cu_populate.cpop_vp = vp;
7088                         rp->cfs_cr = cr;
7089 
7090                         crhold(cr);
7091                         VN_HOLD(vp);
7092 
7093                         cachefs_addqueue(rp, &fscp->fs_workq);
7094                 } else {
7095                         error = cachefs_dir_fill(dcp, cr);
7096                         if (error != 0)
7097                                 cachefs_nocache(dcp);
7098                 }
7099         }
7100 
7101         /* if front file is populated */
7102         if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7103             !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7104             (dcp->c_metadata.md_flags & MD_POPULATED)) {
7105                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7106                 error = cachefs_dir_read(dcp, uiop, eofp);
7107                 if (error == 0)
7108                         fscp->fs_stats.st_hits++;
7109         }
7110 
7111         /* if front file could not be used */
7112         if ((error != 0) ||
7113             CFS_ISFS_BACKFS_NFSV4(fscp) ||
7114             (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7115             ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7116 
7117                 if (error && !(dcp->c_flags & CN_NOCACHE) &&
7118                     !CFS_ISFS_BACKFS_NFSV4(fscp))
7119                         cachefs_nocache(dcp);
7120 
7121                 /* get the back vp */
7122                 if (dcp->c_backvp == NULL) {
7123                         error = cachefs_getbackvp(fscp, dcp);
7124                         if (error)
7125                                 goto out;
7126                 }
7127 
7128                 if (fscp->fs_inum_size > 0) {
7129                         error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7130                 } else {
7131                         /* do the dir read from the back fs */
7132                         (void) VOP_RWLOCK(dcp->c_backvp,
7133                             V_WRITELOCK_FALSE, NULL);
7134                         CFS_DPRINT_BACKFS_NFSV4(fscp,
7135                             ("cachefs_readdir (nfsv4): "
7136                             "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7137                         error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7138                             NULL, 0);
7139                         VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7140                 }
7141 
7142                 if (error == 0)
7143                         fscp->fs_stats.st_misses++;
7144         }
7145 
7146 out:
7147         mutex_exit(&dcp->c_statelock);
7148 
7149         return (error);
7150 }
7151 
7152 static int
7153 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7154 {
7155         int error = 0;
7156         fscache_t *fscp = C_TO_FSCACHE(cp);
7157         caddr_t buffy = NULL;
7158         int buffysize = MAXBSIZE;
7159         caddr_t chrp, end;
7160         ino64_t newinum;
7161         struct dirent64 *de;
7162         uio_t uioin;
7163         iovec_t iov;
7164 
7165         ASSERT(cp->c_backvp != NULL);
7166         ASSERT(fscp->fs_inum_size > 0);
7167 
7168         if (uiop->uio_resid < buffysize)
7169                 buffysize = (int)uiop->uio_resid;
7170         buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7171 
7172         iov.iov_base = buffy;
7173         iov.iov_len = buffysize;
7174         uioin.uio_iov = &iov;
7175         uioin.uio_iovcnt = 1;
7176         uioin.uio_segflg = UIO_SYSSPACE;
7177         uioin.uio_fmode = 0;
7178         uioin.uio_extflg = UIO_COPY_CACHED;
7179         uioin.uio_loffset = uiop->uio_loffset;
7180         uioin.uio_resid = buffysize;
7181 
7182         (void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7183         error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7184         VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7185 
7186         if (error != 0)
7187                 goto out;
7188 
7189         end = buffy + buffysize - uioin.uio_resid;
7190 
7191         mutex_exit(&cp->c_statelock);
7192         mutex_enter(&fscp->fs_fslock);
7193 
7194 
7195         for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7196                 de = (dirent64_t *)chrp;
7197                 newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7198                 if (newinum == 0)
7199                         newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7200                 de->d_ino = newinum;
7201         }
7202         mutex_exit(&fscp->fs_fslock);
7203         mutex_enter(&cp->c_statelock);
7204 
7205         error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7206         uiop->uio_loffset = uioin.uio_loffset;
7207 
7208 out:
7209 
7210         if (buffy != NULL)
7211                 cachefs_kmem_free(buffy, buffysize);
7212 
7213         return (error);
7214 }
7215 
7216 static int
7217 /*ARGSUSED*/
7218 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7219     int *eofp)
7220 {
7221         cnode_t *dcp = VTOC(vp);
7222         int error;
7223 
7224         mutex_enter(&dcp->c_statelock);
7225         if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7226                 error = ETIMEDOUT;
7227         } else {
7228                 error = cachefs_dir_read(dcp, uiop, eofp);
7229                 if (error == ENOTDIR)
7230                         error = ETIMEDOUT;
7231         }
7232         mutex_exit(&dcp->c_statelock);
7233 
7234         return (error);
7235 }
7236 
7237 /*ARGSUSED*/
7238 static int
7239 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7240 {
7241         int error = 0;
7242         struct cnode *cp = VTOC(vp);
7243         fscache_t *fscp = C_TO_FSCACHE(cp);
7244 
7245         /*
7246          * Cachefs only provides pass-through support for NFSv4,
7247          * and all vnode operations are passed through to the
7248          * back file system. For NFSv4 pass-through to work, only
7249          * connected operation is supported, the cnode backvp must
7250          * exist, and cachefs optional (eg., disconnectable) flags
7251          * are turned off. Assert these conditions, then bail
7252          * as  NFSv4 doesn't support VOP_FID.
7253          */
7254         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7255         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7256         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7257                 return (ENOTSUP);
7258         }
7259 
7260         mutex_enter(&cp->c_statelock);
7261         if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7262                 fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7263                 error = ENOSPC;
7264         } else {
7265                 bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7266                     cp->c_metadata.md_cookie.fid_len);
7267                 fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7268         }
7269         mutex_exit(&cp->c_statelock);
7270         return (error);
7271 }
7272 
7273 /* ARGSUSED2 */
7274 static int
7275 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7276 {
7277         cnode_t *cp = VTOC(vp);
7278 
7279         /*
7280          * XXX - This is ifdef'ed out for now. The problem -
7281          * getdents() acquires the read version of rwlock, then we come
7282          * into cachefs_readdir() and that wants to acquire the write version
7283          * of this lock (if its going to populate the directory). This is
7284          * a problem, this can be solved by introducing another lock in the
7285          * cnode.
7286          */
7287 /* XXX */
7288         if (vp->v_type != VREG)
7289                 return (-1);
7290         if (write_lock)
7291                 rw_enter(&cp->c_rwlock, RW_WRITER);
7292         else
7293                 rw_enter(&cp->c_rwlock, RW_READER);
7294         return (write_lock);
7295 }
7296 
7297 /* ARGSUSED */
7298 static void
7299 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7300 {
7301         cnode_t *cp = VTOC(vp);
7302         if (vp->v_type != VREG)
7303                 return;
7304         rw_exit(&cp->c_rwlock);
7305 }
7306 
7307 /* ARGSUSED */
7308 static int
7309 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
7310     caller_context_t *ct)
7311 {
7312         return (0);
7313 }
7314 
7315 static int cachefs_lostpage = 0;
7316 /*
7317  * Return all the pages from [off..off+len] in file
7318  */
7319 /*ARGSUSED*/
7320 static int
7321 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7322         uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7323         caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7324 {
7325         cnode_t *cp = VTOC(vp);
7326         int error;
7327         fscache_t *fscp = C_TO_FSCACHE(cp);
7328         cachefscache_t *cachep = fscp->fs_cache;
7329         int held = 0;
7330         int connected = 0;
7331 
7332 #ifdef CFSDEBUG
7333         u_offset_t offx = (u_offset_t)off;
7334 
7335         CFS_DEBUG(CFSDEBUG_VOPS)
7336                 printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7337                     (void *)vp, offx, len, rw);
7338 #endif
7339         if (getzoneid() != GLOBAL_ZONEID) {
7340                 error = EPERM;
7341                 goto out;
7342         }
7343 
7344         if (vp->v_flag & VNOMAP) {
7345                 error = ENOSYS;
7346                 goto out;
7347         }
7348 
7349         /* Call backfilesystem if NFSv4 */
7350         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7351                 error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7352                     plsz, seg, addr, rw, cr);
7353                 goto out;
7354         }
7355 
7356         /* XXX sam: make this do an async populate? */
7357         if (pl == NULL) {
7358                 error = 0;
7359                 goto out;
7360         }
7361         if (protp != NULL)
7362                 *protp = PROT_ALL;
7363 
7364         for (;;) {
7365                 /* get (or renew) access to the file system */
7366                 if (held) {
7367                         cachefs_cd_release(fscp);
7368                         held = 0;
7369                 }
7370                 error = cachefs_cd_access(fscp, connected, 0);
7371                 if (error)
7372                         break;
7373                 held = 1;
7374 
7375                 /*
7376                  * If we are getting called as a side effect of a
7377                  * cachefs_write()
7378                  * operation the local file size might not be extended yet.
7379                  * In this case we want to be able to return pages of zeroes.
7380                  */
7381                 if ((u_offset_t)off + len >
7382                     ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7383                         if (seg != segkmap) {
7384                                 error = EFAULT;
7385                                 break;
7386                         }
7387                 }
7388                 if (len <= PAGESIZE)
7389                         error = cachefs_getapage(vp, (u_offset_t)off, len,
7390                             protp, pl, plsz, seg, addr, rw, cr);
7391                 else
7392                         error = pvn_getpages(cachefs_getapage, vp,
7393                             (u_offset_t)off, len, protp, pl, plsz, seg, addr,
7394                             rw, cr);
7395                 if (error == 0)
7396                         break;
7397 
7398                 if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7399                     error == EAGAIN) {
7400                         connected = 0;
7401                         continue;
7402                 }
7403                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7404                         if (CFS_TIMEOUT(fscp, error)) {
7405                                 cachefs_cd_release(fscp);
7406                                 held = 0;
7407                                 cachefs_cd_timedout(fscp);
7408                                 connected = 0;
7409                                 continue;
7410                         }
7411                 } else {
7412                         if (CFS_TIMEOUT(fscp, error)) {
7413                                 if (cachefs_cd_access_miss(fscp)) {
7414                                         if (len <= PAGESIZE)
7415                                                 error = cachefs_getapage_back(
7416                                                     vp, (u_offset_t)off,
7417                                                     len, protp, pl,
7418                                                     plsz, seg, addr, rw, cr);
7419                                         else
7420                                                 error = pvn_getpages(
7421                                                     cachefs_getapage_back, vp,
7422                                                     (u_offset_t)off, len,
7423                                                     protp, pl,
7424                                                     plsz, seg, addr, rw, cr);
7425                                         if (!CFS_TIMEOUT(fscp, error) &&
7426                                             (error != EAGAIN))
7427                                                 break;
7428                                         delay(5*hz);
7429                                         connected = 0;
7430                                         continue;
7431                                 }
7432                                 connected = 1;
7433                                 continue;
7434                         }
7435                 }
7436                 break;
7437         }
7438 
7439         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7440                 cachefs_log_getpage(cachep, error, vp->v_vfsp,
7441                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7442                     crgetuid(cr), off, len);
7443 
7444         if (held) {
7445                 cachefs_cd_release(fscp);
7446         }
7447 
7448 out:
7449 #ifdef CFS_CD_DEBUG
7450         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7451 #endif
7452 #ifdef CFSDEBUG
7453         CFS_DEBUG(CFSDEBUG_VOPS)
7454                 printf("cachefs_getpage: EXIT vp %p error %d\n",
7455                     (void *)vp, error);
7456 #endif
7457         return (error);
7458 }
7459 
7460 /*
7461  * cachefs_getpage_backfs_nfsv4
7462  *
7463  * Call NFSv4 back filesystem to handle the getpage (cachefs
7464  * pass-through support for NFSv4).
7465  */
7466 static int
7467 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7468                         uint_t *protp, struct page *pl[], size_t plsz,
7469                         struct seg *seg, caddr_t addr, enum seg_rw rw,
7470                         cred_t *cr)
7471 {
7472         cnode_t *cp = VTOC(vp);
7473         fscache_t *fscp = C_TO_FSCACHE(cp);
7474         vnode_t *backvp;
7475         int error;
7476 
7477         /*
7478          * For NFSv4 pass-through to work, only connected operation is
7479          * supported, the cnode backvp must exist, and cachefs optional
7480          * (eg., disconnectable) flags are turned off. Assert these
7481          * conditions for the getpage operation.
7482          */
7483         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7484         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7485 
7486         /* Call backfs vnode op after extracting backvp */
7487         mutex_enter(&cp->c_statelock);
7488         backvp = cp->c_backvp;
7489         mutex_exit(&cp->c_statelock);
7490 
7491         CFS_DPRINT_BACKFS_NFSV4(fscp,
7492             ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7493             cp, backvp));
7494         error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7495             addr, rw, cr, NULL);
7496 
7497         return (error);
7498 }
7499 
7500 /*
7501  * Called from pvn_getpages or cachefs_getpage to get a particular page.
7502  */
7503 /*ARGSUSED*/
7504 static int
7505 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7506         struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7507         enum seg_rw rw, cred_t *cr)
7508 {
7509         cnode_t *cp = VTOC(vp);
7510         page_t **ppp, *pp = NULL;
7511         fscache_t *fscp = C_TO_FSCACHE(cp);
7512         cachefscache_t *cachep = fscp->fs_cache;
7513         int error = 0;
7514         struct page **ourpl;
7515         struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7516         int index = 0;
7517         int downgrade;
7518         int have_statelock = 0;
7519         u_offset_t popoff;
7520         size_t popsize = 0;
7521 
7522         /*LINTED*/
7523         ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7524 
7525         if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7526                 ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7527                     ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7528         else
7529                 ourpl = ourstackpl;
7530 
7531         ourpl[0] = NULL;
7532         off = off & (offset_t)PAGEMASK;
7533 again:
7534         /*
7535          * Look for the page
7536          */
7537         if (page_exists(vp, off) == 0) {
7538                 /*
7539                  * Need to do work to get the page.
7540                  * Grab our lock because we are going to
7541                  * modify the state of the cnode.
7542                  */
7543                 if (! have_statelock) {
7544                         mutex_enter(&cp->c_statelock);
7545                         have_statelock = 1;
7546                 }
7547                 /*
7548                  * If we're in NOCACHE mode, we will need a backvp
7549                  */
7550                 if (cp->c_flags & CN_NOCACHE) {
7551                         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7552                                 error = ETIMEDOUT;
7553                                 goto out;
7554                         }
7555                         if (cp->c_backvp == NULL) {
7556                                 error = cachefs_getbackvp(fscp, cp);
7557                                 if (error)
7558                                         goto out;
7559                         }
7560                         error = VOP_GETPAGE(cp->c_backvp, off,
7561                             PAGESIZE, protp, ourpl, PAGESIZE, seg,
7562                             addr, S_READ, cr, NULL);
7563                         /*
7564                          * backfs returns EFAULT when we are trying for a
7565                          * page beyond EOF but cachefs has the knowledge that
7566                          * it is not beyond EOF be cause cp->c_size is
7567                          * greater then the offset requested.
7568                          */
7569                         if (error == EFAULT) {
7570                                 error = 0;
7571                                 pp = page_create_va(vp, off, PAGESIZE,
7572                                     PG_EXCL | PG_WAIT, seg, addr);
7573                                 if (pp == NULL)
7574                                         goto again;
7575                                 pagezero(pp, 0, PAGESIZE);
7576                                 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7577                                 goto out;
7578                         }
7579                         if (error)
7580                                 goto out;
7581                         goto getpages;
7582                 }
7583                 /*
7584                  * We need a front file. If we can't get it,
7585                  * put the cnode in NOCACHE mode and try again.
7586                  */
7587                 if (cp->c_frontvp == NULL) {
7588                         error = cachefs_getfrontfile(cp);
7589                         if (error) {
7590                                 cachefs_nocache(cp);
7591                                 error = EAGAIN;
7592                                 goto out;
7593                         }
7594                 }
7595                 /*
7596                  * Check if the front file needs population.
7597                  * If population is necessary, make sure we have a
7598                  * backvp as well. We will get the page from the backvp.
7599                  * bug 4152459-
7600                  * But if the file system is in disconnected mode
7601                  * and the file is a local file then do not check the
7602                  * allocmap.
7603                  */
7604                 if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7605                     ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7606                     (cachefs_check_allocmap(cp, off) == 0)) {
7607                         if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7608                                 error = ETIMEDOUT;
7609                                 goto out;
7610                         }
7611                         if (cp->c_backvp == NULL) {
7612                                 error = cachefs_getbackvp(fscp, cp);
7613                                 if (error)
7614                                         goto out;
7615                         }
7616                         if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7617                                 cachefs_cluster_allocmap(off, &popoff,
7618                                     &popsize,
7619                                     fscp->fs_info.fi_popsize, cp);
7620                                 if (popsize != 0) {
7621                                         error = cachefs_populate(cp,
7622                                             popoff, popsize,
7623                                             cp->c_frontvp, cp->c_backvp,
7624                                             cp->c_size, cr);
7625                                         if (error) {
7626                                                 cachefs_nocache(cp);
7627                                                 error = EAGAIN;
7628                                                 goto out;
7629                                         } else {
7630                                                 cp->c_flags |=
7631                                                     CN_UPDATED |
7632                                                     CN_NEED_FRONT_SYNC |
7633                                                     CN_POPULATION_PENDING;
7634                                         }
7635                                         popsize = popsize - (off - popoff);
7636                                 } else {
7637                                         popsize = PAGESIZE;
7638                                 }
7639                         }
7640                         /* else XXX assert CN_NOCACHE? */
7641                         error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7642                             PAGESIZE, protp, ourpl, popsize,
7643                             seg, addr, S_READ, cr, NULL);
7644                         if (error)
7645                                 goto out;
7646                         fscp->fs_stats.st_misses++;
7647                 } else {
7648                         if (cp->c_flags & CN_POPULATION_PENDING) {
7649                                 error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7650                                     NULL);
7651                                 cp->c_flags &= ~CN_POPULATION_PENDING;
7652                                 if (error) {
7653                                         cachefs_nocache(cp);
7654                                         error = EAGAIN;
7655                                         goto out;
7656                                 }
7657                         }
7658                         /*
7659                          * File was populated so we get the page from the
7660                          * frontvp
7661                          */
7662                         error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7663                             PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7664                             rw, cr, NULL);
7665                         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7666                                 cachefs_log_gpfront(cachep, error,
7667                                     fscp->fs_cfsvfsp,
7668                                     &cp->c_metadata.md_cookie, cp->c_fileno,
7669                                     crgetuid(cr), off, PAGESIZE);
7670                         if (error) {
7671                                 cachefs_nocache(cp);
7672                                 error = EAGAIN;
7673                                 goto out;
7674                         }
7675                         fscp->fs_stats.st_hits++;
7676                 }
7677 getpages:
7678                 ASSERT(have_statelock);
7679                 if (have_statelock) {
7680                         mutex_exit(&cp->c_statelock);
7681                         have_statelock = 0;
7682                 }
7683                 downgrade = 0;
7684                 for (ppp = ourpl; *ppp; ppp++) {
7685                         if ((*ppp)->p_offset < off) {
7686                                 index++;
7687                                 page_unlock(*ppp);
7688                                 continue;
7689                         }
7690                         if (PAGE_SHARED(*ppp)) {
7691                                 if (page_tryupgrade(*ppp) == 0) {
7692                                         for (ppp = &ourpl[index]; *ppp; ppp++)
7693                                                 page_unlock(*ppp);
7694                                         error = EAGAIN;
7695                                         goto out;
7696                                 }
7697                                 downgrade = 1;
7698                         }
7699                         ASSERT(PAGE_EXCL(*ppp));
7700                         (void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7701                         page_rename(*ppp, vp, (*ppp)->p_offset);
7702                 }
7703                 pl[0] = ourpl[index];
7704                 pl[1] = NULL;
7705                 if (downgrade) {
7706                         page_downgrade(ourpl[index]);
7707                 }
7708                 /* Unlock the rest of the pages from the cluster */
7709                 for (ppp = &ourpl[index+1]; *ppp; ppp++)
7710                         page_unlock(*ppp);
7711         } else {
7712                 ASSERT(! have_statelock);
7713                 if (have_statelock) {
7714                         mutex_exit(&cp->c_statelock);
7715                         have_statelock = 0;
7716                 }
7717                 /* XXX SE_SHARED probably isn't what we *always* want */
7718                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7719                         cachefs_lostpage++;
7720                         goto again;
7721                 }
7722                 pl[0] = pp;
7723                 pl[1] = NULL;
7724                 /* XXX increment st_hits?  i don't think so, but... */
7725         }
7726 
7727 out:
7728         if (have_statelock) {
7729                 mutex_exit(&cp->c_statelock);
7730                 have_statelock = 0;
7731         }
7732         if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7733                 cachefs_kmem_free(ourpl, sizeof (struct page *) *
7734                     ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7735         return (error);
7736 }
7737 
7738 /* gets a page but only from the back fs */
7739 /*ARGSUSED*/
7740 static int
7741 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7742     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7743     caddr_t addr, enum seg_rw rw, cred_t *cr)
7744 {
7745         cnode_t *cp = VTOC(vp);
7746         page_t **ppp, *pp = NULL;
7747         fscache_t *fscp = C_TO_FSCACHE(cp);
7748         int error = 0;
7749         struct page *ourpl[17];
7750         int index = 0;
7751         int have_statelock = 0;
7752         int downgrade;
7753 
7754         /*
7755          * Grab the cnode statelock so the cnode state won't change
7756          * while we're in here.
7757          */
7758         ourpl[0] = NULL;
7759         off = off & (offset_t)PAGEMASK;
7760 again:
7761         if (page_exists(vp, off) == 0) {
7762                 if (! have_statelock) {
7763                         mutex_enter(&cp->c_statelock);
7764                         have_statelock = 1;
7765                 }
7766 
7767                 if (cp->c_backvp == NULL) {
7768                         error = cachefs_getbackvp(fscp, cp);
7769                         if (error)
7770                                 goto out;
7771                 }
7772                 error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7773                     PAGESIZE, protp, ourpl, PAGESIZE, seg,
7774                     addr, S_READ, cr, NULL);
7775                 if (error)
7776                         goto out;
7777 
7778                 if (have_statelock) {
7779                         mutex_exit(&cp->c_statelock);
7780                         have_statelock = 0;
7781                 }
7782                 downgrade = 0;
7783                 for (ppp = ourpl; *ppp; ppp++) {
7784                         if ((*ppp)->p_offset < off) {
7785                                 index++;
7786                                 page_unlock(*ppp);
7787                                 continue;
7788                         }
7789                         if (PAGE_SHARED(*ppp)) {
7790                                 if (page_tryupgrade(*ppp) == 0) {
7791                                         for (ppp = &ourpl[index]; *ppp; ppp++)
7792                                                 page_unlock(*ppp);
7793                                         error = EAGAIN;
7794                                         goto out;
7795                                 }
7796                                 downgrade = 1;
7797                         }
7798                         ASSERT(PAGE_EXCL(*ppp));
7799                         (void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7800                         page_rename(*ppp, vp, (*ppp)->p_offset);
7801                 }
7802                 pl[0] = ourpl[index];
7803                 pl[1] = NULL;
7804                 if (downgrade) {
7805                         page_downgrade(ourpl[index]);
7806                 }
7807                 /* Unlock the rest of the pages from the cluster */
7808                 for (ppp = &ourpl[index+1]; *ppp; ppp++)
7809                         page_unlock(*ppp);
7810         } else {
7811                 ASSERT(! have_statelock);
7812                 if (have_statelock) {
7813                         mutex_exit(&cp->c_statelock);
7814                         have_statelock = 0;
7815                 }
7816                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7817                         cachefs_lostpage++;
7818                         goto again;
7819                 }
7820                 pl[0] = pp;
7821                 pl[1] = NULL;
7822         }
7823 
7824 out:
7825         if (have_statelock) {
7826                 mutex_exit(&cp->c_statelock);
7827                 have_statelock = 0;
7828         }
7829         return (error);
7830 }
7831 
7832 /*ARGSUSED*/
7833 static int
7834 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7835     caller_context_t *ct)
7836 {
7837         cnode_t *cp = VTOC(vp);
7838         int error = 0;
7839         fscache_t *fscp = C_TO_FSCACHE(cp);
7840         int held = 0;
7841         int connected = 0;
7842 
7843         if (getzoneid() != GLOBAL_ZONEID)
7844                 return (EPERM);
7845 
7846         /* Call backfilesytem if NFSv4 */
7847         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7848                 error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7849                 goto out;
7850         }
7851 
7852         for (;;) {
7853                 /* get (or renew) access to the file system */
7854                 if (held) {
7855                         cachefs_cd_release(fscp);
7856                         held = 0;
7857                 }
7858                 error = cachefs_cd_access(fscp, connected, 1);
7859                 if (error)
7860                         break;
7861                 held = 1;
7862 
7863                 error = cachefs_putpage_common(vp, off, len, flags, cr);
7864                 if (error == 0)
7865                         break;
7866 
7867                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7868                         if (CFS_TIMEOUT(fscp, error)) {
7869                                 cachefs_cd_release(fscp);
7870                                 held = 0;
7871                                 cachefs_cd_timedout(fscp);
7872                                 connected = 0;
7873                                 continue;
7874                         }
7875                 } else {
7876                         if (NOMEMWAIT()) {
7877                                 error = 0;
7878                                 goto out;
7879                         }
7880                         if (CFS_TIMEOUT(fscp, error)) {
7881                                 connected = 1;
7882                                 continue;
7883                         }
7884                 }
7885                 break;
7886         }
7887 
7888 out:
7889 
7890         if (held) {
7891                 cachefs_cd_release(fscp);
7892         }
7893 
7894 #ifdef CFS_CD_DEBUG
7895         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7896 #endif
7897         return (error);
7898 }
7899 
7900 /*
7901  * cachefs_putpage_backfs_nfsv4
7902  *
7903  * Call NFSv4 back filesystem to handle the putpage (cachefs
7904  * pass-through support for NFSv4).
7905  */
7906 static int
7907 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7908                         cred_t *cr)
7909 {
7910         cnode_t *cp = VTOC(vp);
7911         fscache_t *fscp = C_TO_FSCACHE(cp);
7912         vnode_t *backvp;
7913         int error;
7914 
7915         /*
7916          * For NFSv4 pass-through to work, only connected operation is
7917          * supported, the cnode backvp must exist, and cachefs optional
7918          * (eg., disconnectable) flags are turned off. Assert these
7919          * conditions for the putpage operation.
7920          */
7921         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7922         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7923 
7924         /* Call backfs vnode op after extracting backvp */
7925         mutex_enter(&cp->c_statelock);
7926         backvp = cp->c_backvp;
7927         mutex_exit(&cp->c_statelock);
7928 
7929         CFS_DPRINT_BACKFS_NFSV4(fscp,
7930             ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7931             cp, backvp));
7932         error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7933 
7934         return (error);
7935 }
7936 
7937 /*
7938  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7939  * If len == 0, do from off to EOF.
7940  *
7941  * The normal cases should be len == 0 & off == 0 (entire vp list),
7942  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7943  * (from pageout).
7944  */
7945 
7946 /*ARGSUSED*/
7947 int
7948 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7949     int flags, cred_t *cr)
7950 {
7951         struct cnode *cp  = VTOC(vp);
7952         struct page *pp;
7953         size_t io_len;
7954         u_offset_t eoff, io_off;
7955         int error = 0;
7956         fscache_t *fscp = C_TO_FSCACHE(cp);
7957         cachefscache_t *cachep = fscp->fs_cache;
7958 
7959         if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7960                 return (0);
7961         }
7962         if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7963             (flags & B_INVAL) == 0))
7964                 return (0);
7965 
7966         /*
7967          * Should never have cached data for the cachefs vnode
7968          * if NFSv4 is in use.
7969          */
7970         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7971 
7972         /*
7973          * If this is an async putpage let a thread handle it.
7974          */
7975         if (flags & B_ASYNC) {
7976                 struct cachefs_req *rp;
7977                 int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7978 
7979                 if (ttoproc(curthread) == proc_pageout) {
7980                         /*
7981                          * If this is the page daemon we
7982                          * do the push synchronously (Dangerous!) and hope
7983                          * we can free enough to keep running...
7984                          */
7985                         flags &= ~B_ASYNC;
7986                         goto again;
7987                 }
7988 
7989                 if (! cachefs_async_okay()) {
7990 
7991                         /*
7992                          * this is somewhat like NFS's behavior.  keep
7993                          * the system from thrashing.  we've seen
7994                          * cases where async queues get out of
7995                          * control, especially if
7996                          * madvise(MADV_SEQUENTIAL) is done on a large
7997                          * mmap()ed file that is read sequentially.
7998                          */
7999 
8000                         flags &= ~B_ASYNC;
8001                         goto again;
8002                 }
8003 
8004                 /*
8005                  * if no flags other than B_ASYNC were set,
8006                  * we coalesce putpage requests into a single one for the
8007                  * whole file (len = off = 0).  If such a request is
8008                  * already queued, we're done.
8009                  *
8010                  * If there are other flags set (e.g., B_INVAL), we don't
8011                  * attempt to coalesce and we use the specified length and
8012                  * offset.
8013                  */
8014                 rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8015                 mutex_enter(&cp->c_iomutex);
8016                 if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8017                         rp->cfs_cmd = CFS_PUTPAGE;
8018                         rp->cfs_req_u.cu_putpage.cp_vp = vp;
8019                         if (tflags == 0) {
8020                                 off = len = 0;
8021                                 cp->c_ioflags |= CIO_PUTPAGES;
8022                         }
8023                         rp->cfs_req_u.cu_putpage.cp_off = off;
8024                         rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8025                         rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8026                         rp->cfs_cr = cr;
8027                         crhold(rp->cfs_cr);
8028                         VN_HOLD(vp);
8029                         cp->c_nio++;
8030                         cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8031                 } else {
8032                         kmem_cache_free(cachefs_req_cache, rp);
8033                 }
8034 
8035                 mutex_exit(&cp->c_iomutex);
8036                 return (0);
8037         }
8038 
8039 
8040 again:
8041         if (len == 0) {
8042                 /*
8043                  * Search the entire vp list for pages >= off
8044                  */
8045                 error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8046         } else {
8047                 /*
8048                  * Do a range from [off...off + len] looking for pages
8049                  * to deal with.
8050                  */
8051                 eoff = (u_offset_t)off + len;
8052                 for (io_off = off; io_off < eoff && io_off < cp->c_size;
8053                     io_off += io_len) {
8054                         /*
8055                          * If we are not invalidating, synchronously
8056                          * freeing or writing pages use the routine
8057                          * page_lookup_nowait() to prevent reclaiming
8058                          * them from the free list.
8059                          */
8060                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8061                                 pp = page_lookup(vp, io_off,
8062                                     (flags & (B_INVAL | B_FREE)) ?
8063                                     SE_EXCL : SE_SHARED);
8064                         } else {
8065                                 /* XXX this looks like dead code */
8066                                 pp = page_lookup_nowait(vp, io_off,
8067                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8068                         }
8069 
8070                         if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8071                                 io_len = PAGESIZE;
8072                         else {
8073                                 error = cachefs_push(vp, pp, &io_off,
8074                                     &io_len, flags, cr);
8075                                 if (error != 0)
8076                                         break;
8077                                 /*
8078                                  * "io_off" and "io_len" are returned as
8079                                  * the range of pages we actually wrote.
8080                                  * This allows us to skip ahead more quickly
8081                                  * since several pages may've been dealt
8082                                  * with by this iteration of the loop.
8083                                  */
8084                         }
8085                 }
8086         }
8087 
8088         if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8089                 cp->c_flags &= ~CDIRTY;
8090         }
8091 
8092         if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8093                 cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8094                     &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8095                     crgetuid(cr), off, len);
8096 
8097         return (error);
8098 
8099 }
8100 
8101 /*ARGSUSED*/
8102 static int
8103 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8104     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8105     caller_context_t *ct)
8106 {
8107         cnode_t *cp = VTOC(vp);
8108         fscache_t *fscp = C_TO_FSCACHE(cp);
8109         struct segvn_crargs vn_a;
8110         int error;
8111         int held = 0;
8112         int writing;
8113         int connected = 0;
8114 
8115 #ifdef CFSDEBUG
8116         u_offset_t offx = (u_offset_t)off;
8117 
8118         CFS_DEBUG(CFSDEBUG_VOPS)
8119                 printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8120                     (void *)vp, offx, len, flags);
8121 #endif
8122         if (getzoneid() != GLOBAL_ZONEID) {
8123                 error = EPERM;
8124                 goto out;
8125         }
8126 
8127         if (vp->v_flag & VNOMAP) {
8128                 error = ENOSYS;
8129                 goto out;
8130         }
8131         if (off < 0 || (offset_t)(off + len) < 0) {
8132                 error = ENXIO;
8133                 goto out;
8134         }
8135         if (vp->v_type != VREG) {
8136                 error = ENODEV;
8137                 goto out;
8138         }
8139 
8140         /*
8141          * Check to see if the vnode is currently marked as not cachable.
8142          * If so, we have to refuse the map request as this violates the
8143          * don't cache attribute.
8144          */
8145         if (vp->v_flag & VNOCACHE)
8146                 return (EAGAIN);
8147 
8148 #ifdef OBSOLETE
8149         /*
8150          * If file is being locked, disallow mapping.
8151          */
8152         if (vn_has_flocks(vp)) {
8153                 error = EAGAIN;
8154                 goto out;
8155         }
8156 #endif
8157 
8158         /* call backfilesystem if NFSv4 */
8159         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8160                 error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8161                     maxprot, flags, cr);
8162                 goto out;
8163         }
8164 
8165         writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8166 
8167         for (;;) {
8168                 /* get (or renew) access to the file system */
8169                 if (held) {
8170                         cachefs_cd_release(fscp);
8171                         held = 0;
8172                 }
8173                 error = cachefs_cd_access(fscp, connected, writing);
8174                 if (error)
8175                         break;
8176                 held = 1;
8177 
8178                 if (writing) {
8179                         mutex_enter(&cp->c_statelock);
8180                         if (CFS_ISFS_WRITE_AROUND(fscp)) {
8181                                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8182                                         connected = 1;
8183                                         continue;
8184                                 } else {
8185                                         cachefs_nocache(cp);
8186                                 }
8187                         }
8188 
8189                         /*
8190                          * CN_MAPWRITE is for an optimization in cachefs_delmap.
8191                          * If CN_MAPWRITE is not set then cachefs_delmap does
8192                          * not need to try to push out any pages.
8193                          * This bit gets cleared when the cnode goes inactive.
8194                          */
8195                         cp->c_flags |= CN_MAPWRITE;
8196 
8197                         mutex_exit(&cp->c_statelock);
8198                 }
8199                 break;
8200         }
8201 
8202         if (held) {
8203                 cachefs_cd_release(fscp);
8204         }
8205 
8206         as_rangelock(as);
8207         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8208         if (error != 0) {
8209                 as_rangeunlock(as);
8210                 goto out;
8211         }
8212 
8213         /*
8214          * package up all the data passed in into a segvn_args struct and
8215          * call as_map with segvn_create function to create a new segment
8216          * in the address space.
8217          */
8218         vn_a.vp = vp;
8219         vn_a.offset = off;
8220         vn_a.type = flags & MAP_TYPE;
8221         vn_a.prot = (uchar_t)prot;
8222         vn_a.maxprot = (uchar_t)maxprot;
8223         vn_a.cred = cr;
8224         vn_a.amp = NULL;
8225         vn_a.flags = flags & ~MAP_TYPE;
8226         vn_a.szc = 0;
8227         vn_a.lgrp_mem_policy_flags = 0;
8228         error = as_map(as, *addrp, len, segvn_create, &vn_a);
8229         as_rangeunlock(as);
8230 out:
8231 
8232 #ifdef CFS_CD_DEBUG
8233         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8234 #endif
8235 #ifdef CFSDEBUG
8236         CFS_DEBUG(CFSDEBUG_VOPS)
8237                 printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8238 #endif
8239         return (error);
8240 }
8241 
8242 /*
8243  * cachefs_map_backfs_nfsv4
8244  *
8245  * Call NFSv4 back filesystem to handle the map (cachefs
8246  * pass-through support for NFSv4).
8247  */
8248 static int
8249 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8250                         caddr_t *addrp, size_t len, uchar_t prot,
8251                         uchar_t maxprot, uint_t flags, cred_t *cr)
8252 {
8253         cnode_t *cp = VTOC(vp);
8254         fscache_t *fscp = C_TO_FSCACHE(cp);
8255         vnode_t *backvp;
8256         int error;
8257 
8258         /*
8259          * For NFSv4 pass-through to work, only connected operation is
8260          * supported, the cnode backvp must exist, and cachefs optional
8261          * (eg., disconnectable) flags are turned off. Assert these
8262          * conditions for the map operation.
8263          */
8264         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8265         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8266 
8267         /* Call backfs vnode op after extracting backvp */
8268         mutex_enter(&cp->c_statelock);
8269         backvp = cp->c_backvp;
8270         mutex_exit(&cp->c_statelock);
8271 
8272         CFS_DPRINT_BACKFS_NFSV4(fscp,
8273             ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8274             cp, backvp));
8275         error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8276             NULL);
8277 
8278         return (error);
8279 }
8280 
8281 /*ARGSUSED*/
8282 static int
8283 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8284     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8285     cred_t *cr, caller_context_t *ct)
8286 {
8287         cnode_t *cp = VTOC(vp);
8288         fscache_t *fscp = C_TO_FSCACHE(cp);
8289 
8290         if (getzoneid() != GLOBAL_ZONEID)
8291                 return (EPERM);
8292 
8293         if (vp->v_flag & VNOMAP)
8294                 return (ENOSYS);
8295 
8296         /*
8297          * Check this is not an NFSv4 filesystem, as the mapping
8298          * is not done on the cachefs filesystem if NFSv4 is in
8299          * use.
8300          */
8301         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8302 
8303         mutex_enter(&cp->c_statelock);
8304         cp->c_mapcnt += btopr(len);
8305         mutex_exit(&cp->c_statelock);
8306         return (0);
8307 }
8308 
8309 /*ARGSUSED*/
8310 static int
8311 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8312         caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8313         cred_t *cr, caller_context_t *ct)
8314 {
8315         cnode_t *cp = VTOC(vp);
8316         fscache_t *fscp = C_TO_FSCACHE(cp);
8317         int error;
8318         int connected = 0;
8319         int held = 0;
8320 
8321         /*
8322          * The file may be passed in to (or inherited into) the zone, so we
8323          * need to let this operation go through since it happens as part of
8324          * exiting.
8325          */
8326         if (vp->v_flag & VNOMAP)
8327                 return (ENOSYS);
8328 
8329         /*
8330          * Check this is not an NFSv4 filesystem, as the mapping
8331          * is not done on the cachefs filesystem if NFSv4 is in
8332          * use.
8333          */
8334         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8335 
8336         mutex_enter(&cp->c_statelock);
8337         cp->c_mapcnt -= btopr(len);
8338         ASSERT(cp->c_mapcnt >= 0);
8339         mutex_exit(&cp->c_statelock);
8340 
8341         if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8342             ((cp->c_flags & CN_MAPWRITE) == 0))
8343                 return (0);
8344 
8345         for (;;) {
8346                 /* get (or renew) access to the file system */
8347                 if (held) {
8348                         cachefs_cd_release(fscp);
8349                         held = 0;
8350                 }
8351                 error = cachefs_cd_access(fscp, connected, 1);
8352                 if (error)
8353                         break;
8354                 held = 1;
8355                 connected = 0;
8356 
8357                 error = cachefs_putpage_common(vp, (offset_t)0,
8358                     (uint_t)0, 0, cr);
8359                 if (CFS_TIMEOUT(fscp, error)) {
8360                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8361                                 cachefs_cd_release(fscp);
8362                                 held = 0;
8363                                 cachefs_cd_timedout(fscp);
8364                                 continue;
8365                         } else {
8366                                 connected = 1;
8367                                 continue;
8368                         }
8369                 }
8370 
8371                 /* if no space left in cache, wait until connected */
8372                 if ((error == ENOSPC) &&
8373                     (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8374                         connected = 1;
8375                         continue;
8376                 }
8377 
8378                 mutex_enter(&cp->c_statelock);
8379                 if (!error)
8380                         error = cp->c_error;
8381                 cp->c_error = 0;
8382                 mutex_exit(&cp->c_statelock);
8383                 break;
8384         }
8385 
8386         if (held)
8387                 cachefs_cd_release(fscp);
8388 
8389 #ifdef CFS_CD_DEBUG
8390         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8391 #endif
8392         return (error);
8393 }
8394 
8395 /* ARGSUSED */
8396 static int
8397 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8398         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8399         caller_context_t *ct)
8400 {
8401         struct cnode *cp = VTOC(vp);
8402         int error;
8403         struct fscache *fscp = C_TO_FSCACHE(cp);
8404         vnode_t *backvp;
8405         int held = 0;
8406         int connected = 0;
8407 
8408         if (getzoneid() != GLOBAL_ZONEID)
8409                 return (EPERM);
8410 
8411         if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8412                 return (EINVAL);
8413 
8414         /* Disallow locking of files that are currently mapped */
8415         if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8416                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8417                 return (EAGAIN);
8418         }
8419 
8420         /*
8421          * Cachefs only provides pass-through support for NFSv4,
8422          * and all vnode operations are passed through to the
8423          * back file system. For NFSv4 pass-through to work, only
8424          * connected operation is supported, the cnode backvp must
8425          * exist, and cachefs optional (eg., disconnectable) flags
8426          * are turned off. Assert these conditions to ensure that
8427          * the backfilesystem is called for the frlock operation.
8428          */
8429         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8430         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8431 
8432         /* XXX bob: nfs does a bunch more checks than we do */
8433         if (CFS_ISFS_LLOCK(fscp)) {
8434                 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8435                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8436         }
8437 
8438         for (;;) {
8439                 /* get (or renew) access to the file system */
8440                 if (held) {
8441                         /* Won't loop with NFSv4 connected behavior */
8442                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8443                         cachefs_cd_release(fscp);
8444                         held = 0;
8445                 }
8446                 error = cachefs_cd_access(fscp, connected, 0);
8447                 if (error)
8448                         break;
8449                 held = 1;
8450 
8451                 /* if not connected, quit or wait */
8452                 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8453                         connected = 1;
8454                         continue;
8455                 }
8456 
8457                 /* nocache the file */
8458                 if ((cp->c_flags & CN_NOCACHE) == 0 &&
8459                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8460                         mutex_enter(&cp->c_statelock);
8461                         cachefs_nocache(cp);
8462                         mutex_exit(&cp->c_statelock);
8463                 }
8464 
8465                 /*
8466                  * XXX bob: probably should do a consistency check
8467                  * Pass arguments unchanged if NFSv4 is the backfs.
8468                  */
8469                 if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8470                         bfp->l_start += cp->c_size;
8471                         bfp->l_whence = 0;
8472                 }
8473 
8474                 /* get the back vp */
8475                 mutex_enter(&cp->c_statelock);
8476                 if (cp->c_backvp == NULL) {
8477                         error = cachefs_getbackvp(fscp, cp);
8478                         if (error) {
8479                                 mutex_exit(&cp->c_statelock);
8480                                 break;
8481                         }
8482                 }
8483                 backvp = cp->c_backvp;
8484                 VN_HOLD(backvp);
8485                 mutex_exit(&cp->c_statelock);
8486 
8487                 /*
8488                  * make sure we can flush currently dirty pages before
8489                  * allowing the lock
8490                  */
8491                 if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8492                     !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8493                         error = cachefs_putpage(
8494                             vp, (offset_t)0, 0, B_INVAL, cr, ct);
8495                         if (error) {
8496                                 error = ENOLCK;
8497                                 VN_RELE(backvp);
8498                                 break;
8499                         }
8500                 }
8501 
8502                 /* do lock on the back file */
8503                 CFS_DPRINT_BACKFS_NFSV4(fscp,
8504                     ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8505                     cp, backvp));
8506                 error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8507                     ct);
8508                 VN_RELE(backvp);
8509                 if (CFS_TIMEOUT(fscp, error)) {
8510                         connected = 1;
8511                         continue;
8512                 }
8513                 break;
8514         }
8515 
8516         if (held) {
8517                 cachefs_cd_release(fscp);
8518         }
8519 
8520         /*
8521          * If we are setting a lock mark the vnode VNOCACHE so the page
8522          * cache does not give inconsistent results on locked files shared
8523          * between clients.  The VNOCACHE flag is never turned off as long
8524          * as the vnode is active because it is hard to figure out when the
8525          * last lock is gone.
8526          * XXX - what if some already has the vnode mapped in?
8527          * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8528          */
8529         if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8530             !CFS_ISFS_BACKFS_NFSV4(fscp))
8531                 vp->v_flag |= VNOCACHE;
8532 
8533 #ifdef CFS_CD_DEBUG
8534         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8535 #endif
8536         return (error);
8537 }
8538 
8539 /*
8540  * Free storage space associated with the specified vnode.  The portion
8541  * to be freed is specified by bfp->l_start and bfp->l_len (already
8542  * normalized to a "whence" of 0).
8543  *
8544  * This is an experimental facility whose continued existence is not
8545  * guaranteed.  Currently, we only support the special case
8546  * of l_len == 0, meaning free to end of file.
8547  */
8548 /* ARGSUSED */
8549 static int
8550 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8551         offset_t offset, cred_t *cr, caller_context_t *ct)
8552 {
8553         cnode_t *cp = VTOC(vp);
8554         fscache_t *fscp = C_TO_FSCACHE(cp);
8555         int error;
8556 
8557         ASSERT(vp->v_type == VREG);
8558         if (getzoneid() != GLOBAL_ZONEID)
8559                 return (EPERM);
8560         if (cmd != F_FREESP)
8561                 return (EINVAL);
8562 
8563         /* call backfilesystem if NFSv4 */
8564         if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8565                 error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8566                     offset, cr, ct);
8567                 goto out;
8568         }
8569 
8570         if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8571                 ASSERT(bfp->l_start >= 0);
8572                 if (bfp->l_len == 0) {
8573                         struct vattr va;
8574 
8575                         va.va_size = bfp->l_start;
8576                         va.va_mask = AT_SIZE;
8577                         error = cachefs_setattr(vp, &va, 0, cr, ct);
8578                 } else
8579                         error = EINVAL;
8580         }
8581 
8582 out:
8583         return (error);
8584 }
8585 
8586 /*
8587  * cachefs_space_backfs_nfsv4
8588  *
8589  * Call NFSv4 back filesystem to handle the space (cachefs
8590  * pass-through support for NFSv4).
8591  */
8592 static int
8593 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8594                 int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8595 {
8596         cnode_t *cp = VTOC(vp);
8597         fscache_t *fscp = C_TO_FSCACHE(cp);
8598         vnode_t *backvp;
8599         int error;
8600 
8601         /*
8602          * For NFSv4 pass-through to work, only connected operation is
8603          * supported, the cnode backvp must exist, and cachefs optional
8604          * (eg., disconnectable) flags are turned off. Assert these
8605          * conditions for the space operation.
8606          */
8607         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8608         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8609 
8610         /* Call backfs vnode op after extracting backvp */
8611         mutex_enter(&cp->c_statelock);
8612         backvp = cp->c_backvp;
8613         mutex_exit(&cp->c_statelock);
8614 
8615         CFS_DPRINT_BACKFS_NFSV4(fscp,
8616             ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8617             cp, backvp));
8618         error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8619 
8620         return (error);
8621 }
8622 
8623 /*ARGSUSED*/
8624 static int
8625 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8626 {
8627         return (EINVAL);
8628 }
8629 
8630 /*ARGSUSED*/
8631 static int
8632 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8633         int flags, cred_t *cr, caller_context_t *ct)
8634 {
8635         return (ENOSYS);
8636 }
8637 
8638 static int
8639 cachefs_setsecattr_connected(cnode_t *cp,
8640     vsecattr_t *vsec, int flag, cred_t *cr)
8641 {
8642         fscache_t *fscp = C_TO_FSCACHE(cp);
8643         int error = 0;
8644 
8645         ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8646         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8647 
8648         mutex_enter(&cp->c_statelock);
8649 
8650         if (cp->c_backvp == NULL) {
8651                 error = cachefs_getbackvp(fscp, cp);
8652                 if (error) {
8653                         cachefs_nocache(cp);
8654                         goto out;
8655                 }
8656         }
8657 
8658         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8659         if (error)
8660                 goto out;
8661 
8662         /* only owner can set acl */
8663         if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8664                 error = EINVAL;
8665                 goto out;
8666         }
8667 
8668 
8669         CFS_DPRINT_BACKFS_NFSV4(fscp,
8670             ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8671             cp, cp->c_backvp));
8672         error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8673         if (error) {
8674                 goto out;
8675         }
8676 
8677         if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8678             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8679                 cachefs_nocache(cp);
8680                 goto out;
8681         }
8682 
8683         CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8684 
8685         /* acl may have changed permissions -- handle this. */
8686         if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8687                 cachefs_acl2perm(cp, vsec);
8688 
8689         if ((cp->c_flags & CN_NOCACHE) == 0 &&
8690             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8691                 error = cachefs_cacheacl(cp, vsec);
8692                 if (error != 0) {
8693 #ifdef CFSDEBUG
8694                         CFS_DEBUG(CFSDEBUG_VOPS)
8695                                 printf("cachefs_setacl: cacheacl: error %d\n",
8696                                     error);
8697 #endif /* CFSDEBUG */
8698                         error = 0;
8699                         cachefs_nocache(cp);
8700                 }
8701         }
8702 
8703 out:
8704         mutex_exit(&cp->c_statelock);
8705 
8706         return (error);
8707 }
8708 
8709 static int
8710 cachefs_setsecattr_disconnected(cnode_t *cp,
8711     vsecattr_t *vsec, int flag, cred_t *cr)
8712 {
8713         fscache_t *fscp = C_TO_FSCACHE(cp);
8714         mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8715         off_t commit = 0;
8716         int error = 0;
8717 
8718         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8719 
8720         if (CFS_ISFS_WRITE_AROUND(fscp))
8721                 return (ETIMEDOUT);
8722 
8723         mutex_enter(&cp->c_statelock);
8724 
8725         /* only owner can set acl */
8726         if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8727                 error = EINVAL;
8728                 goto out;
8729         }
8730 
8731         if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8732                 error = ETIMEDOUT;
8733                 goto out;
8734         }
8735 
8736         /* XXX do i need this?  is this right? */
8737         if (cp->c_flags & CN_ALLOC_PENDING) {
8738                 if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8739                         (void) filegrp_allocattr(cp->c_filegrp);
8740                 }
8741                 error = filegrp_create_metadata(cp->c_filegrp,
8742                     &cp->c_metadata, &cp->c_id);
8743                 if (error) {
8744                         goto out;
8745                 }
8746                 cp->c_flags &= ~CN_ALLOC_PENDING;
8747         }
8748 
8749         /* XXX is this right? */
8750         if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8751                 error = cachefs_dlog_cidmap(fscp);
8752                 if (error) {
8753                         error = ENOSPC;
8754                         goto out;
8755                 }
8756                 cp->c_metadata.md_flags |= MD_MAPPING;
8757                 cp->c_flags |= CN_UPDATED;
8758         }
8759 
8760         commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8761         if (commit == 0)
8762                 goto out;
8763 
8764         /* fix modes in metadata */
8765         cachefs_acl2perm(cp, vsec);
8766 
8767         if ((cp->c_flags & CN_NOCACHE) == 0) {
8768                 error = cachefs_cacheacl(cp, vsec);
8769                 if (error != 0) {
8770                         goto out;
8771                 }
8772         }
8773 
8774         /* XXX is this right? */
8775         if (cachefs_modified_alloc(cp)) {
8776                 error = ENOSPC;
8777                 goto out;
8778         }
8779 
8780 out:
8781         if (error != 0)
8782                 cp->c_metadata.md_vattr.va_mode = failmode;
8783 
8784         mutex_exit(&cp->c_statelock);
8785 
8786         if (commit) {
8787                 if (cachefs_dlog_commit(fscp, commit, error)) {
8788                         /*EMPTY*/
8789                         /* XXX fix on panic? */
8790                 }
8791         }
8792 
8793         return (error);
8794 }
8795 
8796 /*ARGSUSED*/
8797 static int
8798 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8799     caller_context_t *ct)
8800 {
8801         cnode_t *cp = VTOC(vp);
8802         fscache_t *fscp = C_TO_FSCACHE(cp);
8803         int connected = 0;
8804         int held = 0;
8805         int error = 0;
8806 
8807 #ifdef CFSDEBUG
8808         CFS_DEBUG(CFSDEBUG_VOPS)
8809                 printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8810 #endif
8811         if (getzoneid() != GLOBAL_ZONEID) {
8812                 error = EPERM;
8813                 goto out;
8814         }
8815 
8816         if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8817                 error = ENOSYS;
8818                 goto out;
8819         }
8820 
8821         if (! cachefs_vtype_aclok(vp)) {
8822                 error = EINVAL;
8823                 goto out;
8824         }
8825 
8826         /*
8827          * Cachefs only provides pass-through support for NFSv4,
8828          * and all vnode operations are passed through to the
8829          * back file system. For NFSv4 pass-through to work, only
8830          * connected operation is supported, the cnode backvp must
8831          * exist, and cachefs optional (eg., disconnectable) flags
8832          * are turned off. Assert these conditions to ensure that
8833          * the backfilesystem is called for the setsecattr operation.
8834          */
8835         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8836         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8837 
8838         for (;;) {
8839                 /* drop hold on file system */
8840                 if (held) {
8841                         /* Won't loop with NFSv4 connected operation */
8842                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8843                         cachefs_cd_release(fscp);
8844                         held = 0;
8845                 }
8846 
8847                 /* acquire access to the file system */
8848                 error = cachefs_cd_access(fscp, connected, 1);
8849                 if (error)
8850                         break;
8851                 held = 1;
8852 
8853                 /* perform the setattr */
8854                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8855                         error = cachefs_setsecattr_connected(cp,
8856                             vsec, flag, cr);
8857                 else
8858                         error = cachefs_setsecattr_disconnected(cp,
8859                             vsec, flag, cr);
8860                 if (error) {
8861                         /* if connected */
8862                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8863                                 if (CFS_TIMEOUT(fscp, error)) {
8864                                         cachefs_cd_release(fscp);
8865                                         held = 0;
8866                                         cachefs_cd_timedout(fscp);
8867                                         connected = 0;
8868                                         continue;
8869                                 }
8870                         }
8871 
8872                         /* else must be disconnected */
8873                         else {
8874                                 if (CFS_TIMEOUT(fscp, error)) {
8875                                         connected = 1;
8876                                         continue;
8877                                 }
8878                         }
8879                 }
8880                 break;
8881         }
8882 
8883         if (held) {
8884                 cachefs_cd_release(fscp);
8885         }
8886         return (error);
8887 
8888 out:
8889 #ifdef CFS_CD_DEBUG
8890         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8891 #endif
8892 
8893 #ifdef CFSDEBUG
8894         CFS_DEBUG(CFSDEBUG_VOPS)
8895                 printf("cachefs_setsecattr: EXIT error = %d\n", error);
8896 #endif
8897         return (error);
8898 }
8899 
8900 /*
8901  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8902  * sanitize the acl.
8903  */
8904 
8905 static void
8906 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8907 {
8908         aclent_t *aclp;
8909         int i;
8910 
8911         for (i = 0; i < vsec->vsa_aclcnt; i++) {
8912                 aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8913                 switch (aclp->a_type) {
8914                 case USER_OBJ:
8915                         cp->c_metadata.md_vattr.va_mode &= (~0700);
8916                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8917                         break;
8918 
8919                 case GROUP_OBJ:
8920                         cp->c_metadata.md_vattr.va_mode &= (~070);
8921                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8922                         break;
8923 
8924                 case OTHER_OBJ:
8925                         cp->c_metadata.md_vattr.va_mode &= (~07);
8926                         cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8927                         break;
8928 
8929                 case CLASS_OBJ:
8930                         cp->c_metadata.md_aclclass = aclp->a_perm;
8931                         break;
8932                 }
8933         }
8934 
8935         cp->c_flags |= CN_UPDATED;
8936 }
8937 
8938 static int
8939 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8940     caller_context_t *ct)
8941 {
8942         cnode_t *cp = VTOC(vp);
8943         fscache_t *fscp = C_TO_FSCACHE(cp);
8944         int held = 0, connected = 0;
8945         int error = 0;
8946 
8947 #ifdef CFSDEBUG
8948         CFS_DEBUG(CFSDEBUG_VOPS)
8949                 printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8950 #endif
8951 
8952         if (getzoneid() != GLOBAL_ZONEID) {
8953                 error = EPERM;
8954                 goto out;
8955         }
8956 
8957         /*
8958          * Cachefs only provides pass-through support for NFSv4,
8959          * and all vnode operations are passed through to the
8960          * back file system. For NFSv4 pass-through to work, only
8961          * connected operation is supported, the cnode backvp must
8962          * exist, and cachefs optional (eg., disconnectable) flags
8963          * are turned off. Assert these conditions to ensure that
8964          * the backfilesystem is called for the getsecattr operation.
8965          */
8966         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8967         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8968 
8969         if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8970                 error = fs_fab_acl(vp, vsec, flag, cr, ct);
8971                 goto out;
8972         }
8973 
8974         for (;;) {
8975                 if (held) {
8976                         /* Won't loop with NFSv4 connected behavior */
8977                         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8978                         cachefs_cd_release(fscp);
8979                         held = 0;
8980                 }
8981                 error = cachefs_cd_access(fscp, connected, 0);
8982                 if (error)
8983                         break;
8984                 held = 1;
8985 
8986                 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8987                         error = cachefs_getsecattr_connected(vp, vsec, flag,
8988                             cr);
8989                         if (CFS_TIMEOUT(fscp, error)) {
8990                                 cachefs_cd_release(fscp);
8991                                 held = 0;
8992                                 cachefs_cd_timedout(fscp);
8993                                 connected = 0;
8994                                 continue;
8995                         }
8996                 } else {
8997                         error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8998                             cr);
8999                         if (CFS_TIMEOUT(fscp, error)) {
9000                                 if (cachefs_cd_access_miss(fscp)) {
9001                                         error = cachefs_getsecattr_connected(vp,
9002                                             vsec, flag, cr);
9003                                         if (!CFS_TIMEOUT(fscp, error))
9004                                                 break;
9005                                         delay(5*hz);
9006                                         connected = 0;
9007                                         continue;
9008                                 }
9009                                 connected = 1;
9010                                 continue;
9011                         }
9012                 }
9013                 break;
9014         }
9015 
9016 out:
9017         if (held)
9018                 cachefs_cd_release(fscp);
9019 
9020 #ifdef CFS_CD_DEBUG
9021         ASSERT((curthread->t_flag & T_CD_HELD) == 0);
9022 #endif
9023 #ifdef CFSDEBUG
9024         CFS_DEBUG(CFSDEBUG_VOPS)
9025                 printf("cachefs_getsecattr: EXIT error = %d\n", error);
9026 #endif
9027         return (error);
9028 }
9029 
9030 static int
9031 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9032     caller_context_t *ct)
9033 {
9034         cnode_t *cp = VTOC(vp);
9035         fscache_t *fscp = C_TO_FSCACHE(cp);
9036         int error = 0;
9037         vnode_t *backvp;
9038 
9039 #ifdef CFSDEBUG
9040         CFS_DEBUG(CFSDEBUG_VOPS)
9041                 printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9042 #endif
9043 
9044         if (getzoneid() != GLOBAL_ZONEID) {
9045                 error = EPERM;
9046                 goto out;
9047         }
9048 
9049         /*
9050          * Cachefs only provides pass-through support for NFSv4,
9051          * and all vnode operations are passed through to the
9052          * back file system. For NFSv4 pass-through to work, only
9053          * connected operation is supported, the cnode backvp must
9054          * exist, and cachefs optional (eg., disconnectable) flags
9055          * are turned off. Assert these conditions to ensure that
9056          * the backfilesystem is called for the shrlock operation.
9057          */
9058         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9059         CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9060 
9061         mutex_enter(&cp->c_statelock);
9062         if (cp->c_backvp == NULL)
9063                 error = cachefs_getbackvp(fscp, cp);
9064         backvp = cp->c_backvp;
9065         mutex_exit(&cp->c_statelock);
9066         ASSERT((error != 0) || (backvp != NULL));
9067 
9068         if (error == 0) {
9069                 CFS_DPRINT_BACKFS_NFSV4(fscp,
9070                     ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9071                     cp, backvp));
9072                 error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9073         }
9074 
9075 out:
9076 #ifdef CFSDEBUG
9077         CFS_DEBUG(CFSDEBUG_VOPS)
9078                 printf("cachefs_shrlock: EXIT error = %d\n", error);
9079 #endif
9080         return (error);
9081 }
9082 
9083 static int
9084 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9085     cred_t *cr)
9086 {
9087         cnode_t *cp = VTOC(vp);
9088         fscache_t *fscp = C_TO_FSCACHE(cp);
9089         int hit = 0;
9090         int error = 0;
9091 
9092 
9093         mutex_enter(&cp->c_statelock);
9094         error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9095         if (error)
9096                 goto out;
9097 
9098         /* read from the cache if we can */
9099         if ((cp->c_metadata.md_flags & MD_ACL) &&
9100             ((cp->c_flags & CN_NOCACHE) == 0) &&
9101             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9102                 ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9103                 error = cachefs_getaclfromcache(cp, vsec);
9104                 if (error) {
9105                         cachefs_nocache(cp);
9106                         ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9107                         error = 0;
9108                 } else {
9109                         hit = 1;
9110                         goto out;
9111                 }
9112         }
9113 
9114         ASSERT(error == 0);
9115         if (cp->c_backvp == NULL)
9116                 error = cachefs_getbackvp(fscp, cp);
9117         if (error)
9118                 goto out;
9119 
9120         CFS_DPRINT_BACKFS_NFSV4(fscp,
9121             ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9122             cp, cp->c_backvp));
9123         error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9124         if (error)
9125                 goto out;
9126 
9127         if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9128             (cachefs_vtype_aclok(vp)) &&
9129             ((cp->c_flags & CN_NOCACHE) == 0) &&
9130             !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9131                 error = cachefs_cacheacl(cp, vsec);
9132                 if (error) {
9133                         error = 0;
9134                         cachefs_nocache(cp);
9135                 }
9136         }
9137 
9138 out:
9139         if (error == 0) {
9140                 if (hit)
9141                         fscp->fs_stats.st_hits++;
9142                 else
9143                         fscp->fs_stats.st_misses++;
9144         }
9145         mutex_exit(&cp->c_statelock);
9146 
9147         return (error);
9148 }
9149 
9150 static int
9151 /*ARGSUSED*/
9152 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9153     cred_t *cr)
9154 {
9155         cnode_t *cp = VTOC(vp);
9156         fscache_t *fscp = C_TO_FSCACHE(cp);
9157         int hit = 0;
9158         int error = 0;
9159 
9160 
9161         mutex_enter(&cp->c_statelock);
9162 
9163         /* read from the cache if we can */
9164         if (((cp->c_flags & CN_NOCACHE) == 0) &&
9165             (cp->c_metadata.md_flags & MD_ACL)) {
9166                 error = cachefs_getaclfromcache(cp, vsec);
9167                 if (error) {
9168                         cachefs_nocache(cp);
9169                         ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9170                         error = 0;
9171                 } else {
9172                         hit = 1;
9173                         goto out;
9174                 }
9175         }
9176         error = ETIMEDOUT;
9177 
9178 out:
9179         if (error == 0) {
9180                 if (hit)
9181                         fscp->fs_stats.st_hits++;
9182                 else
9183                         fscp->fs_stats.st_misses++;
9184         }
9185         mutex_exit(&cp->c_statelock);
9186 
9187         return (error);
9188 }
9189 
9190 /*
9191  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9192  * the frontfile if possible; otherwise, the adjunct directory.
      *
      * A caller-supplied ACL is not modified: when vsecp carries VSA_ACL,
      * the entries are copied into a scratch buffer, the copy is what
      * gets sanitized and applied, and the caller's vsa_aclentp pointer
      * is put back before returning.
      *
      * If applying the ACL in the front file system fails, the cached
      * ACL is purged with cachefs_purgeacl() and 0 is returned.  Any
      * other failure while the file system is connected marks the cnode
      * nocache.
9193  *
9194  * inputs:
9195  * cp - the cnode, with its statelock already held
9196  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9197  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9198  *
9199  * returns:
9200  * 0 - all is well
9201  * nonzero - errno
9202  */
9203
9204 int
9205 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9206 {
9207         fscache_t *fscp = C_TO_FSCACHE(cp);
9208         vsecattr_t vsec;
9209         aclent_t *aclp;
9210         int gotvsec = 0;        /* set once vsec was filled from backvp */
9211         int error = 0;
9212         vnode_t *vp = NULL;     /* front vnode the ACL is applied to */
9213         void *aclkeep = NULL;   /* caller's entry array while swapped out */
9214         int i;
9215
9216         ASSERT(MUTEX_HELD(&cp->c_statelock));
9217         ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9218         ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9219         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9220         ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9221
             /* same condition as asserted above, enforced at run time too */
9222         if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9223                 error = ENOSYS;
9224                 goto out;
9225         }
9226
9227         if (vsecp == NULL) {
                     /* no ACL supplied: fetch it from the back file system */
9228                 if (cp->c_backvp == NULL)
9229                         error = cachefs_getbackvp(fscp, cp);
9230                 if (error != 0)
9231                         goto out;
9232                 vsecp = &vsec;
9233                 bzero(&vsec, sizeof (vsec));
9234                 vsecp->vsa_mask =
9235                     VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9236                 error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
9237                 if (error != 0) {
9238                         goto out;
9239                 }
9240                 gotvsec = 1;
9241         } else if (vsecp->vsa_mask & VSA_ACL) {
                     /*
                      * work on a private copy of the caller's entries; the
                      * original pointer is restored at "out"
                      */
9242                 aclkeep = vsecp->vsa_aclentp;
9243                 vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9244                     sizeof (aclent_t), KM_SLEEP);
9245                 bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9246                     sizeof (aclent_t));
9247         } else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9248                 /* unless there's real data, we can cache nothing. */
9249                 return (0);
9250         }
9251
9252         /*
9253          * prevent the ACL from chmoding our frontfile, and
9254          * snarf the class info
              *
              * i.e. the permissions of the USER_OBJ, GROUP_OBJ, OTHER_OBJ
              * and CLASS_OBJ entries are overwritten with 06, after the
              * CLASS_OBJ permissions have been saved in md_aclclass.
9255          */
9256
9257         if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9258             (VSA_ACL | VSA_ACLCNT)) {
9259                 for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9260                         aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9261                         switch (aclp->a_type) {
9262                         case CLASS_OBJ:
9263                                 cp->c_metadata.md_aclclass =
9264                                     aclp->a_perm;
9265                                 /*FALLTHROUGH*/
9266                         case USER_OBJ:
9267                         case GROUP_OBJ:
9268                         case OTHER_OBJ:
9269                                 aclp->a_perm = 06;
9270                         }
9271                 }
9272         }
9273
9274         /*
9275          * if the frontfile exists, then we always do the work.  but,
9276          * if there's no frontfile, and the ACL isn't a `real' ACL,
9277          * then we don't want to do the work.  otherwise, an `ls -l'
9278          * will create tons of empty frontfiles.
9279          */
9280
9281         if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9282             ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9283             <= MIN_ACL_ENTRIES)) {
                     /* just record that an ACL is cached; nothing is stored */
9284                 cp->c_metadata.md_flags |= MD_ACL;
9285                 cp->c_flags |= CN_UPDATED;
9286                 goto out;
9287         }
9288
9289         /*
9290          * if we have a default ACL, then we need a
9291          * real live directory in the frontfs that we
9292          * can apply the ACL to.  if not, then we just
9293          * use the frontfile.  we get the frontfile
9294          * regardless -- that way, we know the
9295          * directory for the frontfile exists.
9296          */
9297
9298         if (vsecp->vsa_dfaclcnt > 0) {
9299                 if (cp->c_acldirvp == NULL)
9300                         error = cachefs_getacldirvp(cp);
9301                 if (error != 0)
9302                         goto out;
9303                 vp = cp->c_acldirvp;
9304         } else {
9305                 if (cp->c_frontvp == NULL)
9306                         error = cachefs_getfrontfile(cp);
9307                 if (error != 0)
9308                         goto out;
9309                 vp = cp->c_frontvp;
9310         }
9311         ASSERT(vp != NULL);
9312
             /* apply the (sanitized) ACL to the chosen front vnode */
9313         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9314         error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
9315         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9316         if (error != 0) {
9317 #ifdef CFSDEBUG
9318                 CFS_DEBUG(CFSDEBUG_VOPS)
9319                         printf("cachefs_cacheacl: setsecattr: error %d\n",
9320                             error);
9321 #endif /* CFSDEBUG */
9322                 /*
9323                  * If there was an error, we don't want to call
9324                  * cachefs_nocache(); so, set error to 0.
9325                  * We will call cachefs_purgeacl(), in order to
9326                  * clean such things as adjunct ACL directories.
9327                  */
9328                 cachefs_purgeacl(cp);
9329                 error = 0;
9330                 goto out;
9331         }
9332         if (vp == cp->c_frontvp)
9333                 cp->c_flags |= CN_NEED_FRONT_SYNC;
9334
9335         cp->c_metadata.md_flags |= MD_ACL;
9336         cp->c_flags |= CN_UPDATED;
9337
9338 out:
             /* a failure while connected: stop caching this object */
9339         if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9340                 cachefs_nocache(cp);
9341
9342         if (gotvsec) {
                     /* free the entry arrays VOP_GETSECATTR() handed back */
9343                 if (vsec.vsa_aclcnt)
9344                         kmem_free(vsec.vsa_aclentp,
9345                             vsec.vsa_aclcnt * sizeof (aclent_t));
9346                 if (vsec.vsa_dfaclcnt)
9347                         kmem_free(vsec.vsa_dfaclentp,
9348                             vsec.vsa_dfaclcnt * sizeof (aclent_t));
9349         } else if (aclkeep != NULL) {
                     /* drop the scratch copy and restore the caller's array */
9350                 cachefs_kmem_free(vsecp->vsa_aclentp,
9351                     vsecp->vsa_aclcnt * sizeof (aclent_t));
9352                 vsecp->vsa_aclentp = aclkeep;
9353         }
9354
9355         return (error);
9356 }
9357 
9358 void
9359 cachefs_purgeacl(cnode_t *cp)
9360 {
9361         ASSERT(MUTEX_HELD(&cp->c_statelock));
9362 
9363         ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9364 
9365         if (cp->c_acldirvp != NULL) {
9366                 VN_RELE(cp->c_acldirvp);
9367                 cp->c_acldirvp = NULL;
9368         }
9369 
9370         if (cp->c_metadata.md_flags & MD_ACLDIR) {
9371                 char name[CFS_FRONTFILE_NAME_SIZE + 2];
9372 
9373                 ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9374                 make_ascii_name(&cp->c_id, name);
9375                 (void) strcat(name, ".d");
9376 
9377                 (void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9378                     cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9379         }
9380 
9381         cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9382         cp->c_flags |= CN_UPDATED;
9383 }
9384 
9385 static int
9386 cachefs_getacldirvp(cnode_t *cp)
9387 {
9388         char name[CFS_FRONTFILE_NAME_SIZE + 2];
9389         int error = 0;
9390 
9391         ASSERT(MUTEX_HELD(&cp->c_statelock));
9392         ASSERT(cp->c_acldirvp == NULL);
9393 
9394         if (cp->c_frontvp == NULL)
9395                 error = cachefs_getfrontfile(cp);
9396         if (error != 0)
9397                 goto out;
9398 
9399         ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9400         make_ascii_name(&cp->c_id, name);
9401         (void) strcat(name, ".d");
9402         error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9403             name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9404         if ((error != 0) && (error != ENOENT))
9405                 goto out;
9406 
9407         if (error != 0) {
9408                 vattr_t va;
9409 
9410                 va.va_mode = S_IFDIR | 0777;
9411                 va.va_uid = 0;
9412                 va.va_gid = 0;
9413                 va.va_type = VDIR;
9414                 va.va_mask = AT_TYPE | AT_MODE |
9415                     AT_UID | AT_GID;
9416                 error =
9417                     VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9418                     name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9419                 if (error != 0)
9420                         goto out;
9421         }
9422 
9423         ASSERT(cp->c_acldirvp != NULL);
9424         cp->c_metadata.md_flags |= MD_ACLDIR;
9425         cp->c_flags |= CN_UPDATED;
9426 
9427 out:
9428         if (error != 0)
9429                 cp->c_acldirvp = NULL;
9430         return (error);
9431 }
9432 
9433 static int
9434 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9435 {
9436         aclent_t *aclp;
9437         int error = 0;
9438         vnode_t *vp = NULL;
9439         int i;
9440 
9441         ASSERT(cp->c_metadata.md_flags & MD_ACL);
9442         ASSERT(MUTEX_HELD(&cp->c_statelock));
9443         ASSERT(vsec->vsa_aclentp == NULL);
9444 
9445         if (cp->c_metadata.md_flags & MD_ACLDIR) {
9446                 if (cp->c_acldirvp == NULL)
9447                         error = cachefs_getacldirvp(cp);
9448                 if (error != 0)
9449                         goto out;
9450                 vp = cp->c_acldirvp;
9451         } else if (cp->c_metadata.md_flags & MD_FILE) {
9452                 if (cp->c_frontvp == NULL)
9453                         error = cachefs_getfrontfile(cp);
9454                 if (error != 0)
9455                         goto out;
9456                 vp = cp->c_frontvp;
9457         } else {
9458 
9459                 /*
9460                  * if we get here, then we know that MD_ACL is on,
9461                  * meaning an ACL was successfully cached.  we also
9462                  * know that neither MD_ACLDIR nor MD_FILE are on, so
9463                  * this has to be an entry without a `real' ACL.
9464                  * thus, we forge whatever is necessary.
9465                  */
9466 
9467                 if (vsec->vsa_mask & VSA_ACLCNT)
9468                         vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9469 
9470                 if (vsec->vsa_mask & VSA_ACL) {
9471                         vsec->vsa_aclentp =
9472                             kmem_zalloc(MIN_ACL_ENTRIES *
9473                             sizeof (aclent_t), KM_SLEEP);
9474                         aclp = (aclent_t *)vsec->vsa_aclentp;
9475                         aclp->a_type = USER_OBJ;
9476                         ++aclp;
9477                         aclp->a_type = GROUP_OBJ;
9478                         ++aclp;
9479                         aclp->a_type = OTHER_OBJ;
9480                         ++aclp;
9481                         aclp->a_type = CLASS_OBJ;
9482                         ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9483                             sizeof (aclent_t), cmp2acls);
9484                 }
9485 
9486                 ASSERT(vp == NULL);
9487         }
9488 
9489         if (vp != NULL) {
9490                 if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9491 #ifdef CFSDEBUG
9492                         CFS_DEBUG(CFSDEBUG_VOPS)
9493                                 printf("cachefs_getaclfromcache: error %d\n",
9494                                     error);
9495 #endif /* CFSDEBUG */
9496                         goto out;
9497                 }
9498         }
9499 
9500         if (vsec->vsa_aclentp != NULL) {
9501                 for (i = 0; i < vsec->vsa_aclcnt; i++) {
9502                         aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9503                         switch (aclp->a_type) {
9504                         case USER_OBJ:
9505                                 aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9506                                 aclp->a_perm =
9507                                     cp->c_metadata.md_vattr.va_mode & 0700;
9508                                 aclp->a_perm >>= 6;
9509                                 break;
9510 
9511                         case GROUP_OBJ:
9512                                 aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9513                                 aclp->a_perm =
9514                                     cp->c_metadata.md_vattr.va_mode & 070;
9515                                 aclp->a_perm >>= 3;
9516                                 break;
9517 
9518                         case OTHER_OBJ:
9519                                 aclp->a_perm =
9520                                     cp->c_metadata.md_vattr.va_mode & 07;
9521                                 break;
9522 
9523                         case CLASS_OBJ:
9524                                 aclp->a_perm =
9525                                     cp->c_metadata.md_aclclass;
9526                                 break;
9527                         }
9528                 }
9529         }
9530 
9531 out:
9532 
9533         if (error != 0)
9534                 cachefs_nocache(cp);
9535 
9536         return (error);
9537 }
9538 
9539 /*
9540  * Fills in targp with attribute information from srcp, cp
9541  * and if necessary the system.
9542  */
9543 static void
9544 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9545 {
9546         time_t  now;
9547 
9548         ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9549 
9550         /*
9551          * Add code to fill in the va struct.  We use the fields from
9552          * the srcp struct if they are populated, otherwise we guess
9553          */
9554 
9555         targp->va_mask = 0;  /* initialize all fields */
9556         targp->va_mode = srcp->va_mode;
9557         targp->va_type = srcp->va_type;
9558         targp->va_nlink = 1;
9559         targp->va_nodeid = 0;
9560 
9561         if (srcp->va_mask & AT_UID)
9562                 targp->va_uid = srcp->va_uid;
9563         else
9564                 targp->va_uid = crgetuid(cr);
9565 
9566         if (srcp->va_mask & AT_GID)
9567                 targp->va_gid = srcp->va_gid;
9568         else
9569                 targp->va_gid = crgetgid(cr);
9570 
9571         if (srcp->va_mask & AT_FSID)
9572                 targp->va_fsid = srcp->va_fsid;
9573         else
9574                 targp->va_fsid = 0;  /* initialize all fields */
9575 
9576         now = gethrestime_sec();
9577         if (srcp->va_mask & AT_ATIME)
9578                 targp->va_atime = srcp->va_atime;
9579         else
9580                 targp->va_atime.tv_sec = now;
9581 
9582         if (srcp->va_mask & AT_MTIME)
9583                 targp->va_mtime = srcp->va_mtime;
9584         else
9585                 targp->va_mtime.tv_sec = now;
9586 
9587         if (srcp->va_mask & AT_CTIME)
9588                 targp->va_ctime = srcp->va_ctime;
9589         else
9590                 targp->va_ctime.tv_sec = now;
9591 
9592 
9593         if (srcp->va_mask & AT_SIZE)
9594                 targp->va_size = srcp->va_size;
9595         else
9596                 targp->va_size = 0;
9597 
9598         /*
9599          * the remaing fields are set by the fs and not changable.
9600          * we populate these entries useing the parent directory
9601          * values.  It's a small hack, but should work.
9602          */
9603         targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9604         targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9605         targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9606         targp->va_seq = 0; /* Never keep the sequence number */
9607 }
9608 
9609 /*
9610  * set the gid for a newly created file.  The algorithm is as follows:
9611  *
9612  *      1) If the gid is set in the attribute list, then use it if
9613  *         the caller is privileged, belongs to the target group, or
9614  *         the group is the same as the parent directory.
9615  *
9616  *      2) If the parent directory's set-gid bit is clear, then use
9617  *         the process gid
9618  *
9619  *      3) Otherwise, use the gid of the parent directory.
9620  *
9621  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9622  * this routine.
9623  */
9624 static void
9625 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9626 {
9627         if ((vap->va_mask & AT_GID) &&
9628             ((vap->va_gid == dcp->c_attr.va_gid) ||
9629             groupmember(vap->va_gid, cr) ||
9630             secpolicy_vnode_create_gid(cr) != 0)) {
9631                 newcp->c_attr.va_gid = vap->va_gid;
9632         } else {
9633                 if (dcp->c_attr.va_mode & S_ISGID)
9634                         newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9635                 else
9636                         newcp->c_attr.va_gid = crgetgid(cr);
9637         }
9638 
9639         /*
9640          * if we're creating a directory, and the parent directory has the
9641          * set-GID bit set, set it on the new directory.
9642          * Otherwise, if the user is neither privileged nor a member of the
9643          * file's new group, clear the file's set-GID bit.
9644          */
9645         if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9646                 newcp->c_attr.va_mode |= S_ISGID;
9647         } else if ((newcp->c_attr.va_mode & S_ISGID) &&
9648             secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9649                 newcp->c_attr.va_mode &= ~S_ISGID;
9650 }
9651 
9652 /*
9653  * create an acl for the newly created file.  should be called right
9654  * after cachefs_creategid.
9655  */
9656 
9657 static void
9658 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9659 {
9660         fscache_t *fscp = C_TO_FSCACHE(dcp);
9661         vsecattr_t vsec;
9662         int gotvsec = 0;
9663         int error = 0; /* placeholder */
9664         aclent_t *aclp;
9665         o_mode_t *classp = NULL;
9666         o_mode_t gunion = 0;
9667         int i;
9668 
9669         if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9670             (! cachefs_vtype_aclok(CTOV(newcp))))
9671                 return;
9672 
9673         ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9674         ASSERT(MUTEX_HELD(&dcp->c_statelock));
9675         ASSERT(MUTEX_HELD(&newcp->c_statelock));
9676 
9677         /*
9678          * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9679          * would hit code paths that isn't hit anywhere else.
9680          */
9681 
9682         bzero(&vsec, sizeof (vsec));
9683         vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9684         error = cachefs_getaclfromcache(dcp, &vsec);
9685         if (error != 0)
9686                 goto out;
9687         gotvsec = 1;
9688 
9689         if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9690                 if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9691                         kmem_free(vsec.vsa_aclentp,
9692                             vsec.vsa_aclcnt * sizeof (aclent_t));
9693 
9694                 vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9695                 vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9696                 vsec.vsa_dfaclcnt = 0;
9697                 vsec.vsa_dfaclentp = NULL;
9698 
9699                 if (newcp->c_attr.va_type == VDIR) {
9700                         vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9701                             sizeof (aclent_t), KM_SLEEP);
9702                         vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9703                         bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9704                             vsec.vsa_aclcnt * sizeof (aclent_t));
9705                 }
9706 
9707                 /*
9708                  * this function should be called pretty much after
9709                  * the rest of the file creation stuff is done.  so,
9710                  * uid, gid, etc. should be `right'.  we'll go with
9711                  * that, rather than trying to determine whether to
9712                  * get stuff from cr or va.
9713                  */
9714 
9715                 for (i = 0; i < vsec.vsa_aclcnt; i++) {
9716                         aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9717                         switch (aclp->a_type) {
9718                         case DEF_USER_OBJ:
9719                                 aclp->a_type = USER_OBJ;
9720                                 aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9721                                 aclp->a_perm =
9722                                     newcp->c_metadata.md_vattr.va_mode;
9723                                 aclp->a_perm &= 0700;
9724                                 aclp->a_perm >>= 6;
9725                                 break;
9726 
9727                         case DEF_GROUP_OBJ:
9728                                 aclp->a_type = GROUP_OBJ;
9729                                 aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9730                                 aclp->a_perm =
9731                                     newcp->c_metadata.md_vattr.va_mode;
9732                                 aclp->a_perm &= 070;
9733                                 aclp->a_perm >>= 3;
9734                                 gunion |= aclp->a_perm;
9735                                 break;
9736 
9737                         case DEF_OTHER_OBJ:
9738                                 aclp->a_type = OTHER_OBJ;
9739                                 aclp->a_perm =
9740                                     newcp->c_metadata.md_vattr.va_mode & 07;
9741                                 break;
9742 
9743                         case DEF_CLASS_OBJ:
9744                                 aclp->a_type = CLASS_OBJ;
9745                                 classp = &(aclp->a_perm);
9746                                 break;
9747 
9748                         case DEF_USER:
9749                                 aclp->a_type = USER;
9750                                 gunion |= aclp->a_perm;
9751                                 break;
9752 
9753                         case DEF_GROUP:
9754                                 aclp->a_type = GROUP;
9755                                 gunion |= aclp->a_perm;
9756                                 break;
9757                         }
9758                 }
9759 
9760                 /* XXX is this the POSIX thing to do? */
9761                 if (classp != NULL)
9762                         *classp &= gunion;
9763 
9764                 /*
9765                  * we don't need to log this; rather, we clear the
9766                  * MD_ACL bit when we reconnect.
9767                  */
9768 
9769                 error = cachefs_cacheacl(newcp, &vsec);
9770                 if (error != 0)
9771                         goto out;
9772         }
9773 
9774         newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9775         newcp->c_metadata.md_flags |= MD_ACL;
9776         newcp->c_flags |= CN_UPDATED;
9777 
9778 out:
9779 
9780         if (gotvsec) {
9781                 if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9782                         kmem_free(vsec.vsa_aclentp,
9783                             vsec.vsa_aclcnt * sizeof (aclent_t));
9784                 if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9785                         kmem_free(vsec.vsa_dfaclentp,
9786                             vsec.vsa_dfaclcnt * sizeof (aclent_t));
9787         }
9788 }
9789 
9790 /*
9791  * this is translated from the UFS code for access checking.
9792  */
9793 
9794 static int
9795 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9796 {
9797         cnode_t *cp = vcp;
9798         fscache_t *fscp = C_TO_FSCACHE(cp);
9799         int shift = 0;
9800 
9801         ASSERT(MUTEX_HELD(&cp->c_statelock));
9802 
9803         if (mode & VWRITE) {
9804                 /*
9805                  * Disallow write attempts on read-only
9806                  * file systems, unless the file is special.
9807                  */
9808                 struct vnode *vp = CTOV(cp);
9809                 if (vn_is_readonly(vp)) {
9810                         if (!IS_DEVVP(vp)) {
9811                                 return (EROFS);
9812                         }
9813                 }
9814         }
9815 
9816         /*
9817          * if we need to do ACLs, do it.  this works whether anyone
9818          * has explicitly made an ACL or not.
9819          */
9820 
9821         if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9822             (cachefs_vtype_aclok(CTOV(cp))))
9823                 return (cachefs_acl_access(cp, mode, cr));
9824 
9825         if (crgetuid(cr) != cp->c_attr.va_uid) {
9826                 shift += 3;
9827                 if (!groupmember(cp->c_attr.va_gid, cr))
9828                         shift += 3;
9829         }
9830 
9831         return (secpolicy_vnode_access2(cr, CTOV(cp), cp->c_attr.va_uid,
9832             cp->c_attr.va_mode << shift, mode));
9833 }
9834 
9835 /*
9836  * This is transcribed from ufs_acl_access().  If that changes, then
9837  * this should, too.
9838  *
9839  * Check the cnode's ACL's to see if this mode of access is
9840  * allowed; return 0 if allowed, EACCES if not.
9841  *
9842  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9843  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9844  */
9845 
9846 #define ACL_MODE_CHECK(M, PERM, C, I) \
9847     secpolicy_vnode_access2(C, CTOV(I), owner, (PERM), (M))
9848 
9849 static int
9850 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9851 {
9852         int error = 0;
9853 
9854         fscache_t *fscp = C_TO_FSCACHE(cp);
9855 
9856         int mask = ~0;
9857         int ismask = 0;
9858 
9859         int gperm = 0;
9860         int ngroup = 0;
9861 
9862         vsecattr_t vsec;
9863         int gotvsec = 0;
9864         aclent_t *aclp;
9865 
9866         uid_t owner = cp->c_attr.va_uid;
9867 
9868         int i;
9869 
9870         ASSERT(MUTEX_HELD(&cp->c_statelock));
9871         ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9872 
9873         /*
9874          * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9875          * but then i believe we'd be the only thing exercising those
9876          * code paths -- probably a bad thing.
9877          */
9878 
9879         bzero(&vsec, sizeof (vsec));
9880         vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9881 
9882         /* XXX KLUDGE! correct insidious 0-class problem */
9883         if (cp->c_metadata.md_aclclass == 0 &&
9884             fscp->fs_cdconnected == CFS_CD_CONNECTED)
9885                 cachefs_purgeacl(cp);
9886 again:
9887         if (cp->c_metadata.md_flags & MD_ACL) {
9888                 error = cachefs_getaclfromcache(cp, &vsec);
9889                 if (error != 0) {
9890 #ifdef CFSDEBUG
9891                         if (error != ETIMEDOUT)
9892                                 CFS_DEBUG(CFSDEBUG_VOPS)
9893                                         printf("cachefs_acl_access():"
9894                                             "error %d from getaclfromcache()\n",
9895                                             error);
9896 #endif /* CFSDEBUG */
9897                         if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9898                                 goto again;
9899                         } else {
9900                                 goto out;
9901                         }
9902                 }
9903         } else {
9904                 if (cp->c_backvp == NULL) {
9905                         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9906                                 error = cachefs_getbackvp(fscp, cp);
9907                         else
9908                                 error = ETIMEDOUT;
9909                 }
9910                 if (error == 0)
9911                         error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9912                             NULL);
9913                 if (error != 0) {
9914 #ifdef CFSDEBUG
9915                         CFS_DEBUG(CFSDEBUG_VOPS)
9916                                 printf("cachefs_acl_access():"
9917                                     "error %d from getsecattr(backvp)\n",
9918                                     error);
9919 #endif /* CFSDEBUG */
9920                         goto out;
9921                 }
9922                 if ((cp->c_flags & CN_NOCACHE) == 0 &&
9923                     !CFS_ISFS_BACKFS_NFSV4(fscp))
9924                         (void) cachefs_cacheacl(cp, &vsec);
9925         }
9926         gotvsec = 1;
9927 
9928         ASSERT(error == 0);
9929         for (i = 0; i < vsec.vsa_aclcnt; i++) {
9930                 aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9931                 switch (aclp->a_type) {
9932                 case USER_OBJ:
9933                         /*
9934                          * this might look cleaner in the 2nd loop
9935                          * below, but we do it here as an
9936                          * optimization.
9937                          */
9938 
9939                         owner = aclp->a_id;
9940                         if (crgetuid(cr) == owner) {
9941                                 error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9942                                     cr, cp);
9943                                 goto out;
9944                         }
9945                         break;
9946 
9947                 case CLASS_OBJ:
9948                         mask = aclp->a_perm;
9949                         ismask = 1;
9950                         break;
9951                 }
9952         }
9953 
9954         ASSERT(error == 0);
9955         for (i = 0; i < vsec.vsa_aclcnt; i++) {
9956                 aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9957                 switch (aclp->a_type) {
9958                 case USER:
9959                         if (crgetuid(cr) == aclp->a_id) {
9960                                 error = ACL_MODE_CHECK(mode,
9961                                     (aclp->a_perm & mask) << 6, cr, cp);
9962                                 goto out;
9963                         }
9964                         break;
9965 
9966                 case GROUP_OBJ:
9967                         if (groupmember(aclp->a_id, cr)) {
9968                                 ++ngroup;
9969                                 gperm |= aclp->a_perm;
9970                                 if (! ismask) {
9971                                         error = ACL_MODE_CHECK(mode,
9972                                             aclp->a_perm << 6,
9973                                             cr, cp);
9974                                         goto out;
9975                                 }
9976                         }
9977                         break;
9978 
9979                 case GROUP:
9980                         if (groupmember(aclp->a_id, cr)) {
9981                                 ++ngroup;
9982                                 gperm |= aclp->a_perm;
9983                         }
9984                         break;
9985 
9986                 case OTHER_OBJ:
9987                         if (ngroup == 0) {
9988                                 error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9989                                     cr, cp);
9990                                 goto out;
9991                         }
9992                         break;
9993 
9994                 default:
9995                         break;
9996                 }
9997         }
9998 
9999         ASSERT(ngroup > 0);
10000         error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
10001 
10002 out:
10003         if (gotvsec) {
10004                 if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10005                         kmem_free(vsec.vsa_aclentp,
10006                             vsec.vsa_aclcnt * sizeof (aclent_t));
10007                 if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10008                         kmem_free(vsec.vsa_dfaclentp,
10009                             vsec.vsa_dfaclcnt * sizeof (aclent_t));
10010         }
10011 
10012         return (error);
10013 }
10014 
10015 /*
10016  * see if permissions allow for removal of the given file from
10017  * the given directory.
10018  */
10019 static int
10020 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10021 {
10022         uid_t uid;
10023         /*
10024          * If the containing directory is sticky, the user must:
10025          *  - own the directory, or
10026          *  - own the file, or
10027          *  - be able to write the file (if it's a plain file), or
10028          *  - be sufficiently privileged.
10029          */
10030         if ((dcp->c_attr.va_mode & S_ISVTX) &&
10031             ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10032             (uid != cp->c_attr.va_uid) &&
10033             (cp->c_attr.va_type != VREG ||
10034             cachefs_access_local(cp, VWRITE, cr) != 0))
10035                 return (secpolicy_vnode_remove(cr));
10036 
10037         return (0);
10038 }
10039 
10040 /*
10041  * Returns a new name, may even be unique.
10042  * Stolen from nfs code.
10043  * Since now we will use renaming to .cfs* in place of .nfs*
10044  * for CacheFS. Both NFS and CacheFS will rename opened files.
10045  */
10046 static char cachefs_prefix[] = ".cfs";
10047 kmutex_t cachefs_newnum_lock;
10048 
10049 static char *
10050 cachefs_newname(void)
10051 {
10052         static uint_t newnum = 0;
10053         char *news;
10054         char *s, *p;
10055         uint_t id;
10056 
10057         mutex_enter(&cachefs_newnum_lock);
10058         if (newnum == 0) {
10059                 newnum = gethrestime_sec() & 0xfffff;
10060                 newnum |= 0x10000;
10061         }
10062         id = newnum++;
10063         mutex_exit(&cachefs_newnum_lock);
10064 
10065         news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10066         s = news;
10067         p = cachefs_prefix;
10068         while (*p != '\0')
10069                 *s++ = *p++;
10070         while (id != 0) {
10071                 *s++ = "0123456789ABCDEF"[id & 0x0f];
10072                 id >>= 4;
10073         }
10074         *s = '\0';
10075         return (news);
10076 }
10077 
10078 /*
10079  * Called to rename the specified file to a temporary file so
10080  * operations to the file after remove work.
10081  * Must call this routine with the dir c_rwlock held as a writer.
10082  */
10083 static int
10084 /*ARGSUSED*/
10085 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10086 {
10087         cnode_t *cp = VTOC(vp);
10088         char *tmpname;
10089         fscache_t *fscp = C_TO_FSCACHE(cp);
10090         int error;
10091 
10092         ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10093 
10094         /* get the new name for the file */
10095         tmpname = cachefs_newname();
10096 
10097         /* do the link */
10098         if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10099                 error = cachefs_link_connected(dvp, vp, tmpname, cr);
10100         else
10101                 error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10102         if (error) {
10103                 cachefs_kmem_free(tmpname, MAXNAMELEN);
10104                 return (error);
10105         }
10106 
10107         mutex_enter(&cp->c_statelock);
10108         if (cp->c_unldvp) {
10109                 VN_RELE(cp->c_unldvp);
10110                 cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10111                 crfree(cp->c_unlcred);
10112         }
10113 
10114         VN_HOLD(dvp);
10115         cp->c_unldvp = dvp;
10116         crhold(cr);
10117         cp->c_unlcred = cr;
10118         cp->c_unlname = tmpname;
10119 
10120         /* drop the backvp so NFS does not also do a rename */
10121         mutex_exit(&cp->c_statelock);
10122 
10123         return (0);
10124 }
10125 
10126 /*
10127  * Marks the cnode as modified.
10128  */
10129 static void
10130 cachefs_modified(cnode_t *cp)
10131 {
10132         fscache_t *fscp = C_TO_FSCACHE(cp);
10133         struct vattr va;
10134         int error;
10135 
10136         ASSERT(MUTEX_HELD(&cp->c_statelock));
10137         ASSERT(cp->c_metadata.md_rlno);
10138 
10139         /* if not on the modify list */
10140         if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10141                 /* put on modified list, also marks the file as modified */
10142                 cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10143                     cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10144                 cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10145                 cp->c_flags |= CN_UPDATED;
10146 
10147                 /* if a modified regular file that is not local */
10148                 if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10149                     (cp->c_metadata.md_flags & MD_FILE) &&
10150                     (cp->c_attr.va_type == VREG)) {
10151 
10152                         if (cp->c_frontvp == NULL)
10153                                 (void) cachefs_getfrontfile(cp);
10154                         if (cp->c_frontvp) {
10155                                 /* identify file so fsck knows it is modified */
10156                                 va.va_mode = 0766;
10157                                 va.va_mask = AT_MODE;
10158                                 error = VOP_SETATTR(cp->c_frontvp,
10159                                     &va, 0, kcred, NULL);
10160                                 if (error) {
10161                                         cmn_err(CE_WARN,
10162                                             "Cannot change ff mode.\n");
10163                                 }
10164                         }
10165                 }
10166         }
10167 }
10168 
10169 /*
10170  * Marks the cnode as modified.
10171  * Allocates a rl slot for the cnode if necessary.
10172  * Returns 0 for success, !0 if cannot get an rl slot.
10173  */
10174 static int
10175 cachefs_modified_alloc(cnode_t *cp)
10176 {
10177         fscache_t *fscp = C_TO_FSCACHE(cp);
10178         filegrp_t *fgp = cp->c_filegrp;
10179         int error;
10180         rl_entry_t rl_ent;
10181 
10182         ASSERT(MUTEX_HELD(&cp->c_statelock));
10183 
10184         /* get the rl slot if needed */
10185         if (cp->c_metadata.md_rlno == 0) {
10186                 /* get a metadata slot if we do not have one yet */
10187                 if (cp->c_flags & CN_ALLOC_PENDING) {
10188                         if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10189                                 (void) filegrp_allocattr(cp->c_filegrp);
10190                         }
10191                         error = filegrp_create_metadata(cp->c_filegrp,
10192                             &cp->c_metadata, &cp->c_id);
10193                         if (error)
10194                                 return (error);
10195                         cp->c_flags &= ~CN_ALLOC_PENDING;
10196                 }
10197 
10198                 /* get a free rl entry */
10199                 rl_ent.rl_fileno = cp->c_id.cid_fileno;
10200                 rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10201                 rl_ent.rl_fsid = fscp->fs_cfsid;
10202                 rl_ent.rl_attrc = 0;
10203                 error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10204                     &cp->c_metadata.md_rlno);
10205                 if (error)
10206                         return (error);
10207                 cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10208 
10209                 /* hold the filegrp so the attrcache file is not gc */
10210                 error = filegrp_ffhold(fgp);
10211                 if (error) {
10212                         cachefs_rlent_moveto(fscp->fs_cache,
10213                             CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10214                         cp->c_metadata.md_rlno = 0;
10215                         return (error);
10216                 }
10217         }
10218         cachefs_modified(cp);
10219         return (0);
10220 }
10221 
10222 int
10223 cachefs_vtype_aclok(vnode_t *vp)
10224 {
10225         vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10226 
10227         if (vp->v_type == VNON)
10228                 return (0);
10229 
10230         for (vtp = oktypes; *vtp != VNON; vtp++)
10231                 if (vp->v_type == *vtp)
10232                         break;
10233 
10234         return (*vtp != VNON);
10235 }
10236 
10237 static int
10238 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10239     caller_context_t *ct)
10240 {
10241         int error = 0;
10242         fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10243 
10244         /* Assert cachefs compatibility if NFSv4 is in use */
10245         CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10246         CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10247 
10248         if (cmd == _PC_FILESIZEBITS) {
10249                 u_offset_t maxsize = fscp->fs_offmax;
10250                 (*valp) = 0;
10251                 while (maxsize != 0) {
10252                         maxsize >>= 1;
10253                         (*valp)++;
10254                 }
10255                 (*valp)++;
10256         } else
10257                 error = fs_pathconf(vp, cmd, valp, cr, ct);
10258 
10259         return (error);
10260 }