1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  29  */
  30 
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/buf.h>
  37 #include <sys/stat.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/dirent.h>
  41 #include <sys/vnode.h>
  42 #include <sys/proc.h>
  43 #include <sys/file.h>
  44 #include <sys/fcntl.h>
  45 #include <sys/uio.h>
  46 #include <sys/fs/pc_label.h>
  47 #include <sys/fs/pc_fs.h>
  48 #include <sys/fs/pc_dir.h>
  49 #include <sys/fs/pc_node.h>
  50 #include <sys/mman.h>
  51 #include <sys/pathname.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/debug.h>
  55 #include <sys/statvfs.h>
  56 #include <sys/unistd.h>
  57 #include <sys/kmem.h>
  58 #include <sys/conf.h>
  59 #include <sys/flock.h>
  60 #include <sys/policy.h>
  61 #include <sys/sdt.h>
  62 #include <sys/sunddi.h>
  63 #include <sys/types.h>
  64 #include <sys/errno.h>
  65 
  66 #include <vm/seg.h>
  67 #include <vm/page.h>
  68 #include <vm/pvn.h>
  69 #include <vm/seg_map.h>
  70 #include <vm/seg_vn.h>
  71 #include <vm/hat.h>
  72 #include <vm/as.h>
  73 #include <vm/seg_kmem.h>
  74 
  75 #include <fs/fs_subr.h>
  76 
  77 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
  78 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
  79         caller_context_t *ct);
  80 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
  81         caller_context_t *);
  82 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
  83         caller_context_t *);
  84 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
  85         caller_context_t *ct);
  86 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
  87         caller_context_t *);
  88 static int pcfs_access(struct vnode *, int, int, struct cred *,
  89         caller_context_t *ct);
  90 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
  91         struct pathname *, int, struct vnode *, struct cred *,
  92         caller_context_t *, int *, pathname_t *);
  93 static int pcfs_create(struct vnode *, char *, struct vattr *,
  94         enum vcexcl, int mode, struct vnode **, struct cred *, int,
  95         caller_context_t *, vsecattr_t *);
  96 static int pcfs_remove(struct vnode *, char *, struct cred *,
  97         caller_context_t *, int);
  98 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
  99         struct cred *, caller_context_t *, int);
 100 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
 101         struct cred *, caller_context_t *, int, vsecattr_t *);
 102 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
 103         caller_context_t *, int);
 104 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
 105         caller_context_t *, int);
 106 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
 107 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
 108 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
 109 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
 110         offset_t, cred_t *, caller_context_t *);
 111 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
 112         size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
 113         caller_context_t *);
 114 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
 115         page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
 116 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
 117         caller_context_t *);
 118 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
 119         uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 120 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
 121         size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 122 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
 123         size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
 124 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
 125         caller_context_t *);
 126 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
 127         caller_context_t *);
 128 
 129 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
 130         struct cred *);
 131 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
 132 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
 133 
 134 extern krwlock_t pcnodes_lock;
 135 
 136 #define lround(r)       (((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
 137 
 138 /*
 139  * vnode op vectors for files and directories.
 140  */
 141 struct vnodeops *pcfs_fvnodeops;
 142 struct vnodeops *pcfs_dvnodeops;
 143 
 144 const fs_operation_def_t pcfs_fvnodeops_template[] = {
 145         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 146         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 147         VOPNAME_READ,           { .vop_read = pcfs_read },
 148         VOPNAME_WRITE,          { .vop_write = pcfs_write },
 149         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 150         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 151         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 152         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 153         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 154         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 155         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 156         VOPNAME_SPACE,          { .vop_space = pcfs_space },
 157         VOPNAME_GETPAGE,        { .vop_getpage = pcfs_getpage },
 158         VOPNAME_PUTPAGE,        { .vop_putpage = pcfs_putpage },
 159         VOPNAME_MAP,            { .vop_map = pcfs_map },
 160         VOPNAME_ADDMAP,         { .vop_addmap = pcfs_addmap },
 161         VOPNAME_DELMAP,         { .vop_delmap = pcfs_delmap },
 162         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 163         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 164         NULL,                   NULL
 165 };
 166 
 167 const fs_operation_def_t pcfs_dvnodeops_template[] = {
 168         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 169         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 170         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 171         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 172         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 173         VOPNAME_LOOKUP,         { .vop_lookup = pcfs_lookup },
 174         VOPNAME_CREATE,         { .vop_create = pcfs_create },
 175         VOPNAME_REMOVE,         { .vop_remove = pcfs_remove },
 176         VOPNAME_RENAME,         { .vop_rename = pcfs_rename },
 177         VOPNAME_MKDIR,          { .vop_mkdir = pcfs_mkdir },
 178         VOPNAME_RMDIR,          { .vop_rmdir = pcfs_rmdir },
 179         VOPNAME_READDIR,        { .vop_readdir = pcfs_readdir },
 180         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 181         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 182         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 183         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 184         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 185         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 186         NULL,                   NULL
 187 };
 188 
 189 
 190 /*ARGSUSED*/
 191 static int
 192 pcfs_open(
 193         struct vnode **vpp,
 194         int flag,
 195         struct cred *cr,
 196         caller_context_t *ct)
 197 {
 198         return (0);
 199 }
 200 
 201 /*
 202  * files are sync'ed on close to keep floppy up to date
 203  */
 204 
 205 /*ARGSUSED*/
 206 static int
 207 pcfs_close(
 208         struct vnode *vp,
 209         int flag,
 210         int count,
 211         offset_t offset,
 212         struct cred *cr,
 213         caller_context_t *ct)
 214 {
 215         return (0);
 216 }
 217 
 218 /*ARGSUSED*/
 219 static int
 220 pcfs_read(
 221         struct vnode *vp,
 222         struct uio *uiop,
 223         int ioflag,
 224         struct cred *cr,
 225         struct caller_context *ct)
 226 {
 227         struct pcfs *fsp;
 228         struct pcnode *pcp;
 229         int error;
 230 
 231         fsp = VFSTOPCFS(vp->v_vfsp);
 232         if (error = pc_verify(fsp))
 233                 return (error);
 234         error = pc_lockfs(fsp, 0, 0);
 235         if (error)
 236                 return (error);
 237         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 238                 pc_unlockfs(fsp);
 239                 return (EIO);
 240         }
 241         error = rwpcp(pcp, uiop, UIO_READ, ioflag);
 242         if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
 243                 pc_mark_acc(fsp, pcp);
 244         }
 245         pc_unlockfs(fsp);
 246         if (error) {
 247                 PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
 248         }
 249         return (error);
 250 }
 251 
 252 /*ARGSUSED*/
 253 static int
 254 pcfs_write(
 255         struct vnode *vp,
 256         struct uio *uiop,
 257         int ioflag,
 258         struct cred *cr,
 259         struct caller_context *ct)
 260 {
 261         struct pcfs *fsp;
 262         struct pcnode *pcp;
 263         int error;
 264 
 265         fsp = VFSTOPCFS(vp->v_vfsp);
 266         if (error = pc_verify(fsp))
 267                 return (error);
 268         error = pc_lockfs(fsp, 0, 0);
 269         if (error)
 270                 return (error);
 271         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 272                 pc_unlockfs(fsp);
 273                 return (EIO);
 274         }
 275         if (ioflag & FAPPEND) {
 276                 /*
 277                  * in append mode start at end of file.
 278                  */
 279                 uiop->uio_loffset = pcp->pc_size;
 280         }
 281         error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
 282         pcp->pc_flags |= PC_MOD;
 283         pc_mark_mod(fsp, pcp);
 284         if (ioflag & (FSYNC|FDSYNC))
 285                 (void) pc_nodeupdate(pcp);
 286 
 287         pc_unlockfs(fsp);
 288         if (error) {
 289                 PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
 290         }
 291         return (error);
 292 }
 293 
 294 /*
 295  * read or write a vnode
 296  */
 297 static int
 298 rwpcp(
 299         struct pcnode *pcp,
 300         struct uio *uio,
 301         enum uio_rw rw,
 302         int ioflag)
 303 {
 304         struct vnode *vp = PCTOV(pcp);
 305         struct pcfs *fsp;
 306         daddr_t bn;                     /* phys block number */
 307         int n;
 308         offset_t off;
 309         caddr_t base;
 310         int mapon, pagecreate;
 311         int newpage;
 312         int error = 0;
 313         rlim64_t limit = uio->uio_llimit;
 314         int oresid = uio->uio_resid;
 315 
 316         /*
 317          * If the filesystem was umounted by force, return immediately.
 318          */
 319         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 320                 return (EIO);
 321 
 322         PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
 323             uio->uio_loffset, uio->uio_resid, pcp->pc_size);
 324 
 325         ASSERT(rw == UIO_READ || rw == UIO_WRITE);
 326         ASSERT(vp->v_type == VREG);
 327 
 328         if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
 329                 return (0);
 330         }
 331 
 332         if (uio->uio_loffset < 0)
 333                 return (EINVAL);
 334 
 335         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 336                 limit = MAXOFFSET_T;
 337 
 338         if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
 339                 proc_t *p = ttoproc(curthread);
 340 
 341                 mutex_enter(&p->p_lock);
 342                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
 343                     p, RCA_UNSAFE_SIGINFO);
 344                 mutex_exit(&p->p_lock);
 345                 return (EFBIG);
 346         }
 347 
 348         /* the following condition will occur only for write */
 349 
 350         if (uio->uio_loffset >= UINT32_MAX)
 351                 return (EFBIG);
 352 
 353         if (uio->uio_resid == 0)
 354                 return (0);
 355 
 356         if (limit > UINT32_MAX)
 357                 limit = UINT32_MAX;
 358 
 359         fsp = VFSTOPCFS(vp->v_vfsp);
 360         if (fsp->pcfs_flags & PCFS_IRRECOV)
 361                 return (EIO);
 362 
 363         do {
 364                 /*
 365                  * Assignments to "n" in this block may appear
 366                  * to overflow in some cases.  However, after careful
 367                  * analysis it was determined that all assignments to
 368                  * "n" serve only to make "n" smaller.  Since "n"
 369                  * starts out as no larger than MAXBSIZE, "int" is
 370                  * safe.
 371                  */
 372                 off = uio->uio_loffset & MAXBMASK;
 373                 mapon = (int)(uio->uio_loffset & MAXBOFFSET);
 374                 n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 375                 if (rw == UIO_READ) {
 376                         offset_t diff;
 377 
 378                         diff = pcp->pc_size - uio->uio_loffset;
 379                         if (diff <= 0)
 380                                 return (0);
 381                         if (diff < n)
 382                                 n = (int)diff;
 383                 }
 384                 /*
 385                  * Compare limit with the actual offset + n, not the
 386                  * rounded down offset "off" or we will overflow
 387                  * the maximum file size after all.
 388                  */
 389                 if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
 390                         if (uio->uio_loffset >= limit) {
 391                                 error = EFBIG;
 392                                 break;
 393                         }
 394                         n = (int)(limit - uio->uio_loffset);
 395                 }
 396 
 397                 /*
 398                  * Touch the page and fault it in if it is not in
 399                  * core before segmap_getmapflt can lock it. This
 400                  * is to avoid the deadlock if the buffer is mapped
 401                  * to the same file through mmap which we want to
 402                  * write to.
 403                  */
 404                 uio_prefaultpages((long)n, uio);
 405 
 406                 base = segmap_getmap(segkmap, vp, (u_offset_t)off);
 407                 pagecreate = 0;
 408                 newpage = 0;
 409                 if (rw == UIO_WRITE) {
 410                         /*
 411                          * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
 412                          * with one page at a time, instead of one MAXBSIZE
 413                          * at a time, so we can fully explore pagecreate
 414                          * optimization??
 415                          */
 416                         if (uio->uio_loffset + n > pcp->pc_size) {
 417                                 uint_t ncl, lcn;
 418 
 419                                 ncl = (uint_t)howmany((offset_t)pcp->pc_size,
 420                                     fsp->pcfs_clsize);
 421                                 if (uio->uio_loffset > pcp->pc_size &&
 422                                     ncl < (uint_t)howmany(uio->uio_loffset,
 423                                     fsp->pcfs_clsize)) {
 424                                         /*
 425                                          * Allocate and zerofill skipped
 426                                          * clusters. This may not be worth the
 427                                          * effort since a small lseek beyond
 428                                          * eof but still within the cluster
 429                                          * will not be zeroed out.
 430                                          */
 431                                         lcn = pc_lblkno(fsp, uio->uio_loffset);
 432                                         error = pc_balloc(pcp, (daddr_t)lcn,
 433                                             1, &bn);
 434                                         ncl = lcn + 1;
 435                                 }
 436                                 if (!error &&
 437                                     ncl < (uint_t)howmany(uio->uio_loffset + n,
 438                                     fsp->pcfs_clsize))
 439                                         /*
 440                                          * allocate clusters w/o zerofill
 441                                          */
 442                                         error = pc_balloc(pcp,
 443                                             (daddr_t)pc_lblkno(fsp,
 444                                             uio->uio_loffset + n - 1),
 445                                             0, &bn);
 446 
 447                                 pcp->pc_flags |= PC_CHG;
 448 
 449                                 if (error) {
 450                                         pc_cluster32_t ncl;
 451                                         int nerror;
 452 
 453                                         /*
 454                                          * figure out new file size from
 455                                          * cluster chain length. If this
 456                                          * is detected to loop, the chain
 457                                          * is corrupted and we'd better
 458                                          * keep our fingers off that file.
 459                                          */
 460                                         nerror = pc_fileclsize(fsp,
 461                                             pcp->pc_scluster, &ncl);
 462                                         if (nerror) {
 463                                                 PC_DPRINTF1(2,
 464                                                     "cluster chain "
 465                                                     "corruption, "
 466                                                     "scluster=%d\n",
 467                                                     pcp->pc_scluster);
 468                                                 pcp->pc_size = 0;
 469                                                 pcp->pc_flags |= PC_INVAL;
 470                                                 error = nerror;
 471                                                 (void) segmap_release(segkmap,
 472                                                     base, 0);
 473                                                 break;
 474                                         }
 475                                         pcp->pc_size = fsp->pcfs_clsize * ncl;
 476 
 477                                         if (error == ENOSPC &&
 478                                             (pcp->pc_size - uio->uio_loffset)
 479                                             > 0) {
 480                                                 PC_DPRINTF3(2, "rwpcp ENOSPC "
 481                                                     "off=%lld n=%d size=%d\n",
 482                                                     uio->uio_loffset,
 483                                                     n, pcp->pc_size);
 484                                                 n = (int)(pcp->pc_size -
 485                                                     uio->uio_loffset);
 486                                         } else {
 487                                                 PC_DPRINTF1(1,
 488                                                     "rwpcp error1=%d\n", error);
 489                                                 (void) segmap_release(segkmap,
 490                                                     base, 0);
 491                                                 break;
 492                                         }
 493                                 } else {
 494                                         pcp->pc_size =
 495                                             (uint_t)(uio->uio_loffset + n);
 496                                 }
 497                                 if (mapon == 0) {
 498                                         newpage = segmap_pagecreate(segkmap,
 499                                             base, (size_t)n, 0);
 500                                         pagecreate = 1;
 501                                 }
 502                         } else if (n == MAXBSIZE) {
 503                                 newpage = segmap_pagecreate(segkmap, base,
 504                                     (size_t)n, 0);
 505                                 pagecreate = 1;
 506                         }
 507                 }
 508                 error = uiomove(base + mapon, (size_t)n, rw, uio);
 509 
 510                 if (pagecreate && uio->uio_loffset <
 511                     roundup(off + mapon + n, PAGESIZE)) {
 512                         offset_t nzero, nmoved;
 513 
 514                         nmoved = uio->uio_loffset - (off + mapon);
 515                         nzero = roundup(mapon + n, PAGESIZE) - nmoved;
 516                         (void) kzero(base + mapon + nmoved, (size_t)nzero);
 517                 }
 518 
 519                 /*
 520                  * Unlock the pages which have been allocated by
 521                  * page_create_va() in segmap_pagecreate().
 522                  */
 523                 if (newpage) {
 524                         segmap_pageunlock(segkmap, base, (size_t)n,
 525                             rw == UIO_WRITE ? S_WRITE : S_READ);
 526                 }
 527 
 528                 if (error) {
 529                         PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
 530                         /*
 531                          * If we failed on a write, we may have already
 532                          * allocated file blocks as well as pages.  It's hard
 533                          * to undo the block allocation, but we must be sure
 534                          * to invalidate any pages that may have been
 535                          * allocated.
 536                          */
 537                         if (rw == UIO_WRITE)
 538                                 (void) segmap_release(segkmap, base, SM_INVAL);
 539                         else
 540                                 (void) segmap_release(segkmap, base, 0);
 541                 } else {
 542                         uint_t flags = 0;
 543 
 544                         if (rw == UIO_READ) {
 545                                 if (n + mapon == MAXBSIZE ||
 546                                     uio->uio_loffset == pcp->pc_size)
 547                                         flags = SM_DONTNEED;
 548                         } else if (ioflag & (FSYNC|FDSYNC)) {
 549                                 flags = SM_WRITE;
 550                         } else if (n + mapon == MAXBSIZE) {
 551                                 flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
 552                         }
 553                         error = segmap_release(segkmap, base, flags);
 554                 }
 555 
 556         } while (error == 0 && uio->uio_resid > 0 && n != 0);
 557 
 558         if (oresid != uio->uio_resid)
 559                 error = 0;
 560         return (error);
 561 }
 562 
 563 /*ARGSUSED*/
 564 static int
 565 pcfs_getattr(
 566         struct vnode *vp,
 567         struct vattr *vap,
 568         int flags,
 569         struct cred *cr,
 570         caller_context_t *ct)
 571 {
 572         struct pcnode *pcp;
 573         struct pcfs *fsp;
 574         int error;
 575         char attr;
 576         struct pctime atime;
 577         int64_t unixtime;
 578 
 579         PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
 580 
 581         fsp = VFSTOPCFS(vp->v_vfsp);
 582         error = pc_lockfs(fsp, 0, 0);
 583         if (error)
 584                 return (error);
 585 
 586         /*
 587          * Note that we don't check for "invalid node" (PC_INVAL) here
 588          * only in order to make stat() succeed. We allow no I/O on such
 589          * a node, but do allow to check for its existence.
 590          */
 591         if ((pcp = VTOPC(vp)) == NULL) {
 592                 pc_unlockfs(fsp);
 593                 return (EIO);
 594         }
 595         /*
 596          * Copy from pcnode.
 597          */
 598         vap->va_type = vp->v_type;
 599         attr = pcp->pc_entry.pcd_attr;
 600         if (PCA_IS_HIDDEN(fsp, attr))
 601                 vap->va_mode = 0;
 602         else if (attr & PCA_LABEL)
 603                 vap->va_mode = 0444;
 604         else if (attr & PCA_RDONLY)
 605                 vap->va_mode = 0555;
 606         else if (fsp->pcfs_flags & PCFS_BOOTPART) {
 607                 vap->va_mode = 0755;
 608         } else {
 609                 vap->va_mode = 0777;
 610         }
 611 
 612         if (attr & PCA_DIR)
 613                 vap->va_mode |= S_IFDIR;
 614         else
 615                 vap->va_mode |= S_IFREG;
 616         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 617                 vap->va_uid = 0;
 618                 vap->va_gid = 0;
 619         } else {
 620                 vap->va_uid = crgetuid(cr);
 621                 vap->va_gid = crgetgid(cr);
 622         }
 623         vap->va_fsid = vp->v_vfsp->vfs_dev;
 624         vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
 625             pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
 626             pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
 627         vap->va_nlink = 1;
 628         vap->va_size = (u_offset_t)pcp->pc_size;
 629         vap->va_rdev = 0;
 630         vap->va_nblocks =
 631             (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
 632         vap->va_blksize = fsp->pcfs_clsize;
 633 
 634         /*
 635          * FAT root directories have no timestamps. In order not to return
 636          * "time zero" (1/1/1970), we record the time of the mount and give
 637          * that. This breaks less expectations.
 638          */
 639         if (vp->v_flag & VROOT) {
 640                 vap->va_mtime = fsp->pcfs_mounttime;
 641                 vap->va_atime = fsp->pcfs_mounttime;
 642                 vap->va_ctime = fsp->pcfs_mounttime;
 643                 pc_unlockfs(fsp);
 644                 return (0);
 645         }
 646 
 647         pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
 648         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 649                 if (unixtime > INT32_MAX)
 650                         DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
 651                 unixtime = MIN(unixtime, INT32_MAX);
 652         } else if (unixtime > INT32_MAX &&
 653             get_udatamodel() == DATAMODEL_ILP32) {
 654                 pc_unlockfs(fsp);
 655                 DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
 656                 return (EOVERFLOW);
 657         }
 658 
 659         vap->va_mtime.tv_sec = (time_t)unixtime;
 660         vap->va_mtime.tv_nsec = 0;
 661 
 662         /*
 663          * FAT doesn't know about POSIX ctime.
 664          * Best approximation is to always set it to mtime.
 665          */
 666         vap->va_ctime = vap->va_mtime;
 667 
 668         /*
 669          * FAT only stores "last access date". If that's the
 670          * same as the date of last modification then the time
 671          * of last access is known. Otherwise, use midnight.
 672          */
 673         atime.pct_date = pcp->pc_entry.pcd_ladate;
 674         if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
 675                 atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
 676         else
 677                 atime.pct_time = 0;
 678         pc_pcttotv(&atime, &unixtime);
 679         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 680                 if (unixtime > INT32_MAX)
 681                         DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
 682                 unixtime = MIN(unixtime, INT32_MAX);
 683         } else if (unixtime > INT32_MAX &&
 684             get_udatamodel() == DATAMODEL_ILP32) {
 685                 pc_unlockfs(fsp);
 686                 DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
 687                 return (EOVERFLOW);
 688         }
 689 
 690         vap->va_atime.tv_sec = (time_t)unixtime;
 691         vap->va_atime.tv_nsec = 0;
 692 
 693         pc_unlockfs(fsp);
 694         return (0);
 695 }
 696 
 697 
 698 /*ARGSUSED*/
 699 static int
 700 pcfs_setattr(
 701         struct vnode *vp,
 702         struct vattr *vap,
 703         int flags,
 704         struct cred *cr,
 705         caller_context_t *ct)
 706 {
 707         struct pcnode *pcp;
 708         mode_t mask = vap->va_mask;
 709         int error;
 710         struct pcfs *fsp;
 711         timestruc_t now, *timep;
 712 
 713         PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
 714         /*
 715          * cannot set these attributes
 716          */
 717         if (mask & (AT_NOSET | AT_UID | AT_GID)) {
 718                 return (EINVAL);
 719         }
 720         /*
 721          * pcfs_setattr is now allowed on directories to avoid silly warnings
 722          * from 'tar' when it tries to set times on a directory, and console
 723          * printf's on the NFS server when it gets EINVAL back on such a
 724          * request. One possible problem with that since a directory entry
 725          * identifies a file, '.' and all the '..' entries in subdirectories
 726          * may get out of sync when the directory is updated since they're
 727          * treated like separate files. We could fix that by looking for
 728          * '.' and giving it the same attributes, and then looking for
 729          * all the subdirectories and updating '..', but that's pretty
 730          * expensive for something that doesn't seem likely to matter.
 731          */
 732         /* can't do some ops on directories anyway */
 733         if ((vp->v_type == VDIR) &&
 734             (mask & AT_SIZE)) {
 735                 return (EINVAL);
 736         }
 737 
 738         fsp = VFSTOPCFS(vp->v_vfsp);
 739         error = pc_lockfs(fsp, 0, 0);
 740         if (error)
 741                 return (error);
 742         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 743                 pc_unlockfs(fsp);
 744                 return (EIO);
 745         }
 746 
 747         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 748                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
 749                         pc_unlockfs(fsp);
 750                         return (EACCES);
 751                 }
 752         }
 753 
 754         /*
 755          * Change file access modes.
 756          * If nobody has write permission, file is marked readonly.
 757          * Otherwise file is writable by anyone.
 758          */
 759         if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
 760                 if ((vap->va_mode & 0222) == 0)
 761                         pcp->pc_entry.pcd_attr |= PCA_RDONLY;
 762                 else
 763                         pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
 764                 pcp->pc_flags |= PC_CHG;
 765         }
 766         /*
 767          * Truncate file. Must have write permission.
 768          */
 769         if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
 770                 if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
 771                         error = EACCES;
 772                         goto out;
 773                 }
 774                 if (vap->va_size > UINT32_MAX) {
 775                         error = EFBIG;
 776                         goto out;
 777                 }
 778                 error = pc_truncate(pcp, (uint_t)vap->va_size);
 779 
 780                 if (error)
 781                         goto out;
 782 
 783                 if (vap->va_size == 0)
 784                         vnevent_truncate(vp, ct);
 785         }
 786         /*
 787          * Change file modified times.
 788          */
 789         if (mask & (AT_MTIME | AT_CTIME)) {
 790                 /*
 791                  * If SysV-compatible option to set access and
 792                  * modified times if privileged, owner, or write access,
 793                  * use current time rather than va_mtime.
 794                  *
 795                  * XXX - va_mtime.tv_sec == -1 flags this.
 796                  */
 797                 timep = &vap->va_mtime;
 798                 if (vap->va_mtime.tv_sec == -1) {
 799                         gethrestime(&now);
 800                         timep = &now;
 801                 }
 802                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 803                     timep->tv_sec > INT32_MAX) {
 804                         error = EOVERFLOW;
 805                         goto out;
 806                 }
 807                 error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
 808                 if (error)
 809                         goto out;
 810                 pcp->pc_flags |= PC_CHG;
 811         }
 812         /*
 813          * Change file access times.
 814          */
 815         if (mask & AT_ATIME) {
 816                 /*
 817                  * If SysV-compatible option to set access and
 818                  * modified times if privileged, owner, or write access,
 819                  * use current time rather than va_mtime.
 820                  *
 821                  * XXX - va_atime.tv_sec == -1 flags this.
 822                  */
 823                 struct pctime   atime;
 824 
 825                 timep = &vap->va_atime;
 826                 if (vap->va_atime.tv_sec == -1) {
 827                         gethrestime(&now);
 828                         timep = &now;
 829                 }
 830                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 831                     timep->tv_sec > INT32_MAX) {
 832                         error = EOVERFLOW;
 833                         goto out;
 834                 }
 835                 error = pc_tvtopct(timep, &atime);
 836                 if (error)
 837                         goto out;
 838                 pcp->pc_entry.pcd_ladate = atime.pct_date;
 839                 pcp->pc_flags |= PC_CHG;
 840         }
 841 out:
 842         pc_unlockfs(fsp);
 843         return (error);
 844 }
 845 
 846 
 847 /*ARGSUSED*/
 848 static int
 849 pcfs_access(
 850         struct vnode *vp,
 851         int mode,
 852         int flags,
 853         struct cred *cr,
 854         caller_context_t *ct)
 855 {
 856         struct pcnode *pcp;
 857         struct pcfs *fsp;
 858 
 859 
 860         fsp = VFSTOPCFS(vp->v_vfsp);
 861 
 862         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
 863                 return (EIO);
 864         if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
 865                 return (EACCES);
 866 
 867         /*
 868          * If this is a boot partition, privileged users have full access while
 869          * others have read-only access.
 870          */
 871         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 872                 if ((mode & VWRITE) &&
 873                     secpolicy_pcfs_modify_bootpartition(cr) != 0)
 874                         return (EACCES);
 875         }
 876         return (0);
 877 }
 878 
 879 
 880 /*ARGSUSED*/
 881 static int
 882 pcfs_fsync(
 883         struct vnode *vp,
 884         int syncflag,
 885         struct cred *cr,
 886         caller_context_t *ct)
 887 {
 888         struct pcfs *fsp;
 889         struct pcnode *pcp;
 890         int error;
 891 
 892         fsp = VFSTOPCFS(vp->v_vfsp);
 893         if (error = pc_verify(fsp))
 894                 return (error);
 895         error = pc_lockfs(fsp, 0, 0);
 896         if (error)
 897                 return (error);
 898         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 899                 pc_unlockfs(fsp);
 900                 return (EIO);
 901         }
 902         rw_enter(&pcnodes_lock, RW_WRITER);
 903         error = pc_nodesync(pcp);
 904         rw_exit(&pcnodes_lock);
 905         pc_unlockfs(fsp);
 906         return (error);
 907 }
 908 
 909 
 910 /*ARGSUSED*/
 911 static void
 912 pcfs_inactive(
 913         struct vnode *vp,
 914         struct cred *cr,
 915         caller_context_t *ct)
 916 {
 917         struct pcnode *pcp;
 918         struct pcfs *fsp;
 919         int error;
 920 
 921         fsp = VFSTOPCFS(vp->v_vfsp);
 922         error = pc_lockfs(fsp, 0, 1);
 923 
 924         /*
 925          * If the filesystem was umounted by force, all dirty
 926          * pages associated with this vnode are invalidated
 927          * and then the vnode will be freed.
 928          */
 929         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
 930                 pcp = VTOPC(vp);
 931                 if (vn_has_cached_data(vp)) {
 932                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 933                             pcfs_putapage, B_INVAL, (struct cred *)NULL);
 934                 }
 935                 remque(pcp);
 936                 if (error == 0)
 937                         pc_unlockfs(fsp);
 938                 vn_free(vp);
 939                 kmem_free(pcp, sizeof (struct pcnode));
 940                 VFS_RELE(PCFSTOVFS(fsp));
 941                 return;
 942         }
 943 
 944         mutex_enter(&vp->v_lock);
 945         ASSERT(vp->v_count >= 1);
 946         if (vp->v_count > 1) {
 947                 vp->v_count--;  /* release our hold from vn_rele */
 948                 mutex_exit(&vp->v_lock);
 949                 pc_unlockfs(fsp);
 950                 return;
 951         }
 952         mutex_exit(&vp->v_lock);
 953 
 954         /*
 955          * Check again to confirm that no intervening I/O error
 956          * with a subsequent pc_diskchanged() call has released
 957          * the pcnode. If it has then release the vnode as above.
 958          */
 959         pcp = VTOPC(vp);
 960         if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
 961                 if (vn_has_cached_data(vp))
 962                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 963                             pcfs_putapage, B_INVAL | B_TRUNC,
 964                             (struct cred *)NULL);
 965         }
 966 
 967         if (pcp == NULL) {
 968                 vn_free(vp);
 969         } else {
 970                 pc_rele(pcp);
 971         }
 972 
 973         if (!error)
 974                 pc_unlockfs(fsp);
 975 }
 976 
 977 /*ARGSUSED*/
 978 static int
 979 pcfs_lookup(
 980         struct vnode *dvp,
 981         char *nm,
 982         struct vnode **vpp,
 983         struct pathname *pnp,
 984         int flags,
 985         struct vnode *rdir,
 986         struct cred *cr,
 987         caller_context_t *ct,
 988         int *direntflags,
 989         pathname_t *realpnp)
 990 {
 991         struct pcfs *fsp;
 992         struct pcnode *pcp;
 993         int error;
 994 
 995         /*
 996          * If the filesystem was umounted by force, return immediately.
 997          */
 998         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 999                 return (EIO);
1000 
1001         /*
1002          * verify that the dvp is still valid on the disk
1003          */
1004         fsp = VFSTOPCFS(dvp->v_vfsp);
1005         if (error = pc_verify(fsp))
1006                 return (error);
1007         error = pc_lockfs(fsp, 0, 0);
1008         if (error)
1009                 return (error);
1010         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1011                 pc_unlockfs(fsp);
1012                 return (EIO);
1013         }
1014         /*
1015          * Null component name is a synonym for directory being searched.
1016          */
1017         if (*nm == '\0') {
1018                 VN_HOLD(dvp);
1019                 *vpp = dvp;
1020                 pc_unlockfs(fsp);
1021                 return (0);
1022         }
1023 
1024         error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1025         if (!error) {
1026                 *vpp = PCTOV(pcp);
1027                 pcp->pc_flags |= PC_EXTERNAL;
1028         }
1029         pc_unlockfs(fsp);
1030         return (error);
1031 }
1032 
1033 
1034 /*ARGSUSED*/
1035 static int
1036 pcfs_create(
1037         struct vnode *dvp,
1038         char *nm,
1039         struct vattr *vap,
1040         enum vcexcl exclusive,
1041         int mode,
1042         struct vnode **vpp,
1043         struct cred *cr,
1044         int flag,
1045         caller_context_t *ct,
1046         vsecattr_t *vsecp)
1047 {
1048         int error;
1049         struct pcnode *pcp;
1050         struct vnode *vp;
1051         struct pcfs *fsp;
1052 
1053         /*
1054          * can't create directories. use pcfs_mkdir.
1055          * can't create anything other than files.
1056          */
1057         if (vap->va_type == VDIR)
1058                 return (EISDIR);
1059         else if (vap->va_type != VREG)
1060                 return (EINVAL);
1061 
1062         pcp = NULL;
1063         fsp = VFSTOPCFS(dvp->v_vfsp);
1064         error = pc_lockfs(fsp, 0, 0);
1065         if (error)
1066                 return (error);
1067         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1068                 pc_unlockfs(fsp);
1069                 return (EIO);
1070         }
1071 
1072         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1073                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1074                         pc_unlockfs(fsp);
1075                         return (EACCES);
1076                 }
1077         }
1078 
1079         if (*nm == '\0') {
1080                 /*
1081                  * Null component name refers to the directory itself.
1082                  */
1083                 VN_HOLD(dvp);
1084                 pcp = VTOPC(dvp);
1085                 error = EEXIST;
1086         } else {
1087                 error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1088         }
1089         /*
1090          * if file exists and this is a nonexclusive create,
1091          * check for access permissions
1092          */
1093         if (error == EEXIST) {
1094                 vp = PCTOV(pcp);
1095                 if (exclusive == NONEXCL) {
1096                         if (vp->v_type == VDIR) {
1097                                 error = EISDIR;
1098                         } else if (mode) {
1099                                 error = pcfs_access(PCTOV(pcp), mode, 0,
1100                                     cr, ct);
1101                         } else {
1102                                 error = 0;
1103                         }
1104                 }
1105                 if (error) {
1106                         VN_RELE(PCTOV(pcp));
1107                 } else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1108                     (vap->va_size == 0)) {
1109                         error = pc_truncate(pcp, 0L);
1110                         if (error) {
1111                                 VN_RELE(PCTOV(pcp));
1112                         } else {
1113                                 vnevent_create(PCTOV(pcp), ct);
1114                         }
1115                 }
1116         }
1117         if (error) {
1118                 pc_unlockfs(fsp);
1119                 return (error);
1120         }
1121         *vpp = PCTOV(pcp);
1122         pcp->pc_flags |= PC_EXTERNAL;
1123         pc_unlockfs(fsp);
1124         return (error);
1125 }
1126 
1127 /*ARGSUSED*/
1128 static int
1129 pcfs_remove(
1130         struct vnode *vp,
1131         char *nm,
1132         struct cred *cr,
1133         caller_context_t *ct,
1134         int flags)
1135 {
1136         struct pcfs *fsp;
1137         struct pcnode *pcp;
1138         int error;
1139 
1140         fsp = VFSTOPCFS(vp->v_vfsp);
1141         if (error = pc_verify(fsp))
1142                 return (error);
1143         error = pc_lockfs(fsp, 0, 0);
1144         if (error)
1145                 return (error);
1146         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1147                 pc_unlockfs(fsp);
1148                 return (EIO);
1149         }
1150         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1151                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1152                         pc_unlockfs(fsp);
1153                         return (EACCES);
1154                 }
1155         }
1156         error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1157         pc_unlockfs(fsp);
1158         return (error);
1159 }
1160 
1161 /*
1162  * Rename a file or directory
1163  * This rename is restricted to only rename files within a directory.
1164  * XX should make rename more general
1165  */
1166 /*ARGSUSED*/
1167 static int
1168 pcfs_rename(
1169         struct vnode *sdvp,             /* old (source) parent vnode */
1170         char *snm,                      /* old (source) entry name */
1171         struct vnode *tdvp,             /* new (target) parent vnode */
1172         char *tnm,                      /* new (target) entry name */
1173         struct cred *cr,
1174         caller_context_t *ct,
1175         int flags)
1176 {
1177         struct pcfs *fsp;
1178         struct pcnode *dp;      /* parent pcnode */
1179         struct pcnode *tdp;
1180         int error;
1181 
1182         fsp = VFSTOPCFS(sdvp->v_vfsp);
1183         if (error = pc_verify(fsp))
1184                 return (error);
1185 
1186         /*
1187          * make sure we can muck with this directory.
1188          */
1189         error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1190         if (error) {
1191                 return (error);
1192         }
1193         error = pc_lockfs(fsp, 0, 0);
1194         if (error)
1195                 return (error);
1196         if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1197             (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1198                 pc_unlockfs(fsp);
1199                 return (EIO);
1200         }
1201         error = pc_rename(dp, tdp, snm, tnm, ct);
1202         pc_unlockfs(fsp);
1203         return (error);
1204 }
1205 
1206 /*ARGSUSED*/
1207 static int
1208 pcfs_mkdir(
1209         struct vnode *dvp,
1210         char *nm,
1211         struct vattr *vap,
1212         struct vnode **vpp,
1213         struct cred *cr,
1214         caller_context_t *ct,
1215         int flags,
1216         vsecattr_t *vsecp)
1217 {
1218         struct pcfs *fsp;
1219         struct pcnode *pcp;
1220         int error;
1221 
1222         fsp = VFSTOPCFS(dvp->v_vfsp);
1223         if (error = pc_verify(fsp))
1224                 return (error);
1225         error = pc_lockfs(fsp, 0, 0);
1226         if (error)
1227                 return (error);
1228         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1229                 pc_unlockfs(fsp);
1230                 return (EIO);
1231         }
1232 
1233         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1234                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1235                         pc_unlockfs(fsp);
1236                         return (EACCES);
1237                 }
1238         }
1239 
1240         error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1241 
1242         if (!error) {
1243                 pcp -> pc_flags |= PC_EXTERNAL;
1244                 *vpp = PCTOV(pcp);
1245         } else if (error == EEXIST) {
1246                 VN_RELE(PCTOV(pcp));
1247         }
1248         pc_unlockfs(fsp);
1249         return (error);
1250 }
1251 
1252 /*ARGSUSED*/
1253 static int
1254 pcfs_rmdir(
1255         struct vnode *dvp,
1256         char *nm,
1257         struct vnode *cdir,
1258         struct cred *cr,
1259         caller_context_t *ct,
1260         int flags)
1261 {
1262         struct pcfs *fsp;
1263         struct pcnode *pcp;
1264         int error;
1265 
1266         fsp = VFSTOPCFS(dvp -> v_vfsp);
1267         if (error = pc_verify(fsp))
1268                 return (error);
1269         if (error = pc_lockfs(fsp, 0, 0))
1270                 return (error);
1271 
1272         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1273                 pc_unlockfs(fsp);
1274                 return (EIO);
1275         }
1276 
1277         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1278                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1279                         pc_unlockfs(fsp);
1280                         return (EACCES);
1281                 }
1282         }
1283 
1284         error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1285         pc_unlockfs(fsp);
1286         return (error);
1287 }
1288 
1289 /*
1290  * read entries in a directory.
1291  * we must convert pc format to unix format
1292  */
1293 
1294 /*ARGSUSED*/
1295 static int
1296 pcfs_readdir(
1297         struct vnode *dvp,
1298         struct uio *uiop,
1299         struct cred *cr,
1300         int *eofp,
1301         caller_context_t *ct,
1302         int flags)
1303 {
1304         struct pcnode *pcp;
1305         struct pcfs *fsp;
1306         struct pcdir *ep;
1307         struct buf *bp = NULL;
1308         offset_t offset;
1309         int boff;
1310         struct pc_dirent lbp;
1311         struct pc_dirent *ld = &lbp;
1312         int error;
1313 
1314         /*
1315          * If the filesystem was umounted by force, return immediately.
1316          */
1317         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1318                 return (EIO);
1319 
1320         if ((uiop->uio_iovcnt != 1) ||
1321             (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1322                 return (EINVAL);
1323         }
1324         fsp = VFSTOPCFS(dvp->v_vfsp);
1325         /*
1326          * verify that the dp is still valid on the disk
1327          */
1328         if (error = pc_verify(fsp)) {
1329                 return (error);
1330         }
1331         error = pc_lockfs(fsp, 0, 0);
1332         if (error)
1333                 return (error);
1334         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1335                 pc_unlockfs(fsp);
1336                 return (EIO);
1337         }
1338 
1339         bzero(ld, sizeof (*ld));
1340 
1341         if (eofp != NULL)
1342                 *eofp = 0;
1343         offset = uiop->uio_loffset;
1344 
1345         if (dvp->v_flag & VROOT) {
1346                 /*
1347                  * kludge up entries for "." and ".." in the root.
1348                  */
1349                 if (offset == 0) {
1350                         (void) strcpy(ld->d_name, ".");
1351                         ld->d_reclen = DIRENT64_RECLEN(1);
1352                         ld->d_off = (off64_t)sizeof (struct pcdir);
1353                         ld->d_ino = (ino64_t)UINT_MAX;
1354                         if (ld->d_reclen > uiop->uio_resid) {
1355                                 pc_unlockfs(fsp);
1356                                 return (ENOSPC);
1357                         }
1358                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1359                         uiop->uio_loffset = ld->d_off;
1360                         offset = uiop->uio_loffset;
1361                 }
1362                 if (offset == sizeof (struct pcdir)) {
1363                         (void) strcpy(ld->d_name, "..");
1364                         ld->d_reclen = DIRENT64_RECLEN(2);
1365                         if (ld->d_reclen > uiop->uio_resid) {
1366                                 pc_unlockfs(fsp);
1367                                 return (ENOSPC);
1368                         }
1369                         ld->d_off = (off64_t)(uiop->uio_loffset +
1370                             sizeof (struct pcdir));
1371                         ld->d_ino = (ino64_t)UINT_MAX;
1372                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1373                         uiop->uio_loffset = ld->d_off;
1374                         offset = uiop->uio_loffset;
1375                 }
1376                 offset -= 2 * sizeof (struct pcdir);
1377                 /* offset now has the real offset value into directory file */
1378         }
1379 
1380         for (;;) {
1381                 boff = pc_blkoff(fsp, offset);
1382                 if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1383                         if (bp != NULL) {
1384                                 brelse(bp);
1385                                 bp = NULL;
1386                         }
1387                         error = pc_blkatoff(pcp, offset, &bp, &ep);
1388                         if (error) {
1389                                 if (error == ENOENT) {
1390                                         error = 0;
1391                                         if (eofp)
1392                                                 *eofp = 1;
1393                                 }
1394                                 break;
1395                         }
1396                 }
1397                 if (ep->pcd_filename[0] == PCD_UNUSED) {
1398                         if (eofp)
1399                                 *eofp = 1;
1400                         break;
1401                 }
1402                 /*
1403                  * Don't display label because it may contain funny characters.
1404                  */
1405                 if (ep->pcd_filename[0] == PCD_ERASED) {
1406                         uiop->uio_loffset += sizeof (struct pcdir);
1407                         offset += sizeof (struct pcdir);
1408                         ep++;
1409                         continue;
1410                 }
1411                 if (PCDL_IS_LFN(ep)) {
1412                         if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1413                             0)
1414                                 break;
1415                         continue;
1416                 }
1417 
1418                 if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1419                         break;
1420         }
1421         if (bp)
1422                 brelse(bp);
1423         pc_unlockfs(fsp);
1424         return (error);
1425 }
1426 
1427 
1428 /*
1429  * Called from pvn_getpages or pcfs_getpage to get a particular page.
1430  * When we are called the pcfs is already locked.
1431  */
1432 /*ARGSUSED*/
1433 static int
1434 pcfs_getapage(
1435         struct vnode *vp,
1436         u_offset_t off,
1437         size_t len,
1438         uint_t *protp,
1439         page_t *pl[],           /* NULL if async IO is requested */
1440         size_t plsz,
1441         struct seg *seg,
1442         caddr_t addr,
1443         enum seg_rw rw,
1444         struct cred *cr)
1445 {
1446         struct pcnode *pcp;
1447         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1448         struct vnode *devvp;
1449         page_t *pp;
1450         page_t *pagefound;
1451         int err;
1452 
1453         /*
1454          * If the filesystem was umounted by force, return immediately.
1455          */
1456         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1457                 return (EIO);
1458 
1459         PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1460             (void *)vp, off, len);
1461 
1462         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1463                 return (EIO);
1464         devvp = fsp->pcfs_devvp;
1465 
1466         /* pcfs doesn't do readaheads */
1467         if (pl == NULL)
1468                 return (0);
1469 
1470         pl[0] = NULL;
1471         err = 0;
1472         /*
1473          * If the accessed time on the pcnode has not already been
1474          * set elsewhere (e.g. for read/setattr) we set the time now.
1475          * This gives us approximate modified times for mmap'ed files
1476          * which are accessed via loads in the user address space.
1477          */
1478         if ((pcp->pc_flags & PC_ACC) == 0 &&
1479             ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1480                 pc_mark_acc(fsp, pcp);
1481         }
1482 reread:
1483         if ((pagefound = page_exists(vp, off)) == NULL) {
1484                 /*
1485                  * Need to really do disk IO to get the page(s).
1486                  */
1487                 struct buf *bp;
1488                 daddr_t lbn, bn;
1489                 u_offset_t io_off;
1490                 size_t io_len;
1491                 u_offset_t lbnoff, xferoffset;
1492                 u_offset_t pgoff;
1493                 uint_t  xfersize;
1494                 int err1;
1495 
1496                 lbn = pc_lblkno(fsp, off);
1497                 lbnoff = off & ~(fsp->pcfs_clsize - 1);
1498                 xferoffset = off & ~(fsp->pcfs_secsize - 1);
1499 
1500                 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1501                     off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1502                 if (pp == NULL)
1503                         /*
1504                          * XXX - If pcfs is made MT-hot, this should go
1505                          * back to reread.
1506                          */
1507                         panic("pcfs_getapage pvn_read_kluster");
1508 
1509                 for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1510                     pgoff += xfersize,
1511                     lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1512                     lbnoff += xfersize, xferoffset += xfersize) {
1513                         /*
1514                          * read as many contiguous blocks as possible to
1515                          * fill this page
1516                          */
1517                         xfersize = PAGESIZE - pgoff;
1518                         err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1519                         if (err1) {
1520                                 PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1521                                 err = err1;
1522                                 goto out;
1523                         }
1524                         bp = pageio_setup(pp, xfersize, devvp, B_READ);
1525                         bp->b_edev = devvp->v_rdev;
1526                         bp->b_dev = cmpdev(devvp->v_rdev);
1527                         bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1528                         bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1529                         bp->b_file = vp;
1530                         bp->b_offset = (offset_t)(off + pgoff);
1531 
1532                         (void) bdev_strategy(bp);
1533 
1534                         lwp_stat_update(LWP_STAT_INBLK, 1);
1535 
1536                         if (err == 0)
1537                                 err = biowait(bp);
1538                         else
1539                                 (void) biowait(bp);
1540                         pageio_done(bp);
1541                         if (err)
1542                                 goto out;
1543                 }
1544                 if (pgoff < PAGESIZE) {
1545                         pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1546                 }
1547                 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1548         }
1549 out:
1550         if (err) {
1551                 if (pp != NULL)
1552                         pvn_read_done(pp, B_ERROR);
1553                 return (err);
1554         }
1555 
1556         if (pagefound) {
1557                 /*
1558                  * Page exists in the cache, acquire the "shared"
1559                  * lock.  If this fails, go back to reread.
1560                  */
1561                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1562                         goto reread;
1563                 }
1564                 pl[0] = pp;
1565                 pl[1] = NULL;
1566         }
1567         return (err);
1568 }
1569 
1570 /*
1571  * Return all the pages from [off..off+len] in given file
1572  */
1573 /* ARGSUSED */
1574 static int
1575 pcfs_getpage(
1576         struct vnode *vp,
1577         offset_t off,
1578         size_t len,
1579         uint_t *protp,
1580         page_t *pl[],
1581         size_t plsz,
1582         struct seg *seg,
1583         caddr_t addr,
1584         enum seg_rw rw,
1585         struct cred *cr,
1586         caller_context_t *ct)
1587 {
1588         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1589         int err;
1590 
1591         PC_DPRINTF0(6, "pcfs_getpage\n");
1592         if (err = pc_verify(fsp))
1593                 return (err);
1594         if (vp->v_flag & VNOMAP)
1595                 return (ENOSYS);
1596         ASSERT(off <= UINT32_MAX);
1597         err = pc_lockfs(fsp, 0, 0);
1598         if (err)
1599                 return (err);
1600         if (protp != NULL)
1601                 *protp = PROT_ALL;
1602 
1603         ASSERT((off & PAGEOFFSET) == 0);
1604         if (len <= PAGESIZE) {
1605                 err = pcfs_getapage(vp, off, len, protp, pl,
1606                     plsz, seg, addr, rw, cr);
1607         } else {
1608                 err = pvn_getpages(pcfs_getapage, vp, off,
1609                     len, protp, pl, plsz, seg, addr, rw, cr);
1610         }
1611         pc_unlockfs(fsp);
1612         return (err);
1613 }
1614 
1615 
1616 /*
1617  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1618  * If len == 0, do from off to EOF.
1619  *
1620  * The normal cases should be len == 0 & off == 0 (entire vp list),
1621  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1622  * (from pageout).
1623  *
1624  */
1625 /*ARGSUSED*/
1626 static int
1627 pcfs_putpage(
1628         struct vnode *vp,
1629         offset_t off,
1630         size_t len,
1631         int flags,
1632         struct cred *cr,
1633         caller_context_t *ct)
1634 {
1635         struct pcnode *pcp;
1636         page_t *pp;
1637         struct pcfs *fsp;
1638         u_offset_t io_off;
1639         size_t io_len;
1640         offset_t eoff;
1641         int err;
1642 
1643         /*
1644          * If the filesystem was umounted by force, return immediately.
1645          */
1646         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1647                 return (EIO);
1648 
1649         PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1650         if (vp->v_flag & VNOMAP)
1651                 return (ENOSYS);
1652 
1653         fsp = VFSTOPCFS(vp->v_vfsp);
1654 
1655         if (err = pc_verify(fsp))
1656                 return (err);
1657         if ((pcp = VTOPC(vp)) == NULL) {
1658                 PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1659                 return (EIO);
1660         }
1661         if (pcp->pc_flags & PC_INVAL)
1662                 return (EIO);
1663 
1664         if (curproc == proc_pageout) {
1665                 /*
1666                  * XXX - This is a quick hack to avoid blocking
1667                  * pageout. Also to avoid pcfs_getapage deadlocking
1668                  * with putpage when memory is running out,
1669                  * since we only have one global lock and we don't
1670                  * support async putpage.
1671                  * It should be fixed someday.
1672                  *
1673                  * Interestingly, this used to be a test of NOMEMWAIT().
1674                  * We only ever got here once pcfs started supporting
1675                  * NFS sharing, and then only because the NFS server
1676                  * threads seem to do writes in sched's process context.
1677                  * Since everyone else seems to just care about pageout,
1678                  * the test was changed to look for pageout directly.
1679                  */
1680                 return (ENOMEM);
1681         }
1682 
1683         ASSERT(off <= UINT32_MAX);
1684 
1685         flags &= ~B_ASYNC;  /* XXX should fix this later */
1686 
1687         err = pc_lockfs(fsp, 0, 0);
1688         if (err)
1689                 return (err);
1690         if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1691                 pc_unlockfs(fsp);
1692                 return (0);
1693         }
1694 
1695         if (len == 0) {
1696                 /*
1697                  * Search the entire vp list for pages >= off
1698                  */
1699                 err = pvn_vplist_dirty(vp, off,
1700                     pcfs_putapage, flags, cr);
1701         } else {
1702                 eoff = off + len;
1703 
1704                 for (io_off = off; io_off < eoff &&
1705                     io_off < pcp->pc_size; io_off += io_len) {
1706                         /*
1707                          * If we are not invalidating, synchronously
1708                          * freeing or writing pages use the routine
1709                          * page_lookup_nowait() to prevent reclaiming
1710                          * them from the free list.
1711                          */
1712                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1713                                 pp = page_lookup(vp, io_off,
1714                                     (flags & (B_INVAL | B_FREE)) ?
1715                                     SE_EXCL : SE_SHARED);
1716                         } else {
1717                                 pp = page_lookup_nowait(vp, io_off,
1718                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1719                         }
1720 
1721                         if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1722                                 io_len = PAGESIZE;
1723                         else {
1724                                 err = pcfs_putapage(vp, pp, &io_off, &io_len,
1725                                     flags, cr);
1726                                 if (err != 0)
1727                                         break;
1728                                 /*
1729                                  * "io_off" and "io_len" are returned as
1730                                  * the range of pages we actually wrote.
1731                                  * This allows us to skip ahead more quickly
1732                                  * since several pages may've been dealt
1733                                  * with by this iteration of the loop.
1734                                  */
1735                         }
1736                 }
1737         }
1738         if (err == 0 && (flags & B_INVAL) &&
1739             off == 0 && len == 0 && vn_has_cached_data(vp)) {
1740                 /*
1741                  * If doing "invalidation", make sure that
1742                  * all pages on the vnode list are actually
1743                  * gone.
1744                  */
1745                 cmn_err(CE_PANIC,
1746                     "pcfs_putpage: B_INVAL, pages not gone");
1747         } else if (err) {
1748                 PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1749         }
1750         pc_unlockfs(fsp);
1751         return (err);
1752 }
1753 
1754 /*
1755  * Write out a single page, possibly klustering adjacent dirty pages.
1756  */
1757 /*ARGSUSED*/
1758 int
1759 pcfs_putapage(
1760         struct vnode *vp,
1761         page_t *pp,
1762         u_offset_t *offp,
1763         size_t *lenp,
1764         int flags,
1765         struct cred *cr)
1766 {
1767         struct pcnode *pcp;
1768         struct pcfs *fsp;
1769         struct vnode *devvp;
1770         size_t io_len;
1771         daddr_t bn;
1772         u_offset_t lbn, lbnoff, xferoffset;
1773         uint_t pgoff, xfersize;
1774         int err = 0;
1775         u_offset_t io_off;
1776 
1777         pcp = VTOPC(vp);
1778         fsp = VFSTOPCFS(vp->v_vfsp);
1779         devvp = fsp->pcfs_devvp;
1780 
1781         /*
1782          * If the modified time on the inode has not already been
1783          * set elsewhere (e.g. for write/setattr) and this is not
1784          * a call from msync (B_FORCE) we set the time now.
1785          * This gives us approximate modified times for mmap'ed files
1786          * which are modified via stores in the user address space.
1787          */
1788         if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1789                 pcp->pc_flags |= PC_MOD;
1790                 pc_mark_mod(fsp, pcp);
1791         }
1792         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1793             PAGESIZE, flags);
1794 
1795         if (fsp->pcfs_flags & PCFS_IRRECOV) {
1796                 goto out;
1797         }
1798 
1799         PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1800 
1801         lbn = pc_lblkno(fsp, io_off);
1802         lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1803         xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1804 
1805         for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1806             pgoff += xfersize,
1807             lbn += howmany(xfersize, fsp->pcfs_clsize),
1808             lbnoff += xfersize, xferoffset += xfersize) {
1809 
1810                 struct buf *bp;
1811                 int err1;
1812 
1813                 /*
1814                  * write as many contiguous blocks as possible from this page
1815                  */
1816                 xfersize = io_len - pgoff;
1817                 err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1818                 if (err1) {
1819                         err = err1;
1820                         goto out;
1821                 }
1822                 bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1823                 bp->b_edev = devvp->v_rdev;
1824                 bp->b_dev = cmpdev(devvp->v_rdev);
1825                 bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1826                 bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1827                 bp->b_file = vp;
1828                 bp->b_offset = (offset_t)(io_off + pgoff);
1829 
1830                 (void) bdev_strategy(bp);
1831 
1832                 lwp_stat_update(LWP_STAT_OUBLK, 1);
1833 
1834                 if (err == 0)
1835                         err = biowait(bp);
1836                 else
1837                         (void) biowait(bp);
1838                 pageio_done(bp);
1839         }
1840         pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1841         pp = NULL;
1842 
1843 out:
1844         if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1845                 pvn_write_done(pp, B_WRITE | flags);
1846         } else if (err != 0 && pp != NULL) {
1847                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1848         }
1849 
1850         if (offp)
1851                 *offp = io_off;
1852         if (lenp)
1853                 *lenp = io_len;
1854                 PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1855                     (void *)vp, (void *)pp, io_off, io_len);
1856         if (err) {
1857                 PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1858         }
1859         return (err);
1860 }
1861 
1862 /*ARGSUSED*/
1863 static int
1864 pcfs_map(
1865         struct vnode *vp,
1866         offset_t off,
1867         struct as *as,
1868         caddr_t *addrp,
1869         size_t len,
1870         uchar_t prot,
1871         uchar_t maxprot,
1872         uint_t flags,
1873         struct cred *cr,
1874         caller_context_t *ct)
1875 {
1876         struct segvn_crargs vn_a;
1877         int error;
1878 
1879         PC_DPRINTF0(6, "pcfs_map\n");
1880         if (vp->v_flag & VNOMAP)
1881                 return (ENOSYS);
1882 
1883         if (off > UINT32_MAX || off + len > UINT32_MAX)
1884                 return (ENXIO);
1885 
1886         as_rangelock(as);
1887         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1888         if (error != 0) {
1889                 as_rangeunlock(as);
1890                 return (error);
1891         }
1892 
1893         vn_a.vp = vp;
1894         vn_a.offset = off;
1895         vn_a.type = flags & MAP_TYPE;
1896         vn_a.prot = prot;
1897         vn_a.maxprot = maxprot;
1898         vn_a.flags = flags & ~MAP_TYPE;
1899         vn_a.cred = cr;
1900         vn_a.amp = NULL;
1901         vn_a.szc = 0;
1902         vn_a.lgrp_mem_policy_flags = 0;
1903 
1904         error = as_map(as, *addrp, len, segvn_create, &vn_a);
1905         as_rangeunlock(as);
1906         return (error);
1907 }
1908 
1909 /* ARGSUSED */
1910 static int
1911 pcfs_seek(
1912         struct vnode *vp,
1913         offset_t ooff,
1914         offset_t *noffp,
1915         caller_context_t *ct)
1916 {
1917         if (*noffp < 0)
1918                 return (EINVAL);
1919         else if (*noffp > MAXOFFSET_T)
1920                 return (EINVAL);
1921         else
1922                 return (0);
1923 }
1924 
1925 /* ARGSUSED */
1926 static int
1927 pcfs_addmap(
1928         struct vnode *vp,
1929         offset_t off,
1930         struct as *as,
1931         caddr_t addr,
1932         size_t len,
1933         uchar_t prot,
1934         uchar_t maxprot,
1935         uint_t flags,
1936         struct cred *cr,
1937         caller_context_t *ct)
1938 {
1939         if (vp->v_flag & VNOMAP)
1940                 return (ENOSYS);
1941         return (0);
1942 }
1943 
1944 /*ARGSUSED*/
1945 static int
1946 pcfs_delmap(
1947         struct vnode *vp,
1948         offset_t off,
1949         struct as *as,
1950         caddr_t addr,
1951         size_t len,
1952         uint_t prot,
1953         uint_t maxprot,
1954         uint_t flags,
1955         struct cred *cr,
1956         caller_context_t *ct)
1957 {
1958         if (vp->v_flag & VNOMAP)
1959                 return (ENOSYS);
1960         return (0);
1961 }
1962 
1963 /*
1964  * POSIX pathconf() support.
1965  */
1966 /* ARGSUSED */
1967 static int
1968 pcfs_pathconf(
1969         struct vnode *vp,
1970         int cmd,
1971         ulong_t *valp,
1972         struct cred *cr,
1973         caller_context_t *ct)
1974 {
1975         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1976 
1977         switch (cmd) {
1978         case _PC_LINK_MAX:
1979                 *valp = 1;
1980                 return (0);
1981 
1982         case _PC_CASE_BEHAVIOR:
1983                 return (EINVAL);
1984 
1985         case _PC_FILESIZEBITS:
1986                 /*
1987                  * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1988                  * FAT12 can only go up to the maximum filesystem capacity
1989                  * which is ~509MB.
1990                  */
1991                 *valp = IS_FAT12(fsp) ? 30 : 33;
1992                 return (0);
1993 
1994         case _PC_TIMESTAMP_RESOLUTION:
1995                 /*
1996                  * PCFS keeps track of modification times, it its own
1997                  * internal format, to a resolution of 2 seconds.
1998                  * Since 2000 million is representable in an int32_t
1999                  * without overflow (or becoming negative), we allow
2000                  * this value to be returned.
2001                  */
2002                 *valp = 2000000000L;
2003                 return (0);
2004 
2005         default:
2006                 return (fs_pathconf(vp, cmd, valp, cr, ct));
2007         }
2008 
2009 }
2010 
2011 /* ARGSUSED */
2012 static int
2013 pcfs_space(
2014         struct vnode *vp,
2015         int cmd,
2016         struct flock64 *bfp,
2017         int flag,
2018         offset_t offset,
2019         cred_t *cr,
2020         caller_context_t *ct)
2021 {
2022         struct vattr vattr;
2023         int error;
2024 
2025         if (cmd != F_FREESP)
2026                 return (EINVAL);
2027 
2028         if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2029                 if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2030                         return (EFBIG);
2031                 /*
2032                  * we only support the special case of l_len == 0,
2033                  * meaning free to end of file at this moment.
2034                  */
2035                 if (bfp->l_len != 0)
2036                         return (EINVAL);
2037                 vattr.va_mask = AT_SIZE;
2038                 vattr.va_size = bfp->l_start;
2039                 error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2040         }
2041         return (error);
2042 }
2043 
2044 /*
2045  * Break up 'len' chars from 'buf' into a long file name chunk.
2046  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2047  */
2048 void
2049 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2050 {
2051         int     i;
2052 
2053         ASSERT(buf != NULL);
2054 
2055         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2056                 if (len > 0) {
2057                         ep->pcdl_firstfilename[i] = *buf++;
2058                         ep->pcdl_firstfilename[i + 1] = *buf++;
2059                         len -= 2;
2060                 } else {
2061                         ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2062                         ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2063                 }
2064         }
2065 
2066         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2067                 if (len > 0) {
2068                         ep->pcdl_secondfilename[i] = *buf++;
2069                         ep->pcdl_secondfilename[i + 1] = *buf++;
2070                         len -= 2;
2071                 } else {
2072                         ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2073                         ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2074                 }
2075         }
2076         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2077                 if (len > 0) {
2078                         ep->pcdl_thirdfilename[i] = *buf++;
2079                         ep->pcdl_thirdfilename[i + 1] = *buf++;
2080                         len -= 2;
2081                 } else {
2082                         ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2083                         ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2084                 }
2085         }
2086 }
2087 
2088 /*
2089  * Extract the characters from the long filename chunk into 'buf'.
2090  * Return the number of characters extracted.
2091  */
2092 static int
2093 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2094 {
2095         char    *tmp = buf;
2096         int     i;
2097 
2098         /* Copy all the names, no filtering now */
2099 
2100         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2101                 *tmp = ep->pcdl_firstfilename[i];
2102                 *(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2103 
2104                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2105                         return (tmp - buf);
2106         }
2107         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2108                 *tmp = ep->pcdl_secondfilename[i];
2109                 *(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2110 
2111                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2112                         return (tmp - buf);
2113         }
2114         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2115                 *tmp = ep->pcdl_thirdfilename[i];
2116                 *(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2117 
2118                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2119                         return (tmp - buf);
2120         }
2121         return (tmp - buf);
2122 }
2123 
2124 
/*
 * Checksum the passed in short filename (8 name bytes followed by
 * 3 extension bytes).
 * Every component of a long name carries this value, which lets us
 * verify that the long name still belongs to the short filename
 * (it hasn't been "detached"). This algorithm was found in FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 *
 * For each byte the running 8-bit sum is rotated right by one bit and
 * the byte is added to it.
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
        uchar_t sum = 0;
        int     i;

        for (i = 0; i < 8; i++)
                sum = ((sum << 7) | (sum >> 1)) + name[i];

        for (i = 0; i < 3; i++)
                sum = ((sum << 7) | (sum >> 1)) + ext[i];

        return (sum);
}
2156 
2157 /*
2158  * Read a chunk of long filename entries into 'namep'.
2159  * Return with offset pointing to short entry (on success), or next
2160  * entry to read (if this wasn't a valid lfn really).
2161  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2162  * a long filename.
2163  *
2164  * Can also be called with a NULL namep, in which case it just returns
2165  * whether this was really a valid long filename and consumes it
2166  * (used by pc_dirempty()).
2167  */
2168 int
2169 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2170     struct pcdir **epp, offset_t *offset, struct buf **bp)
2171 {
2172         struct pcdir *ep = *epp;
2173         struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2174         struct vnode *dvp = PCTOV(pcp);
2175         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2176         char    *lfn;
2177         char    *lfn_base;
2178         int     boff;
2179         int     i, cs;
2180         char    *buf;
2181         uchar_t cksum;
2182         int     detached = 0;
2183         int     error = 0;
2184         int     foldcase;
2185         int     count = 0;
2186         size_t  u16l = 0, u8l = 0;
2187         char    *outbuf;
2188         size_t  ret, inlen, outlen;
2189 
2190         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2191         lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2192         lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2193         *lfn = '\0';
2194         *(lfn + 1) = '\0';
2195         cksum = lep->pcdl_checksum;
2196 
2197         buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2198         for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2199                 /* read next block if necessary */
2200                 boff = pc_blkoff(fsp, *offset);
2201                 if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2202                         if (*bp != NULL) {
2203                                 brelse(*bp);
2204                                 *bp = NULL;
2205                         }
2206                         error = pc_blkatoff(pcp, *offset, bp, &ep);
2207                         if (error) {
2208                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2209                                 kmem_free(buf, PCMAXNAM_UTF16);
2210                                 return (error);
2211                         }
2212                         lep = (struct pcdir_lfn *)ep;
2213                 }
2214                 /* can this happen? Bad fs? */
2215                 if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2216                         detached = 1;
2217                         break;
2218                 }
2219                 if (cksum != lep->pcdl_checksum)
2220                         detached = 1;
2221                 /* process current entry */
2222                 cs = get_long_fn_chunk(lep, buf);
2223                 count += cs;
2224                 for (; cs > 0; cs--) {
2225                         /* see if we underflow */
2226                         if (lfn >= lfn_base)
2227                                 *--lfn = buf[cs - 1];
2228                         else
2229                                 detached = 1;
2230                 }
2231                 lep++;
2232                 *offset += sizeof (struct pcdir);
2233         }
2234         kmem_free(buf, PCMAXNAM_UTF16);
2235         /* read next block if necessary */
2236         boff = pc_blkoff(fsp, *offset);
2237         ep = (struct pcdir *)lep;
2238         if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2239                 if (*bp != NULL) {
2240                         brelse(*bp);
2241                         *bp = NULL;
2242                 }
2243                 error = pc_blkatoff(pcp, *offset, bp, &ep);
2244                 if (error) {
2245                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2246                         return (error);
2247                 }
2248         }
2249         /* should be on the short one */
2250         if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2251             (ep->pcd_filename[0] == PCD_ERASED))) {
2252                 detached = 1;
2253         }
2254         if (detached ||
2255             (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2256             !pc_valid_long_fn(lfn, 0)) {
2257                 /*
2258                  * process current entry again. This may end up another lfn
2259                  * or a short name.
2260                  */
2261                 *epp = ep;
2262                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2263                 return (EINVAL);
2264         }
2265         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2266                 /*
2267                  * Don't display label because it may contain
2268                  * funny characters.
2269                  */
2270                 *offset += sizeof (struct pcdir);
2271                 ep++;
2272                 *epp = ep;
2273                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2274                 return (EINVAL);
2275         }
2276         if (namep) {
2277                 u16l = count / 2;
2278                 u8l = PCMAXNAMLEN;
2279                 error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2280                     (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2281                 /*
2282                  * uconv_u16tou8() will catch conversion errors including
2283                  * the case where there is not enough room to write the
2284                  * converted result and the u8l will never go over the given
2285                  * PCMAXNAMLEN.
2286                  */
2287                 if (error != 0) {
2288                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2289                         return (EINVAL);
2290                 }
2291                 namep[u8l] = '\0';
2292                 if (foldcase) {
2293                         inlen = strlen(namep);
2294                         outlen = PCMAXNAMLEN;
2295                         outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2296                         ret = u8_textprep_str(namep, &inlen, outbuf,
2297                             &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2298                             &error);
2299                         if (ret == -1) {
2300                                 kmem_free(outbuf, PCMAXNAMLEN + 1);
2301                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2302                                 return (EINVAL);
2303                         }
2304                         outbuf[PCMAXNAMLEN - outlen] = '\0';
2305                         (void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2306                         kmem_free(outbuf, PCMAXNAMLEN + 1);
2307                 }
2308         }
2309         kmem_free(lfn_base, PCMAXNAM_UTF16);
2310         *epp = ep;
2311         return (0);
2312 }
2313 /*
2314  * Read a long filename into the pc_dirent structure and copy it out.
2315  */
2316 int
2317 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2318     struct pcdir **epp, offset_t *offset, struct buf **bp)
2319 {
2320         struct pcdir *ep;
2321         struct pcnode *pcp = VTOPC(dvp);
2322         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2323         offset_t uiooffset = uiop->uio_loffset;
2324         int     error = 0;
2325         offset_t oldoffset;
2326 
2327         oldoffset = *offset;
2328         error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2329         if (error) {
2330                 if (error == EINVAL) {
2331                         uiop->uio_loffset += *offset - oldoffset;
2332                         return (0);
2333                 } else
2334                         return (error);
2335         }
2336 
2337         ep = *epp;
2338         uiop->uio_loffset += *offset - oldoffset;
2339         ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2340         if (ld->d_reclen > uiop->uio_resid) {
2341                 uiop->uio_loffset = uiooffset;
2342                 return (ENOSPC);
2343         }
2344         ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2345         ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2346             pc_blkoff(fsp, *offset), ep->pcd_attr,
2347             pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2348         (void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2349         uiop->uio_loffset = ld->d_off;
2350         *offset += sizeof (struct pcdir);
2351         ep++;
2352         *epp = ep;
2353         return (0);
2354 }
2355 
2356 /*
2357  * Read a short filename into the pc_dirent structure and copy it out.
2358  */
2359 int
2360 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2361     struct pcdir **epp, offset_t *offset, struct buf **bp)
2362 {
2363         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2364         int     boff = pc_blkoff(fsp, *offset);
2365         struct pcdir *ep = *epp;
2366         offset_t        oldoffset = uiop->uio_loffset;
2367         int     error;
2368         int     foldcase;
2369 
2370         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2371                 uiop->uio_loffset += sizeof (struct pcdir);
2372                 *offset += sizeof (struct pcdir);
2373                 ep++;
2374                 *epp = ep;
2375                 return (0);
2376         }
2377         ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2378             boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2379             pc_direntpersec(fsp));
2380         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2381         error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2382             &ep->pcd_ext[0], foldcase);
2383         if (error == 0) {
2384                 ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2385                 if (ld->d_reclen > uiop->uio_resid) {
2386                         uiop->uio_loffset = oldoffset;
2387                         return (ENOSPC);
2388                 }
2389                 ld->d_off = (off64_t)(uiop->uio_loffset +
2390                     sizeof (struct pcdir));
2391                 (void) uiomove((caddr_t)ld,
2392                     ld->d_reclen, UIO_READ, uiop);
2393                 uiop->uio_loffset = ld->d_off;
2394         } else {
2395                 uiop->uio_loffset += sizeof (struct pcdir);
2396         }
2397         *offset += sizeof (struct pcdir);
2398         ep++;
2399         *epp = ep;
2400         return (0);
2401 }
2402 
2403 /* ARGSUSED */
2404 static int
2405 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2406 {
2407         struct pc_fid *pcfid;
2408         struct pcnode *pcp;
2409         struct pcfs     *fsp;
2410         int     error;
2411 
2412         fsp = VFSTOPCFS(vp->v_vfsp);
2413         if (fsp == NULL)
2414                 return (EIO);
2415         error = pc_lockfs(fsp, 0, 0);
2416         if (error)
2417                 return (error);
2418         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2419                 pc_unlockfs(fsp);
2420                 return (EIO);
2421         }
2422         if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2423                 fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2424                 pc_unlockfs(fsp);
2425                 return (ENOSPC);
2426         }
2427 
2428         pcfid = (struct pc_fid *)fidp;
2429         bzero(pcfid, sizeof (struct pc_fid));
2430         pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2431         if (vp->v_flag & VROOT) {
2432                 pcfid->pcfid_block = 0;
2433                 pcfid->pcfid_offset = 0;
2434                 pcfid->pcfid_ctime = 0;
2435         } else {
2436                 pcfid->pcfid_block = pcp->pc_eblkno;
2437                 pcfid->pcfid_offset = pcp->pc_eoffset;
2438                 pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2439         }
2440         pc_unlockfs(fsp);
2441         return (0);
2442 }