1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  24  */
  25 
  26 /*
  27  * utility routines for the /dev fs
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/time.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/file.h>
  40 #include <sys/fcntl.h>
  41 #include <sys/flock.h>
  42 #include <sys/kmem.h>
  43 #include <sys/uio.h>
  44 #include <sys/errno.h>
  45 #include <sys/stat.h>
  46 #include <sys/cred.h>
  47 #include <sys/dirent.h>
  48 #include <sys/pathname.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/debug.h>
  51 #include <sys/mode.h>
  52 #include <sys/policy.h>
  53 #include <fs/fs_subr.h>
  54 #include <sys/mount.h>
  55 #include <sys/fs/snode.h>
  56 #include <sys/fs/dv_node.h>
  57 #include <sys/fs/sdev_impl.h>
  58 #include <sys/sunndi.h>
  59 #include <sys/sunmdi.h>
  60 #include <sys/conf.h>
  61 #include <sys/proc.h>
  62 #include <sys/user.h>
  63 #include <sys/modctl.h>
  64 
  65 #ifdef DEBUG
  66 int sdev_debug = 0x00000001;
  67 int sdev_debug_cache_flags = 0;
  68 #endif
  69 
  70 /*
  71  * globals
  72  */
  73 /* prototype memory vattrs */
  74 vattr_t sdev_vattr_dir = {
  75         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  76         VDIR,                                   /* va_type */
  77         SDEV_DIRMODE_DEFAULT,                   /* va_mode */
  78         SDEV_UID_DEFAULT,                       /* va_uid */
  79         SDEV_GID_DEFAULT,                       /* va_gid */
  80         0,                                      /* va_fsid */
  81         0,                                      /* va_nodeid */
  82         0,                                      /* va_nlink */
  83         0,                                      /* va_size */
  84         0,                                      /* va_atime */
  85         0,                                      /* va_mtime */
  86         0,                                      /* va_ctime */
  87         0,                                      /* va_rdev */
  88         0,                                      /* va_blksize */
  89         0,                                      /* va_nblocks */
  90         0                                       /* va_vcode */
  91 };
  92 
  93 vattr_t sdev_vattr_lnk = {
  94         AT_TYPE|AT_MODE,                        /* va_mask */
  95         VLNK,                                   /* va_type */
  96         SDEV_LNKMODE_DEFAULT,                   /* va_mode */
  97         SDEV_UID_DEFAULT,                       /* va_uid */
  98         SDEV_GID_DEFAULT,                       /* va_gid */
  99         0,                                      /* va_fsid */
 100         0,                                      /* va_nodeid */
 101         0,                                      /* va_nlink */
 102         0,                                      /* va_size */
 103         0,                                      /* va_atime */
 104         0,                                      /* va_mtime */
 105         0,                                      /* va_ctime */
 106         0,                                      /* va_rdev */
 107         0,                                      /* va_blksize */
 108         0,                                      /* va_nblocks */
 109         0                                       /* va_vcode */
 110 };
 111 
 112 vattr_t sdev_vattr_blk = {
 113         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 114         VBLK,                                   /* va_type */
 115         S_IFBLK | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 116         SDEV_UID_DEFAULT,                       /* va_uid */
 117         SDEV_GID_DEFAULT,                       /* va_gid */
 118         0,                                      /* va_fsid */
 119         0,                                      /* va_nodeid */
 120         0,                                      /* va_nlink */
 121         0,                                      /* va_size */
 122         0,                                      /* va_atime */
 123         0,                                      /* va_mtime */
 124         0,                                      /* va_ctime */
 125         0,                                      /* va_rdev */
 126         0,                                      /* va_blksize */
 127         0,                                      /* va_nblocks */
 128         0                                       /* va_vcode */
 129 };
 130 
 131 vattr_t sdev_vattr_chr = {
 132         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 133         VCHR,                                   /* va_type */
 134         S_IFCHR | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 135         SDEV_UID_DEFAULT,                       /* va_uid */
 136         SDEV_GID_DEFAULT,                       /* va_gid */
 137         0,                                      /* va_fsid */
 138         0,                                      /* va_nodeid */
 139         0,                                      /* va_nlink */
 140         0,                                      /* va_size */
 141         0,                                      /* va_atime */
 142         0,                                      /* va_mtime */
 143         0,                                      /* va_ctime */
 144         0,                                      /* va_rdev */
 145         0,                                      /* va_blksize */
 146         0,                                      /* va_nblocks */
 147         0                                       /* va_vcode */
 148 };
 149 
 150 kmem_cache_t    *sdev_node_cache;       /* sdev_node cache */
 151 int             devtype;                /* fstype */
 152 
 153 /* static */
 154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
 155 static void sdev_set_no_negcache(struct sdev_node *);
 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
 157 static void sdev_free_vtab(fs_operation_def_t *);
 158 
 159 static void
 160 sdev_prof_free(struct sdev_node *dv)
 161 {
 162         ASSERT(!SDEV_IS_GLOBAL(dv));
 163         nvlist_free(dv->sdev_prof.dev_name);
 164         nvlist_free(dv->sdev_prof.dev_map);
 165         nvlist_free(dv->sdev_prof.dev_symlink);
 166         nvlist_free(dv->sdev_prof.dev_glob_incdir);
 167         nvlist_free(dv->sdev_prof.dev_glob_excdir);
 168         bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 169 }
 170 
 171 /* sdev_node cache constructor */
 172 /*ARGSUSED1*/
 173 static int
 174 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
 175 {
 176         struct sdev_node *dv = (struct sdev_node *)buf;
 177         struct vnode *vp;
 178 
 179         bzero(buf, sizeof (struct sdev_node));
 180         vp = dv->sdev_vnode = vn_alloc(flag);
 181         if (vp == NULL) {
 182                 return (-1);
 183         }
 184         vp->v_data = dv;
 185         rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
 186         return (0);
 187 }
 188 
 189 /* sdev_node cache destructor */
 190 /*ARGSUSED1*/
 191 static void
 192 i_sdev_node_dtor(void *buf, void *arg)
 193 {
 194         struct sdev_node *dv = (struct sdev_node *)buf;
 195         struct vnode *vp = SDEVTOV(dv);
 196 
 197         rw_destroy(&dv->sdev_contents);
 198         vn_free(vp);
 199 }
 200 
 201 /* initialize sdev_node cache */
 202 void
 203 sdev_node_cache_init()
 204 {
 205         int flags = 0;
 206 
 207 #ifdef  DEBUG
 208         flags = sdev_debug_cache_flags;
 209         if (flags)
 210                 sdcmn_err(("cache debug flags 0x%x\n", flags));
 211 #endif  /* DEBUG */
 212 
 213         ASSERT(sdev_node_cache == NULL);
 214         sdev_node_cache = kmem_cache_create("sdev_node_cache",
 215             sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
 216             NULL, NULL, NULL, flags);
 217 }
 218 
 219 /* destroy sdev_node cache */
 220 void
 221 sdev_node_cache_fini()
 222 {
 223         ASSERT(sdev_node_cache != NULL);
 224         kmem_cache_destroy(sdev_node_cache);
 225         sdev_node_cache = NULL;
 226 }
 227 
 228 /*
 229  * Compare two nodes lexographically to balance avl tree
 230  */
 231 static int
 232 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
 233 {
 234         int rv;
 235         if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
 236                 return (0);
 237         return ((rv < 0) ? -1 : 1);
 238 }
 239 
 240 void
 241 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
 242 {
 243         ASSERT(dv);
 244         ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 245         dv->sdev_state = state;
 246 }
 247 
 248 static void
 249 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
 250 {
 251         timestruc_t     now;
 252         struct vattr    *attrp;
 253         uint_t          mask;
 254 
 255         ASSERT(dv->sdev_attr);
 256         ASSERT(vap);
 257 
 258         attrp = dv->sdev_attr;
 259         mask = vap->va_mask;
 260         if (mask & AT_TYPE)
 261                 attrp->va_type = vap->va_type;
 262         if (mask & AT_MODE)
 263                 attrp->va_mode = vap->va_mode;
 264         if (mask & AT_UID)
 265                 attrp->va_uid = vap->va_uid;
 266         if (mask & AT_GID)
 267                 attrp->va_gid = vap->va_gid;
 268         if (mask & AT_RDEV)
 269                 attrp->va_rdev = vap->va_rdev;
 270 
 271         gethrestime(&now);
 272         attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
 273         attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
 274         attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
 275 }
 276 
 277 static void
 278 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
 279 {
 280         ASSERT(dv->sdev_attr == NULL);
 281         ASSERT(vap->va_mask & AT_TYPE);
 282         ASSERT(vap->va_mask & AT_MODE);
 283 
 284         dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
 285         sdev_attr_update(dv, vap);
 286 }
 287 
 288 /* alloc and initialize a sdev_node */
 289 int
 290 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 291     vattr_t *vap)
 292 {
 293         struct sdev_node *dv = NULL;
 294         struct vnode *vp;
 295         size_t nmlen, len;
 296         devname_handle_t  *dhl;
 297 
 298         nmlen = strlen(nm) + 1;
 299         if (nmlen > MAXNAMELEN) {
 300                 sdcmn_err9(("sdev_nodeinit: node name %s"
 301                     " too long\n", nm));
 302                 *newdv = NULL;
 303                 return (ENAMETOOLONG);
 304         }
 305 
 306         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 307 
 308         dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
 309         bcopy(nm, dv->sdev_name, nmlen);
 310         dv->sdev_namelen = nmlen - 1;        /* '\0' not included */
 311         len = strlen(ddv->sdev_path) + strlen(nm) + 2;
 312         dv->sdev_path = kmem_alloc(len, KM_SLEEP);
 313         (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
 314         /* overwritten for VLNK nodes */
 315         dv->sdev_symlink = NULL;
 316 
 317         vp = SDEVTOV(dv);
 318         vn_reinit(vp);
 319         vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
 320         if (vap)
 321                 vp->v_type = vap->va_type;
 322 
 323         /*
 324          * initialized to the parent's vnodeops.
 325          * maybe overwriten for a VDIR
 326          */
 327         vn_setops(vp, vn_getops(SDEVTOV(ddv)));
 328         vn_exists(vp);
 329 
 330         dv->sdev_dotdot = NULL;
 331         dv->sdev_attrvp = NULL;
 332         if (vap) {
 333                 sdev_attr_alloc(dv, vap);
 334         } else {
 335                 dv->sdev_attr = NULL;
 336         }
 337 
 338         dv->sdev_ino = sdev_mkino(dv);
 339         dv->sdev_nlink = 0;          /* updated on insert */
 340         dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
 341         dv->sdev_flags |= SDEV_BUILD;
 342         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 343         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 344         if (SDEV_IS_GLOBAL(ddv)) {
 345                 dv->sdev_flags |= SDEV_GLOBAL;
 346                 dhl = &(dv->sdev_handle);
 347                 dhl->dh_data = dv;
 348                 dhl->dh_args = NULL;
 349                 sdev_set_no_negcache(dv);
 350                 dv->sdev_gdir_gen = 0;
 351         } else {
 352                 dv->sdev_flags &= ~SDEV_GLOBAL;
 353                 dv->sdev_origin = NULL; /* set later */
 354                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 355                 dv->sdev_ldir_gen = 0;
 356                 dv->sdev_devtree_gen = 0;
 357         }
 358 
 359         rw_enter(&dv->sdev_contents, RW_WRITER);
 360         sdev_set_nodestate(dv, SDEV_INIT);
 361         rw_exit(&dv->sdev_contents);
 362         *newdv = dv;
 363 
 364         return (0);
 365 }
 366 
 367 /*
 368  * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 369  * caller to transition the node to the SDEV_ZOMBIE state.
 370  */
 371 int
 372 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
 373     void *args, struct cred *cred)
 374 {
 375         int error = 0;
 376         struct vnode *vp = SDEVTOV(dv);
 377         vtype_t type;
 378 
 379         ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
 380 
 381         type = vap->va_type;
 382         vp->v_type = type;
 383         vp->v_rdev = vap->va_rdev;
 384         rw_enter(&dv->sdev_contents, RW_WRITER);
 385         if (type == VDIR) {
 386                 dv->sdev_nlink = 2;
 387                 dv->sdev_flags &= ~SDEV_PERSIST;
 388                 dv->sdev_flags &= ~SDEV_DYNAMIC;
 389                 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
 390                 ASSERT(dv->sdev_dotdot);
 391                 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
 392                 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
 393                 avl_create(&dv->sdev_entries,
 394                     (int (*)(const void *, const void *))sdev_compare_nodes,
 395                     sizeof (struct sdev_node),
 396                     offsetof(struct sdev_node, sdev_avllink));
 397         } else if (type == VLNK) {
 398                 ASSERT(args);
 399                 dv->sdev_nlink = 1;
 400                 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
 401         } else {
 402                 dv->sdev_nlink = 1;
 403         }
 404 
 405         if (!(SDEV_IS_GLOBAL(dv))) {
 406                 dv->sdev_origin = (struct sdev_node *)args;
 407                 dv->sdev_flags &= ~SDEV_PERSIST;
 408         }
 409 
 410         /*
 411          * shadow node is created here OR
 412          * if failed (indicated by dv->sdev_attrvp == NULL),
 413          * created later in sdev_setattr
 414          */
 415         if (avp) {
 416                 dv->sdev_attrvp = avp;
 417         } else {
 418                 if (dv->sdev_attr == NULL) {
 419                         sdev_attr_alloc(dv, vap);
 420                 } else {
 421                         sdev_attr_update(dv, vap);
 422                 }
 423 
 424                 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
 425                         error = sdev_shadow_node(dv, cred);
 426         }
 427 
 428         if (error == 0) {
 429                 /* transition to READY state */
 430                 sdev_set_nodestate(dv, SDEV_READY);
 431                 sdev_nc_node_exists(dv);
 432         }
 433         rw_exit(&dv->sdev_contents);
 434         return (error);
 435 }
 436 
 437 /*
 438  * Build the VROOT sdev_node.
 439  */
 440 /*ARGSUSED*/
 441 struct sdev_node *
 442 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
 443     struct vnode *avp, struct cred *cred)
 444 {
 445         struct sdev_node *dv;
 446         struct vnode *vp;
 447         char devdir[] = "/dev";
 448 
 449         ASSERT(sdev_node_cache != NULL);
 450         ASSERT(avp);
 451         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 452         vp = SDEVTOV(dv);
 453         vn_reinit(vp);
 454         vp->v_flag |= VROOT;
 455         vp->v_vfsp = vfsp;
 456         vp->v_type = VDIR;
 457         vp->v_rdev = devdev;
 458         vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
 459         vn_exists(vp);
 460 
 461         if (vfsp->vfs_mntpt)
 462                 dv->sdev_name = i_ddi_strdup(
 463                     (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
 464         else
 465                 /* vfs_mountdev1 set mount point later */
 466                 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
 467         dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
 468         dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
 469         dv->sdev_ino = SDEV_ROOTINO;
 470         dv->sdev_nlink = 2;          /* name + . (no sdev_insert) */
 471         dv->sdev_dotdot = dv;                /* .. == self */
 472         dv->sdev_attrvp = avp;
 473         dv->sdev_attr = NULL;
 474         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 475         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 476         if (strcmp(dv->sdev_name, "/dev") == 0) {
 477                 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
 478                 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
 479                 dv->sdev_gdir_gen = 0;
 480         } else {
 481                 dv->sdev_flags = SDEV_BUILD;
 482                 dv->sdev_flags &= ~SDEV_PERSIST;
 483                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 484                 dv->sdev_ldir_gen = 0;
 485                 dv->sdev_devtree_gen = 0;
 486         }
 487 
 488         avl_create(&dv->sdev_entries,
 489             (int (*)(const void *, const void *))sdev_compare_nodes,
 490             sizeof (struct sdev_node),
 491             offsetof(struct sdev_node, sdev_avllink));
 492 
 493         rw_enter(&dv->sdev_contents, RW_WRITER);
 494         sdev_set_nodestate(dv, SDEV_READY);
 495         rw_exit(&dv->sdev_contents);
 496         sdev_nc_node_exists(dv);
 497         return (dv);
 498 }
 499 
 500 /* directory dependent vop table */
 501 struct sdev_vop_table {
 502         char *vt_name;                          /* subdirectory name */
 503         const fs_operation_def_t *vt_service;   /* vnodeops table */
 504         struct vnodeops *vt_vops;               /* constructed vop */
 505         struct vnodeops **vt_global_vops;       /* global container for vop */
 506         int (*vt_vtor)(struct sdev_node *);     /* validate sdev_node */
 507         int vt_flags;
 508 };
 509 
 510 /*
 511  * A nice improvement would be to provide a plug-in mechanism
 512  * for this table instead of a const table.
 513  */
 514 static struct sdev_vop_table vtab[] =
 515 {
 516         { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
 517         SDEV_DYNAMIC | SDEV_VTOR },
 518 
 519         { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
 520         SDEV_DYNAMIC | SDEV_VTOR },
 521 
 522         { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
 523         devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 524 
 525         { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
 526 
 527         { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
 528         SDEV_DYNAMIC | SDEV_VTOR },
 529 
 530         { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
 531         devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
 532 
 533         /*
 534          * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
 535          * lofi driver controls child nodes.
 536          *
 537          * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
 538          * stale nodes (e.g. from devfsadm -R).
 539          *
 540          * In addition, devfsadm knows not to attempt a rmdir: a zone
 541          * may hold a reference, which would zombify the node,
 542          * preventing a mkdir.
 543          */
 544 
 545         { "lofi", NULL, NULL, NULL, NULL,
 546             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 547         { "rlofi", NULL, NULL, NULL, NULL,
 548             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 549 
 550         { NULL, NULL, NULL, NULL, NULL, 0}
 551 };
 552 
 553 /*
 554  * We need to match off of the sdev_path, not the sdev_name. We are only allowed
 555  * to exist directly under /dev.
 556  */
 557 struct sdev_vop_table *
 558 sdev_match(struct sdev_node *dv)
 559 {
 560         int vlen;
 561         int i;
 562         const char *path;
 563 
 564         if (strlen(dv->sdev_path) <= 5)
 565                 return (NULL);
 566 
 567         if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
 568                 return (NULL);
 569         path = dv->sdev_path + 5;
 570 
 571         for (i = 0; vtab[i].vt_name; i++) {
 572                 if (strcmp(vtab[i].vt_name, path) == 0)
 573                         return (&vtab[i]);
 574                 if (vtab[i].vt_flags & SDEV_SUBDIR) {
 575                         vlen = strlen(vtab[i].vt_name);
 576                         if ((strncmp(vtab[i].vt_name, path,
 577                             vlen - 1) == 0) && path[vlen] == '/')
 578                                 return (&vtab[i]);
 579                 }
 580 
 581         }
 582         return (NULL);
 583 }
 584 
 585 /*
 586  *  sets a directory's vnodeops if the directory is in the vtab;
 587  */
 588 static struct vnodeops *
 589 sdev_get_vop(struct sdev_node *dv)
 590 {
 591         struct sdev_vop_table *vtp;
 592         char *path;
 593 
 594         path = dv->sdev_path;
 595         ASSERT(path);
 596 
 597         /* gets the relative path to /dev/ */
 598         path += 5;
 599 
 600         /* gets the vtab entry it matches */
 601         if ((vtp = sdev_match(dv)) != NULL) {
 602                 dv->sdev_flags |= vtp->vt_flags;
 603                 if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
 604                     (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
 605                         dv->sdev_flags |= SDEV_PERSIST;
 606 
 607                 if (vtp->vt_vops) {
 608                         if (vtp->vt_global_vops)
 609                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 610 
 611                         return (vtp->vt_vops);
 612                 }
 613 
 614                 if (vtp->vt_service) {
 615                         fs_operation_def_t *templ;
 616                         templ = sdev_merge_vtab(vtp->vt_service);
 617                         if (vn_make_ops(vtp->vt_name,
 618                             (const fs_operation_def_t *)templ,
 619                             &vtp->vt_vops) != 0) {
 620                                 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
 621                                     vtp->vt_name);
 622                                 /*NOTREACHED*/
 623                         }
 624                         if (vtp->vt_global_vops) {
 625                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 626                         }
 627                         sdev_free_vtab(templ);
 628 
 629                         return (vtp->vt_vops);
 630                 }
 631 
 632                 return (sdev_vnodeops);
 633         }
 634 
 635         /* child inherits the persistence of the parent */
 636         if (SDEV_IS_PERSIST(dv->sdev_dotdot))
 637                 dv->sdev_flags |= SDEV_PERSIST;
 638 
 639         return (sdev_vnodeops);
 640 }
 641 
 642 static void
 643 sdev_set_no_negcache(struct sdev_node *dv)
 644 {
 645         int i;
 646         char *path;
 647 
 648         ASSERT(dv->sdev_path);
 649         path = dv->sdev_path + strlen("/dev/");
 650 
 651         for (i = 0; vtab[i].vt_name; i++) {
 652                 if (strcmp(vtab[i].vt_name, path) == 0) {
 653                         if (vtab[i].vt_flags & SDEV_NO_NCACHE)
 654                                 dv->sdev_flags |= SDEV_NO_NCACHE;
 655                         break;
 656                 }
 657         }
 658 }
 659 
 660 void *
 661 sdev_get_vtor(struct sdev_node *dv)
 662 {
 663         struct sdev_vop_table *vtp;
 664 
 665         vtp = sdev_match(dv);
 666         if (vtp)
 667                 return ((void *)vtp->vt_vtor);
 668         else
 669                 return (NULL);
 670 }
 671 
 672 /*
 673  * Build the base root inode
 674  */
 675 ino_t
 676 sdev_mkino(struct sdev_node *dv)
 677 {
 678         ino_t   ino;
 679 
 680         /*
 681          * for now, follow the lead of tmpfs here
 682          * need to someday understand the requirements here
 683          */
 684         ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
 685         ino += SDEV_ROOTINO + 1;
 686 
 687         return (ino);
 688 }
 689 
 690 int
 691 sdev_getlink(struct vnode *linkvp, char **link)
 692 {
 693         int err;
 694         char *buf;
 695         struct uio uio = {0};
 696         struct iovec iov = {0};
 697 
 698         if (linkvp == NULL)
 699                 return (ENOENT);
 700         ASSERT(linkvp->v_type == VLNK);
 701 
 702         buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 703         iov.iov_base = buf;
 704         iov.iov_len = MAXPATHLEN;
 705         uio.uio_iov = &iov;
 706         uio.uio_iovcnt = 1;
 707         uio.uio_resid = MAXPATHLEN;
 708         uio.uio_segflg = UIO_SYSSPACE;
 709         uio.uio_llimit = MAXOFFSET_T;
 710 
 711         err = VOP_READLINK(linkvp, &uio, kcred, NULL);
 712         if (err) {
 713                 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
 714                 kmem_free(buf, MAXPATHLEN);
 715                 return (ENOENT);
 716         }
 717 
 718         /* mission complete */
 719         *link = i_ddi_strdup(buf, KM_SLEEP);
 720         kmem_free(buf, MAXPATHLEN);
 721         return (0);
 722 }
 723 
 724 /*
 725  * A convenient wrapper to get the devfs node vnode for a device
 726  * minor functionality: readlink() of a /dev symlink
 727  * Place the link into dv->sdev_symlink
 728  */
 729 static int
 730 sdev_follow_link(struct sdev_node *dv)
 731 {
 732         int err;
 733         struct vnode *linkvp;
 734         char *link = NULL;
 735 
 736         linkvp = SDEVTOV(dv);
 737         if (linkvp == NULL)
 738                 return (ENOENT);
 739         ASSERT(linkvp->v_type == VLNK);
 740         err = sdev_getlink(linkvp, &link);
 741         if (err) {
 742                 dv->sdev_symlink = NULL;
 743                 return (ENOENT);
 744         }
 745 
 746         ASSERT(link != NULL);
 747         dv->sdev_symlink = link;
 748         return (0);
 749 }
 750 
 751 static int
 752 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
 753 {
 754         vtype_t otype = SDEVTOV(dv)->v_type;
 755 
 756         /*
 757          * existing sdev_node has a different type.
 758          */
 759         if (otype != nvap->va_type) {
 760                 sdcmn_err9(("sdev_node_check: existing node "
 761                     "  %s type %d does not match new node type %d\n",
 762                     dv->sdev_name, otype, nvap->va_type));
 763                 return (EEXIST);
 764         }
 765 
 766         /*
 767          * For a symlink, the target should be the same.
 768          */
 769         if (otype == VLNK) {
 770                 ASSERT(nargs != NULL);
 771                 ASSERT(dv->sdev_symlink != NULL);
 772                 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
 773                         sdcmn_err9(("sdev_node_check: existing node "
 774                             " %s has different symlink %s as new node "
 775                             " %s\n", dv->sdev_name, dv->sdev_symlink,
 776                             (char *)nargs));
 777                         return (EEXIST);
 778                 }
 779         }
 780 
 781         return (0);
 782 }
 783 
 784 /*
 785  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 786  *
 787  * arguments:
 788  *      - ddv (parent)
 789  *      - nm (child name)
 790  *      - newdv (sdev_node for nm is returned here)
 791  *      - vap (vattr for the node to be created, va_type should be set.
 792  *      - avp (attribute vnode)
 793  *        the defaults should be used if unknown)
 794  *      - cred
 795  *      - args
 796  *          . tnm (for VLNK)
 797  *          . global sdev_node (for !SDEV_GLOBAL)
 798  *      - state: SDEV_INIT, SDEV_READY
 799  *
 800  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 801  *
 802  * NOTE:  directory contents writers lock needs to be held before
 803  *        calling this routine.
 804  */
 805 int
 806 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 807     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
 808     sdev_node_state_t state)
 809 {
 810         int error = 0;
 811         sdev_node_state_t node_state;
 812         struct sdev_node *dv = NULL;
 813 
 814         ASSERT(state != SDEV_ZOMBIE);
 815         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 816 
 817         if (*newdv) {
 818                 dv = *newdv;
 819         } else {
 820                 /* allocate and initialize a sdev_node */
 821                 if (ddv->sdev_state == SDEV_ZOMBIE) {
 822                         sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
 823                             ddv->sdev_path));
 824                         return (ENOENT);
 825                 }
 826 
 827                 error = sdev_nodeinit(ddv, nm, &dv, vap);
 828                 if (error != 0) {
 829                         sdcmn_err9(("sdev_mknode: error %d,"
 830                             " name %s can not be initialized\n",
 831                             error, nm));
 832                         return (error);
 833                 }
 834                 ASSERT(dv);
 835 
 836                 /* insert into the directory cache */
 837                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
 838         }
 839 
 840         ASSERT(dv);
 841         node_state = dv->sdev_state;
 842         ASSERT(node_state != SDEV_ZOMBIE);
 843 
 844         if (state == SDEV_READY) {
 845                 switch (node_state) {
 846                 case SDEV_INIT:
 847                         error = sdev_nodeready(dv, vap, avp, args, cred);
 848                         if (error) {
 849                                 sdcmn_err9(("sdev_mknode: node %s can NOT"
 850                                     " be transitioned into READY state, "
 851                                     "error %d\n", nm, error));
 852                         }
 853                         break;
 854                 case SDEV_READY:
 855                         /*
 856                          * Do some sanity checking to make sure
 857                          * the existing sdev_node is what has been
 858                          * asked for.
 859                          */
 860                         error = sdev_node_check(dv, vap, args);
 861                         break;
 862                 default:
 863                         break;
 864                 }
 865         }
 866 
 867         if (!error) {
 868                 *newdv = dv;
 869                 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
 870         } else {
 871                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
 872                 /*
 873                  * We created this node, it wasn't passed into us. Therefore it
 874                  * is up to us to delete it.
 875                  */
 876                 if (*newdv == NULL)
 877                         SDEV_SIMPLE_RELE(dv);
 878                 *newdv = NULL;
 879         }
 880 
 881         return (error);
 882 }
 883 
 884 /*
 885  * convenient wrapper to change vp's ATIME, CTIME and MTIME
 886  */
 887 void
 888 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
 889 {
 890         struct vattr attr;
 891         timestruc_t now;
 892         int err;
 893 
 894         ASSERT(vp);
 895         gethrestime(&now);
 896         if (mask & AT_CTIME)
 897                 attr.va_ctime = now;
 898         if (mask & AT_MTIME)
 899                 attr.va_mtime = now;
 900         if (mask & AT_ATIME)
 901                 attr.va_atime = now;
 902 
 903         attr.va_mask = (mask & AT_TIMES);
 904         err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
 905         if (err && (err != EROFS)) {
 906                 sdcmn_err(("update timestamps error %d\n", err));
 907         }
 908 }
 909 
/*
 * Tear down an sdev_node and hand it back to sdev_node_cache.
 *
 * The node must have a link count of zero.  The hold on the backing store
 * vnode (sdev_attrvp), if any, is released here, and the attribute, name,
 * symlink and path buffers owned by the node are freed.  Nodes that are not
 * SDEV_GLOBAL also have sdev_prof_free() called on them, and a directory
 * node has its (empty) AVL tree of entries destroyed.
 *
 * The flags argument is not used.
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
        /* no references */
        ASSERT(dv->sdev_nlink == 0);

        if (dv->sdev_attrvp != NULLVP) {
                VN_RELE(dv->sdev_attrvp);
                /*
                 * reset the attrvp so that no more
                 * references can be made on this already
                 * vn_rele() vnode
                 */
                dv->sdev_attrvp = NULLVP;
        }

        if (dv->sdev_attr != NULL) {
                kmem_free(dv->sdev_attr, sizeof (struct vattr));
                dv->sdev_attr = NULL;
        }

        /* the name buffer is sized by sdev_namelen plus the terminator */
        if (dv->sdev_name != NULL) {
                kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
                dv->sdev_name = NULL;
        }

        /* symlink and path are sized by their string length */
        if (dv->sdev_symlink != NULL) {
                kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
                dv->sdev_symlink = NULL;
        }

        if (dv->sdev_path) {
                kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
                dv->sdev_path = NULL;
        }

        if (!SDEV_IS_GLOBAL(dv))
                sdev_prof_free(dv);

        /* a directory must already be empty by the time it is destroyed */
        if (SDEVTOV(dv)->v_type == VDIR) {
                ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
                avl_destroy(&dv->sdev_entries);
        }

        mutex_destroy(&dv->sdev_lookup_lock);
        cv_destroy(&dv->sdev_lookup_cv);

        /* return node to initial state as per constructor */
        (void) memset((void *)&dv->sdev_instance_data, 0,
            sizeof (dv->sdev_instance_data));
        vn_invalid(SDEVTOV(dv));
        kmem_cache_free(sdev_node_cache, dv);
}
 967 
 968 /*
 969  * DIRECTORY CACHE lookup
 970  */
 971 struct sdev_node *
 972 sdev_findbyname(struct sdev_node *ddv, char *nm)
 973 {
 974         struct sdev_node *dv;
 975         struct sdev_node dvtmp;
 976         avl_index_t     where;
 977 
 978         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
 979 
 980         dvtmp.sdev_name = nm;
 981         dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
 982         if (dv) {
 983                 ASSERT(dv->sdev_dotdot == ddv);
 984                 ASSERT(strcmp(dv->sdev_name, nm) == 0);
 985                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 986                 SDEV_HOLD(dv);
 987                 return (dv);
 988         }
 989         return (NULL);
 990 }
 991 
/*
 * Inserts a new sdev_node in a parent directory
 *
 * ddv is the parent directory and dv the node to link under it.  The caller
 * must hold ddv's sdev_contents lock as writer.  dv must not be a zombie
 * and must still have a link count of zero.  No entry comparing equal to dv
 * may already be present in ddv; that is enforced with VERIFY.
 *
 * dv's parent pointer is set to ddv and ddv's link count is incremented.
 * dv's own link count is not changed here.
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
        avl_index_t where;

        ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
        ASSERT(SDEVTOV(ddv)->v_type == VDIR);
        ASSERT(ddv->sdev_nlink >= 2);
        ASSERT(dv->sdev_nlink == 0);
        ASSERT(dv->sdev_state != SDEV_ZOMBIE);

        dv->sdev_dotdot = ddv;
        /* avl_find() doubles as the way to obtain the insertion point */
        VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
        avl_insert(&ddv->sdev_entries, dv, where);
        ddv->sdev_nlink++;
}
1011 
1012 /*
1013  * The following check is needed because while sdev_nodes are linked
1014  * in SDEV_INIT state, they have their link counts incremented only
1015  * in SDEV_READY state.
1016  */
1017 static void
1018 decr_link(struct sdev_node *dv)
1019 {
1020         VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1021         if (dv->sdev_state != SDEV_INIT) {
1022                 VERIFY(dv->sdev_nlink >= 1);
1023                 dv->sdev_nlink--;
1024         } else {
1025                 VERIFY(dv->sdev_nlink == 0);
1026         }
1027 }
1028 
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count, the node is
 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
 * not destroyed via sdev_inactive until its reference count reaches "0".
 *
 * The caller must hold ddv's sdev_contents lock as writer.  dv must not
 * already be a zombie.  This routine takes dv's vnode v_lock and then dv's
 * own sdev_contents lock (as writer) for the duration of the state change,
 * and releases them in the reverse order.  No hold on dv is released here.
 */
static void
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
        struct vnode *vp;
        sdev_node_state_t os;

        ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

        vp = SDEVTOV(dv);
        mutex_enter(&vp->v_lock);
        rw_enter(&dv->sdev_contents, RW_WRITER);
        /* remember the state we are leaving; it is needed further down */
        os = dv->sdev_state;
        ASSERT(os != SDEV_ZOMBIE);
        dv->sdev_state = SDEV_ZOMBIE;

        /*
         * unlink ourselves from the parent directory now to take care of the ..
         * link. However, if we're a directory, we don't remove our reference to
         * ourself eg. '.' until we are torn down in the inactive callback.
         */
        decr_link(ddv);
        avl_remove(&ddv->sdev_entries, dv);
        /*
         * sdev_inactive expects nodes to have a link to themselves when we're
         * tearing them down. If we're transitioning from the initial state to
         * zombie and not via ready, then we're not going to have this link that
         * comes from the node being ready. As a result, we need to increment
         * our link count by one to account for this.
         */
        if (os == SDEV_INIT && dv->sdev_nlink == 0)
                dv->sdev_nlink++;
        rw_exit(&dv->sdev_contents);
        mutex_exit(&vp->v_lock);
}
1070 
1071 /*
1072  * check if the source is in the path of the target
1073  *
1074  * source and target are different
1075  */
1076 /*ARGSUSED2*/
1077 static int
1078 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1079 {
1080         int error = 0;
1081         struct sdev_node *dotdot, *dir;
1082 
1083         dotdot = tdv->sdev_dotdot;
1084         ASSERT(dotdot);
1085 
1086         /* fs root */
1087         if (dotdot == tdv) {
1088                 return (0);
1089         }
1090 
1091         for (;;) {
1092                 /*
1093                  * avoid error cases like
1094                  *      mv a a/b
1095                  *      mv a a/b/c
1096                  *      etc.
1097                  */
1098                 if (dotdot == sdv) {
1099                         error = EINVAL;
1100                         break;
1101                 }
1102 
1103                 dir = dotdot;
1104                 dotdot = dir->sdev_dotdot;
1105 
1106                 /* done checking because root is reached */
1107                 if (dir == dotdot) {
1108                         break;
1109                 }
1110         }
1111         return (error);
1112 }
1113 
1114 int
1115 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1116     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1117     struct cred *cred)
1118 {
1119         int error = 0;
1120         struct vnode *ovp = SDEVTOV(odv);
1121         struct vnode *nvp;
1122         struct vattr vattr;
1123         int doingdir = (ovp->v_type == VDIR);
1124         char *link = NULL;
1125         int samedir = (oddv == nddv) ? 1 : 0;
1126         int bkstore = 0;
1127         struct sdev_node *idv = NULL;
1128         struct sdev_node *ndv = NULL;
1129         timestruc_t now;
1130 
1131         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1132         error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1133         if (error)
1134                 return (error);
1135 
1136         if (!samedir)
1137                 rw_enter(&oddv->sdev_contents, RW_WRITER);
1138         rw_enter(&nddv->sdev_contents, RW_WRITER);
1139 
1140         /*
1141          * the source may have been deleted by another thread before
1142          * we gets here.
1143          */
1144         if (odv->sdev_state != SDEV_READY) {
1145                 error = ENOENT;
1146                 goto err_out;
1147         }
1148 
1149         if (doingdir && (odv == nddv)) {
1150                 error = EINVAL;
1151                 goto err_out;
1152         }
1153 
1154         /*
1155          * If renaming a directory, and the parents are different (".." must be
1156          * changed) then the source dir must not be in the dir hierarchy above
1157          * the target since it would orphan everything below the source dir.
1158          */
1159         if (doingdir && (oddv != nddv)) {
1160                 error = sdev_checkpath(odv, nddv, cred);
1161                 if (error)
1162                         goto err_out;
1163         }
1164 
1165         /* fix the source for a symlink */
1166         if (vattr.va_type == VLNK) {
1167                 if (odv->sdev_symlink == NULL) {
1168                         error = sdev_follow_link(odv);
1169                         if (error) {
1170                                 /*
1171                                  * The underlying symlink doesn't exist. This
1172                                  * node probably shouldn't even exist. While
1173                                  * it's a bit jarring to consumers, we're going
1174                                  * to remove the node from /dev.
1175                                  */
1176                                 if (SDEV_IS_PERSIST((*ndvp)))
1177                                         bkstore = 1;
1178                                 sdev_dirdelete(oddv, odv);
1179                                 if (bkstore) {
1180                                         ASSERT(nddv->sdev_attrvp);
1181                                         error = VOP_REMOVE(nddv->sdev_attrvp,
1182                                             nnm, cred, NULL, 0);
1183                                         if (error)
1184                                                 goto err_out;
1185                                 }
1186                                 error = ENOENT;
1187                                 goto err_out;
1188                         }
1189                 }
1190                 ASSERT(odv->sdev_symlink);
1191                 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1192         }
1193 
1194         /* destination existing */
1195         if (*ndvp) {
1196                 nvp = SDEVTOV(*ndvp);
1197                 ASSERT(nvp);
1198 
1199                 /* handling renaming to itself */
1200                 if (odv == *ndvp) {
1201                         error = 0;
1202                         goto err_out;
1203                 }
1204 
1205                 if (nvp->v_type == VDIR) {
1206                         if (!doingdir) {
1207                                 error = EISDIR;
1208                                 goto err_out;
1209                         }
1210 
1211                         if (vn_vfswlock(nvp)) {
1212                                 error = EBUSY;
1213                                 goto err_out;
1214                         }
1215 
1216                         if (vn_mountedvfs(nvp) != NULL) {
1217                                 vn_vfsunlock(nvp);
1218                                 error = EBUSY;
1219                                 goto err_out;
1220                         }
1221 
1222                         /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1223                         if ((*ndvp)->sdev_nlink > 2) {
1224                                 vn_vfsunlock(nvp);
1225                                 error = EEXIST;
1226                                 goto err_out;
1227                         }
1228                         vn_vfsunlock(nvp);
1229 
1230                         /*
1231                          * We did not place the hold on *ndvp, so even though
1232                          * we're deleting the node, we should not get rid of our
1233                          * reference.
1234                          */
1235                         sdev_dirdelete(nddv, *ndvp);
1236                         *ndvp = NULL;
1237                         ASSERT(nddv->sdev_attrvp);
1238                         error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1239                             nddv->sdev_attrvp, cred, NULL, 0);
1240                         if (error)
1241                                 goto err_out;
1242                 } else {
1243                         if (doingdir) {
1244                                 error = ENOTDIR;
1245                                 goto err_out;
1246                         }
1247 
1248                         if (SDEV_IS_PERSIST((*ndvp))) {
1249                                 bkstore = 1;
1250                         }
1251 
1252                         /*
1253                          * Get rid of the node from the directory cache note.
1254                          * Don't forget that it's not up to us to remove the vn
1255                          * ref on the sdev node, as we did not place it.
1256                          */
1257                         sdev_dirdelete(nddv, *ndvp);
1258                         *ndvp = NULL;
1259                         if (bkstore) {
1260                                 ASSERT(nddv->sdev_attrvp);
1261                                 error = VOP_REMOVE(nddv->sdev_attrvp,
1262                                     nnm, cred, NULL, 0);
1263                                 if (error)
1264                                         goto err_out;
1265                         }
1266                 }
1267         }
1268 
1269         /*
1270          * make a fresh node from the source attrs
1271          */
1272         ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1273         error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1274             NULL, (void *)link, cred, SDEV_READY);
1275 
1276         if (link != NULL) {
1277                 kmem_free(link, strlen(link) + 1);
1278                 link = NULL;
1279         }
1280 
1281         if (error)
1282                 goto err_out;
1283         ASSERT(*ndvp);
1284         ASSERT((*ndvp)->sdev_state == SDEV_READY);
1285 
1286         /* move dir contents */
1287         if (doingdir) {
1288                 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1289                     idv = SDEV_NEXT_ENTRY(odv, idv)) {
1290                         SDEV_HOLD(idv);
1291                         error = sdev_rnmnode(odv, idv,
1292                             (struct sdev_node *)(*ndvp), &ndv,
1293                             idv->sdev_name, cred);
1294                         SDEV_RELE(idv);
1295                         if (error)
1296                                 goto err_out;
1297                         ndv = NULL;
1298                 }
1299         }
1300 
1301         if ((*ndvp)->sdev_attrvp) {
1302                 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1303                     AT_CTIME|AT_ATIME);
1304         } else {
1305                 ASSERT((*ndvp)->sdev_attr);
1306                 gethrestime(&now);
1307                 (*ndvp)->sdev_attr->va_ctime = now;
1308                 (*ndvp)->sdev_attr->va_atime = now;
1309         }
1310 
1311         if (nddv->sdev_attrvp) {
1312                 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1313                     AT_MTIME|AT_ATIME);
1314         } else {
1315                 ASSERT(nddv->sdev_attr);
1316                 gethrestime(&now);
1317                 nddv->sdev_attr->va_mtime = now;
1318                 nddv->sdev_attr->va_atime = now;
1319         }
1320         rw_exit(&nddv->sdev_contents);
1321         if (!samedir)
1322                 rw_exit(&oddv->sdev_contents);
1323 
1324         SDEV_RELE(*ndvp);
1325         return (error);
1326 
1327 err_out:
1328         if (link != NULL) {
1329                 kmem_free(link, strlen(link) + 1);
1330                 link = NULL;
1331         }
1332 
1333         rw_exit(&nddv->sdev_contents);
1334         if (!samedir)
1335                 rw_exit(&oddv->sdev_contents);
1336         return (error);
1337 }
1338 
1339 /*
1340  * Merge sdev_node specific information into an attribute structure.
1341  *
1342  * note: sdev_node is not locked here
1343  */
1344 void
1345 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1346 {
1347         struct vnode *vp = SDEVTOV(dv);
1348 
1349         vap->va_nlink = dv->sdev_nlink;
1350         vap->va_nodeid = dv->sdev_ino;
1351         vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1352         vap->va_type = vp->v_type;
1353 
1354         if (vp->v_type == VDIR) {
1355                 vap->va_rdev = 0;
1356                 vap->va_fsid = vp->v_rdev;
1357         } else if (vp->v_type == VLNK) {
1358                 vap->va_rdev = 0;
1359                 vap->va_mode  &= ~S_IFMT;
1360                 vap->va_mode |= S_IFLNK;
1361         } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1362                 vap->va_rdev = vp->v_rdev;
1363                 vap->va_mode &= ~S_IFMT;
1364                 if (vap->va_type == VCHR)
1365                         vap->va_mode |= S_IFCHR;
1366                 else
1367                         vap->va_mode |= S_IFBLK;
1368         } else {
1369                 vap->va_rdev = 0;
1370         }
1371 }
1372 
1373 struct vattr *
1374 sdev_getdefault_attr(enum vtype type)
1375 {
1376         if (type == VDIR)
1377                 return (&sdev_vattr_dir);
1378         else if (type == VCHR)
1379                 return (&sdev_vattr_chr);
1380         else if (type == VBLK)
1381                 return (&sdev_vattr_blk);
1382         else if (type == VLNK)
1383                 return (&sdev_vattr_lnk);
1384         else
1385                 return (NULL);
1386 }
1387 int
1388 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1389 {
1390         int rv = 0;
1391         struct vnode *vp = SDEVTOV(dv);
1392 
1393         switch (vp->v_type) {
1394         case VCHR:
1395         case VBLK:
1396                 /*
1397                  * If vnode is a device, return special vnode instead
1398                  * (though it knows all about -us- via sp->s_realvp)
1399                  */
1400                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1401                 VN_RELE(vp);
1402                 if (*vpp == NULLVP)
1403                         rv = ENOSYS;
1404                 break;
1405         default:        /* most types are returned as is */
1406                 *vpp = vp;
1407                 break;
1408         }
1409         return (rv);
1410 }
1411 
1412 /*
1413  * junction between devname and root file system, e.g. ufs
1414  */
1415 int
1416 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1417 {
1418         struct vnode *rdvp = ddv->sdev_attrvp;
1419         int rval = 0;
1420 
1421         ASSERT(rdvp);
1422 
1423         rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1424             NULL);
1425         return (rval);
1426 }
1427 
1428 static int
1429 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1430 {
1431         struct sdev_node *dv = NULL;
1432         char    *nm;
1433         struct vnode *dirvp;
1434         int     error;
1435         vnode_t *vp;
1436         int eof;
1437         struct iovec iov;
1438         struct uio uio;
1439         struct dirent64 *dp;
1440         dirent64_t *dbuf;
1441         size_t dbuflen;
1442         struct vattr vattr;
1443         char *link = NULL;
1444 
1445         if (ddv->sdev_attrvp == NULL)
1446                 return (0);
1447         if (!(ddv->sdev_flags & SDEV_BUILD))
1448                 return (0);
1449 
1450         dirvp = ddv->sdev_attrvp;
1451         VN_HOLD(dirvp);
1452         dbuf = kmem_zalloc(dlen, KM_SLEEP);
1453 
1454         uio.uio_iov = &iov;
1455         uio.uio_iovcnt = 1;
1456         uio.uio_segflg = UIO_SYSSPACE;
1457         uio.uio_fmode = 0;
1458         uio.uio_extflg = UIO_COPY_CACHED;
1459         uio.uio_loffset = 0;
1460         uio.uio_llimit = MAXOFFSET_T;
1461 
1462         eof = 0;
1463         error = 0;
1464         while (!error && !eof) {
1465                 uio.uio_resid = dlen;
1466                 iov.iov_base = (char *)dbuf;
1467                 iov.iov_len = dlen;
1468                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1469                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1470                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1471 
1472                 dbuflen = dlen - uio.uio_resid;
1473                 if (error || dbuflen == 0)
1474                         break;
1475 
1476                 if (!(ddv->sdev_flags & SDEV_BUILD))
1477                         break;
1478 
1479                 for (dp = dbuf; ((intptr_t)dp <
1480                     (intptr_t)dbuf + dbuflen);
1481                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1482                         nm = dp->d_name;
1483 
1484                         if (strcmp(nm, ".") == 0 ||
1485                             strcmp(nm, "..") == 0)
1486                                 continue;
1487 
1488                         vp = NULLVP;
1489                         dv = sdev_cache_lookup(ddv, nm);
1490                         if (dv) {
1491                                 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1492                                 SDEV_SIMPLE_RELE(dv);
1493                                 continue;
1494                         }
1495 
1496                         /* refill the cache if not already */
1497                         error = devname_backstore_lookup(ddv, nm, &vp);
1498                         if (error)
1499                                 continue;
1500 
1501                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1502                         error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1503                         if (error)
1504                                 continue;
1505 
1506                         if (vattr.va_type == VLNK) {
1507                                 error = sdev_getlink(vp, &link);
1508                                 if (error) {
1509                                         continue;
1510                                 }
1511                                 ASSERT(link != NULL);
1512                         }
1513 
1514                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
1515                                 rw_exit(&ddv->sdev_contents);
1516                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
1517                         }
1518                         error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1519                             cred, SDEV_READY);
1520                         rw_downgrade(&ddv->sdev_contents);
1521 
1522                         if (link != NULL) {
1523                                 kmem_free(link, strlen(link) + 1);
1524                                 link = NULL;
1525                         }
1526 
1527                         if (!error) {
1528                                 ASSERT(dv);
1529                                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1530                                 SDEV_SIMPLE_RELE(dv);
1531                         }
1532                         vp = NULL;
1533                         dv = NULL;
1534                 }
1535         }
1536 
1537 done:
1538         VN_RELE(dirvp);
1539         kmem_free(dbuf, dlen);
1540 
1541         return (error);
1542 }
1543 
1544 void
1545 sdev_filldir_dynamic(struct sdev_node *ddv)
1546 {
1547         int error;
1548         int i;
1549         struct vattr vattr;
1550         struct vattr *vap = &vattr;
1551         char *nm = NULL;
1552         struct sdev_node *dv = NULL;
1553 
1554         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1555         ASSERT((ddv->sdev_flags & SDEV_BUILD));
1556 
1557         *vap = *sdev_getdefault_attr(VDIR);     /* note structure copy here */
1558         gethrestime(&vap->va_atime);
1559         vap->va_mtime = vap->va_atime;
1560         vap->va_ctime = vap->va_atime;
1561         for (i = 0; vtab[i].vt_name != NULL; i++) {
1562                 /*
1563                  * This early, we may be in a read-only /dev environment: leave
1564                  * the creation of any nodes we'd attempt to persist to
1565                  * devfsadm. Because /dev itself is normally persistent, any
1566                  * node which is not marked dynamic will end up being marked
1567                  * persistent. However, some nodes are both dynamic and
1568                  * persistent, mostly lofi and rlofi, so we need to be careful
1569                  * in our check.
1570                  */
1571                 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1572                     !(vtab[i].vt_flags & SDEV_DYNAMIC))
1573                         continue;
1574                 nm = vtab[i].vt_name;
1575                 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1576                 dv = NULL;
1577                 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1578                     NULL, kcred, SDEV_READY);
1579                 if (error) {
1580                         cmn_err(CE_WARN, "%s/%s: error %d\n",
1581                             ddv->sdev_name, nm, error);
1582                 } else {
1583                         ASSERT(dv);
1584                         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1585                         SDEV_SIMPLE_RELE(dv);
1586                 }
1587         }
1588 }
1589 
1590 /*
1591  * Creating a backing store entry based on sdev_attr.
1592  * This is called either as part of node creation in a persistent directory
1593  * or from setattr/setsecattr to persist access attributes across reboot.
1594  */
1595 int
1596 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1597 {
1598         int error = 0;
1599         struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1600         struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1601         struct vattr *vap = dv->sdev_attr;
1602         char *nm = dv->sdev_name;
1603         struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1604 
1605         ASSERT(dv && dv->sdev_name && rdvp);
1606         ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1607 
1608 lookup:
1609         /* try to find it in the backing store */
1610         error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1611             NULL);
1612         if (error == 0) {
1613                 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1614                         VN_HOLD(rrvp);
1615                         VN_RELE(*rvp);
1616                         *rvp = rrvp;
1617                 }
1618 
1619                 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1620                 dv->sdev_attr = NULL;
1621                 dv->sdev_attrvp = *rvp;
1622                 return (0);
1623         }
1624 
1625         /* let's try to persist the node */
1626         gethrestime(&vap->va_atime);
1627         vap->va_mtime = vap->va_atime;
1628         vap->va_ctime = vap->va_atime;
1629         vap->va_mask |= AT_TYPE|AT_MODE;
1630         switch (vap->va_type) {
1631         case VDIR:
1632                 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1633                 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1634                     (void *)(*rvp), error));
1635                 if (!error)
1636                         VN_RELE(*rvp);
1637                 break;
1638         case VCHR:
1639         case VBLK:
1640         case VREG:
1641         case VDOOR:
1642                 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1643                     rvp, cred, 0, NULL, NULL);
1644                 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1645                     (void *)(*rvp), error));
1646                 if (!error)
1647                         VN_RELE(*rvp);
1648                 break;
1649         case VLNK:
1650                 ASSERT(dv->sdev_symlink);
1651                 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1652                     NULL, 0);
1653                 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1654                     error));
1655                 break;
1656         default:
1657                 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1658                     "create\n", nm);
1659                 /*NOTREACHED*/
1660         }
1661 
1662         /* go back to lookup to factor out spec node and set attrvp */
1663         if (error == 0)
1664                 goto lookup;
1665 
1666         sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1667         return (error);
1668 }
1669 
1670 static void
1671 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1672 {
1673         struct sdev_node *dup = NULL;
1674 
1675         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1676         if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1677                 sdev_direnter(ddv, *dv);
1678         } else {
1679                 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1680                 SDEV_SIMPLE_RELE(*dv);
1681                 sdev_nodedestroy(*dv, 0);
1682                 *dv = dup;
1683         }
1684 }
1685 
1686 static void
1687 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1688 {
1689         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1690         sdev_dirdelete(ddv, *dv);
1691 }
1692 
1693 /*
1694  * update the in-core directory cache
1695  */
1696 void
1697 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1698     sdev_cache_ops_t ops)
1699 {
1700         ASSERT((SDEV_HELD(*dv)));
1701 
1702         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1703         switch (ops) {
1704         case SDEV_CACHE_ADD:
1705                 sdev_cache_add(ddv, dv, nm);
1706                 break;
1707         case SDEV_CACHE_DELETE:
1708                 sdev_cache_delete(ddv, dv);
1709                 break;
1710         default:
1711                 break;
1712         }
1713 }
1714 
1715 /*
1716  * retrieve the named entry from the directory cache
1717  */
1718 struct sdev_node *
1719 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1720 {
1721         struct sdev_node *dv = NULL;
1722 
1723         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1724         dv = sdev_findbyname(ddv, nm);
1725 
1726         return (dv);
1727 }
1728 
1729 /*
1730  * Implicit reconfig for nodes constructed by a link generator
1731  * Start devfsadm if needed, or if devfsadm is in progress,
1732  * prepare to block on devfsadm either completing or
1733  * constructing the desired node.  As devfsadmd is global
1734  * in scope, constructing all necessary nodes, we only
1735  * need to initiate it once.
1736  */
1737 static int
1738 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1739 {
1740         int error = 0;
1741 
1742         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1743                 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1744                     ddv->sdev_name, nm, devfsadm_state));
1745                 mutex_enter(&dv->sdev_lookup_lock);
1746                 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1747                 mutex_exit(&dv->sdev_lookup_lock);
1748                 error = 0;
1749         } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1750                 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1751                     ddv->sdev_name, nm, devfsadm_state));
1752 
1753                 sdev_devfsadmd_thread(ddv, dv, kcred);
1754                 mutex_enter(&dv->sdev_lookup_lock);
1755                 SDEV_BLOCK_OTHERS(dv,
1756                     (SDEV_LOOKUP | SDEV_LGWAITING));
1757                 mutex_exit(&dv->sdev_lookup_lock);
1758                 error = 0;
1759         } else {
1760                 error = -1;
1761         }
1762 
1763         return (error);
1764 }
1765 
1766 /*
1767  *  Support for specialized device naming construction mechanisms
1768  */
1769 static int
1770 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1771     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1772     void *, char *), int flags, struct cred *cred)
1773 {
1774         int rv = 0;
1775         char *physpath = NULL;
1776         struct vattr vattr;
1777         struct vattr *vap = &vattr;
1778         struct sdev_node *dv = NULL;
1779 
1780         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1781         if (flags & SDEV_VLINK) {
1782                 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1783                 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1784                     NULL);
1785                 if (rv) {
1786                         kmem_free(physpath, MAXPATHLEN);
1787                         return (-1);
1788                 }
1789 
1790                 *vap = *sdev_getdefault_attr(VLNK);     /* structure copy */
1791                 vap->va_size = strlen(physpath);
1792                 gethrestime(&vap->va_atime);
1793                 vap->va_mtime = vap->va_atime;
1794                 vap->va_ctime = vap->va_atime;
1795 
1796                 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1797                     (void *)physpath, cred, SDEV_READY);
1798                 kmem_free(physpath, MAXPATHLEN);
1799                 if (rv)
1800                         return (rv);
1801         } else if (flags & SDEV_VATTR) {
1802                 /*
1803                  * /dev/pts
1804                  *
1805                  * callback is responsible to set the basic attributes,
1806                  * e.g. va_type/va_uid/va_gid/
1807                  *    dev_t if VCHR or VBLK/
1808                  */
1809                 ASSERT(callback);
1810                 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1811                 if (rv) {
1812                         sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1813                             "callback failed \n"));
1814                         return (-1);
1815                 }
1816 
1817                 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1818                     cred, SDEV_READY);
1819 
1820                 if (rv)
1821                         return (rv);
1822 
1823         } else {
1824                 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1825                     SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1826                     __LINE__));
1827                 rv = -1;
1828         }
1829 
1830         *dvp = dv;
1831         return (rv);
1832 }
1833 
/*
 * Return 1 when exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
        return (strcmp(exec_name, "devfsadm") == 0);
}
1846 
1847 /*
1848  * Lookup Order:
1849  *      sdev_node cache;
1850  *      backing store (SDEV_PERSIST);
1851  *      DBNR: a. dir_ops implemented in the loadable modules;
1852  *            b. vnode ops in vtab.
1853  */
1854 int
1855 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1856     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1857     struct cred *, void *, char *), int flags)
1858 {
1859         int rv = 0, nmlen;
1860         struct vnode *rvp = NULL;
1861         struct sdev_node *dv = NULL;
1862         int     retried = 0;
1863         int     error = 0;
1864         struct vattr vattr;
1865         char *lookup_thread = curproc->p_user.u_comm;
1866         int failed_flags = 0;
1867         int (*vtor)(struct sdev_node *) = NULL;
1868         int state;
1869         int parent_state;
1870         char *link = NULL;
1871 
1872         if (SDEVTOV(ddv)->v_type != VDIR)
1873                 return (ENOTDIR);
1874 
1875         /*
1876          * Empty name or ., return node itself.
1877          */
1878         nmlen = strlen(nm);
1879         if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1880                 *vpp = SDEVTOV(ddv);
1881                 VN_HOLD(*vpp);
1882                 return (0);
1883         }
1884 
1885         /*
1886          * .., return the parent directory
1887          */
1888         if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1889                 *vpp = SDEVTOV(ddv->sdev_dotdot);
1890                 VN_HOLD(*vpp);
1891                 return (0);
1892         }
1893 
1894         rw_enter(&ddv->sdev_contents, RW_READER);
1895         if (ddv->sdev_flags & SDEV_VTOR) {
1896                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1897                 ASSERT(vtor);
1898         }
1899 
1900 tryagain:
1901         /*
1902          * (a) directory cache lookup:
1903          */
1904         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1905         parent_state = ddv->sdev_state;
1906         dv = sdev_cache_lookup(ddv, nm);
1907         if (dv) {
1908                 state = dv->sdev_state;
1909                 switch (state) {
1910                 case SDEV_INIT:
1911                         if (is_devfsadm_thread(lookup_thread))
1912                                 break;
1913 
1914                         /* ZOMBIED parent won't allow node creation */
1915                         if (parent_state == SDEV_ZOMBIE) {
1916                                 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1917                                     retried);
1918                                 goto nolock_notfound;
1919                         }
1920 
1921                         mutex_enter(&dv->sdev_lookup_lock);
1922                         /* compensate the threads started after devfsadm */
1923                         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1924                             !(SDEV_IS_LOOKUP(dv)))
1925                                 SDEV_BLOCK_OTHERS(dv,
1926                                     (SDEV_LOOKUP | SDEV_LGWAITING));
1927 
1928                         if (SDEV_IS_LOOKUP(dv)) {
1929                                 failed_flags |= SLF_REBUILT;
1930                                 rw_exit(&ddv->sdev_contents);
1931                                 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1932                                 mutex_exit(&dv->sdev_lookup_lock);
1933                                 rw_enter(&ddv->sdev_contents, RW_READER);
1934 
1935                                 if (error != 0) {
1936                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1937                                             retried);
1938                                         goto nolock_notfound;
1939                                 }
1940 
1941                                 state = dv->sdev_state;
1942                                 if (state == SDEV_INIT) {
1943                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1944                                             retried);
1945                                         goto nolock_notfound;
1946                                 } else if (state == SDEV_READY) {
1947                                         goto found;
1948                                 } else if (state == SDEV_ZOMBIE) {
1949                                         rw_exit(&ddv->sdev_contents);
1950                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1951                                             retried);
1952                                         SDEV_RELE(dv);
1953                                         goto lookup_failed;
1954                                 }
1955                         } else {
1956                                 mutex_exit(&dv->sdev_lookup_lock);
1957                         }
1958                         break;
1959                 case SDEV_READY:
1960                         goto found;
1961                 case SDEV_ZOMBIE:
1962                         rw_exit(&ddv->sdev_contents);
1963                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1964                         SDEV_RELE(dv);
1965                         goto lookup_failed;
1966                 default:
1967                         rw_exit(&ddv->sdev_contents);
1968                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1969                         sdev_lookup_failed(ddv, nm, failed_flags);
1970                         *vpp = NULLVP;
1971                         return (ENOENT);
1972                 }
1973         }
1974         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1975 
1976         /*
1977          * ZOMBIED parent does not allow new node creation.
1978          * bail out early
1979          */
1980         if (parent_state == SDEV_ZOMBIE) {
1981                 rw_exit(&ddv->sdev_contents);
1982                 *vpp = NULLVP;
1983                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1984                 return (ENOENT);
1985         }
1986 
1987         /*
1988          * (b0): backing store lookup
1989          *      SDEV_PERSIST is default except:
1990          *              1) pts nodes
1991          *              2) non-chmod'ed local nodes
1992          *              3) zvol nodes
1993          */
1994         if (SDEV_IS_PERSIST(ddv)) {
1995                 error = devname_backstore_lookup(ddv, nm, &rvp);
1996 
1997                 if (!error) {
1998 
1999                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2000                         error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2001                         if (error) {
2002                                 rw_exit(&ddv->sdev_contents);
2003                                 if (dv)
2004                                         SDEV_RELE(dv);
2005                                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2006                                 sdev_lookup_failed(ddv, nm, failed_flags);
2007                                 *vpp = NULLVP;
2008                                 return (ENOENT);
2009                         }
2010 
2011                         if (vattr.va_type == VLNK) {
2012                                 error = sdev_getlink(rvp, &link);
2013                                 if (error) {
2014                                         rw_exit(&ddv->sdev_contents);
2015                                         if (dv)
2016                                                 SDEV_RELE(dv);
2017                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
2018                                             retried);
2019                                         sdev_lookup_failed(ddv, nm,
2020                                             failed_flags);
2021                                         *vpp = NULLVP;
2022                                         return (ENOENT);
2023                                 }
2024                                 ASSERT(link != NULL);
2025                         }
2026 
2027                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
2028                                 rw_exit(&ddv->sdev_contents);
2029                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
2030                         }
2031                         error = sdev_mknode(ddv, nm, &dv, &vattr,
2032                             rvp, link, cred, SDEV_READY);
2033                         rw_downgrade(&ddv->sdev_contents);
2034 
2035                         if (link != NULL) {
2036                                 kmem_free(link, strlen(link) + 1);
2037                                 link = NULL;
2038                         }
2039 
2040                         if (error) {
2041                                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2042                                 rw_exit(&ddv->sdev_contents);
2043                                 if (dv)
2044                                         SDEV_RELE(dv);
2045                                 goto lookup_failed;
2046                         } else {
2047                                 goto found;
2048                         }
2049                 } else if (retried) {
2050                         rw_exit(&ddv->sdev_contents);
2051                         sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2052                             ddv->sdev_name, nm));
2053                         if (dv)
2054                                 SDEV_RELE(dv);
2055                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2056                         sdev_lookup_failed(ddv, nm, failed_flags);
2057                         *vpp = NULLVP;
2058                         return (ENOENT);
2059                 }
2060         }
2061 
2062 lookup_create_node:
2063         /* first thread that is doing the lookup on this node */
2064         if (callback) {
2065                 ASSERT(dv == NULL);
2066                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2067                         rw_exit(&ddv->sdev_contents);
2068                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2069                 }
2070                 error = sdev_call_dircallback(ddv, &dv, nm, callback,
2071                     flags, cred);
2072                 rw_downgrade(&ddv->sdev_contents);
2073                 if (error == 0) {
2074                         goto found;
2075                 } else {
2076                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2077                         rw_exit(&ddv->sdev_contents);
2078                         goto lookup_failed;
2079                 }
2080         }
2081         if (!dv) {
2082                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2083                         rw_exit(&ddv->sdev_contents);
2084                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2085                 }
2086                 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2087                     cred, SDEV_INIT);
2088                 if (!dv) {
2089                         rw_exit(&ddv->sdev_contents);
2090                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2091                         sdev_lookup_failed(ddv, nm, failed_flags);
2092                         *vpp = NULLVP;
2093                         return (ENOENT);
2094                 }
2095                 rw_downgrade(&ddv->sdev_contents);
2096         }
2097 
2098         /*
2099          * (b1) invoking devfsadm once per life time for devfsadm nodes
2100          */
2101         ASSERT(SDEV_HELD(dv));
2102 
2103         if (SDEV_IS_NO_NCACHE(dv))
2104                 failed_flags |= SLF_NO_NCACHE;
2105         if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2106             SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2107             ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2108                 ASSERT(SDEV_HELD(dv));
2109                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2110                 goto nolock_notfound;
2111         }
2112 
2113         /*
2114          * filter out known non-existent devices recorded
2115          * during initial reconfiguration boot for which
2116          * reconfig should not be done and lookup may
2117          * be short-circuited now.
2118          */
2119         if (sdev_lookup_filter(ddv, nm)) {
2120                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2121                 goto nolock_notfound;
2122         }
2123 
2124         /* bypassing devfsadm internal nodes */
2125         if (is_devfsadm_thread(lookup_thread)) {
2126                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2127                 goto nolock_notfound;
2128         }
2129 
2130         if (sdev_reconfig_disable) {
2131                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2132                 goto nolock_notfound;
2133         }
2134 
2135         error = sdev_call_devfsadmd(ddv, dv, nm);
2136         if (error == 0) {
2137                 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2138                     ddv->sdev_name, nm, curproc->p_user.u_comm));
2139                 if (sdev_reconfig_verbose) {
2140                         cmn_err(CE_CONT,
2141                             "?lookup of %s/%s by %s: reconfig\n",
2142                             ddv->sdev_name, nm, curproc->p_user.u_comm);
2143                 }
2144                 retried = 1;
2145                 failed_flags |= SLF_REBUILT;
2146                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2147                 SDEV_SIMPLE_RELE(dv);
2148                 goto tryagain;
2149         } else {
2150                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2151                 goto nolock_notfound;
2152         }
2153 
2154 found:
2155         ASSERT(dv->sdev_state == SDEV_READY);
2156         if (vtor) {
2157                 /*
2158                  * Check validity of returned node
2159                  */
2160                 switch (vtor(dv)) {
2161                 case SDEV_VTOR_VALID:
2162                         break;
2163                 case SDEV_VTOR_STALE:
2164                         /*
2165                          * The name exists, but the cache entry is
2166                          * stale and needs to be re-created.
2167                          */
2168                         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2169                         if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2170                                 rw_exit(&ddv->sdev_contents);
2171                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
2172                         }
2173                         sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2174                         rw_downgrade(&ddv->sdev_contents);
2175                         SDEV_RELE(dv);
2176                         dv = NULL;
2177                         goto lookup_create_node;
2178                         /* FALLTHRU */
2179                 case SDEV_VTOR_INVALID:
2180                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2181                         sdcmn_err7(("lookup: destroy invalid "
2182                             "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2183                         goto nolock_notfound;
2184                 case SDEV_VTOR_SKIP:
2185                         sdcmn_err7(("lookup: node not applicable - "
2186                             "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2187                         rw_exit(&ddv->sdev_contents);
2188                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2189                         SDEV_RELE(dv);
2190                         goto lookup_failed;
2191                 default:
2192                         cmn_err(CE_PANIC,
2193                             "dev fs: validator failed: %s(%p)\n",
2194                             dv->sdev_name, (void *)dv);
2195                         break;
2196                 }
2197         }
2198 
2199         rw_exit(&ddv->sdev_contents);
2200         rv = sdev_to_vp(dv, vpp);
2201         sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2202             "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2203             dv->sdev_state, nm, rv));
2204         return (rv);
2205 
2206 nolock_notfound:
2207         /*
2208          * Destroy the node that is created for synchronization purposes.
2209          */
2210         sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2211             nm, dv->sdev_state));
2212         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2213         if (dv->sdev_state == SDEV_INIT) {
2214                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2215                         rw_exit(&ddv->sdev_contents);
2216                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2217                 }
2218 
2219                 /*
2220                  * Node state may have changed during the lock
2221                  * changes. Re-check.
2222                  */
2223                 if (dv->sdev_state == SDEV_INIT) {
2224                         sdev_dirdelete(ddv, dv);
2225                         rw_exit(&ddv->sdev_contents);
2226                         sdev_lookup_failed(ddv, nm, failed_flags);
2227                         SDEV_RELE(dv);
2228                         *vpp = NULL;
2229                         return (ENOENT);
2230                 }
2231         }
2232 
2233         rw_exit(&ddv->sdev_contents);
2234         SDEV_RELE(dv);
2235 
2236 lookup_failed:
2237         sdev_lookup_failed(ddv, nm, failed_flags);
2238         *vpp = NULL;
2239         return (ENOENT);
2240 }
2241 
2242 /*
2243  * Given a directory node, mark all nodes beneath as
2244  * STALE, i.e. nodes that don't exist as far as new
2245  * consumers are concerned.  Remove them from the
2246  * list of directory entries so that no lookup or
2247  * directory traversal will find them.  The node
2248  * not deallocated so existing holds are not affected.
2249  */
2250 void
2251 sdev_stale(struct sdev_node *ddv)
2252 {
2253         struct sdev_node *dv;
2254         struct vnode *vp;
2255 
2256         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2257 
2258         rw_enter(&ddv->sdev_contents, RW_WRITER);
2259         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2260                 vp = SDEVTOV(dv);
2261                 SDEV_HOLD(dv);
2262                 if (vp->v_type == VDIR)
2263                         sdev_stale(dv);
2264 
2265                 sdev_dirdelete(ddv, dv);
2266                 SDEV_RELE(dv);
2267         }
2268         ddv->sdev_flags |= SDEV_BUILD;
2269         rw_exit(&ddv->sdev_contents);
2270 }
2271 
2272 /*
2273  * Given a directory node, clean out all the nodes beneath.
2274  * If expr is specified, clean node with names matching expr.
2275  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2276  *      so they are excluded from future lookups.
2277  */
2278 int
2279 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2280 {
2281         int error = 0;
2282         int busy = 0;
2283         struct vnode *vp;
2284         struct sdev_node *dv;
2285         int bkstore = 0;
2286         int len = 0;
2287         char *bks_name = NULL;
2288 
2289         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2290 
2291         /*
2292          * We try our best to destroy all unused sdev_node's
2293          */
2294         rw_enter(&ddv->sdev_contents, RW_WRITER);
2295         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2296                 vp = SDEVTOV(dv);
2297 
2298                 if (expr && gmatch(dv->sdev_name, expr) == 0)
2299                         continue;
2300 
2301                 if (vp->v_type == VDIR &&
2302                     sdev_cleandir(dv, NULL, flags) != 0) {
2303                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2304                             dv->sdev_name));
2305                         busy++;
2306                         continue;
2307                 }
2308 
2309                 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2310                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2311                             dv->sdev_name));
2312                         busy++;
2313                         continue;
2314                 }
2315 
2316                 /*
2317                  * at this point, either dv is not held or SDEV_ENFORCE
2318                  * is specified. In either case, dv needs to be deleted
2319                  */
2320                 SDEV_HOLD(dv);
2321 
2322                 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2323                 if (bkstore && (vp->v_type == VDIR))
2324                         bkstore += 1;
2325 
2326                 if (bkstore) {
2327                         len = strlen(dv->sdev_name) + 1;
2328                         bks_name = kmem_alloc(len, KM_SLEEP);
2329                         bcopy(dv->sdev_name, bks_name, len);
2330                 }
2331 
2332                 sdev_dirdelete(ddv, dv);
2333 
2334                 /* take care the backing store clean up */
2335                 if (bkstore) {
2336                         ASSERT(bks_name);
2337                         ASSERT(ddv->sdev_attrvp);
2338 
2339                         if (bkstore == 1) {
2340                                 error = VOP_REMOVE(ddv->sdev_attrvp,
2341                                     bks_name, kcred, NULL, 0);
2342                         } else if (bkstore == 2) {
2343                                 error = VOP_RMDIR(ddv->sdev_attrvp,
2344                                     bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2345                         }
2346 
2347                         /* do not propagate the backing store errors */
2348                         if (error) {
2349                                 sdcmn_err9(("sdev_cleandir: backing store"
2350                                     "not cleaned\n"));
2351                                 error = 0;
2352                         }
2353 
2354                         bkstore = 0;
2355                         kmem_free(bks_name, len);
2356                         bks_name = NULL;
2357                         len = 0;
2358                 }
2359 
2360                 ddv->sdev_flags |= SDEV_BUILD;
2361                 SDEV_RELE(dv);
2362         }
2363 
2364         ddv->sdev_flags |= SDEV_BUILD;
2365         rw_exit(&ddv->sdev_contents);
2366 
2367         if (busy) {
2368                 error = EBUSY;
2369         }
2370 
2371         return (error);
2372 }
2373 
2374 /*
2375  * a convenient wrapper for readdir() funcs
2376  */
2377 size_t
2378 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2379 {
2380         size_t reclen = DIRENT64_RECLEN(strlen(nm));
2381         if (reclen > size)
2382                 return (0);
2383 
2384         de->d_ino = (ino64_t)ino;
2385         de->d_off = (off64_t)off + 1;
2386         de->d_reclen = (ushort_t)reclen;
2387         (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2388         return (reclen);
2389 }
2390 
2391 /*
2392  * sdev_mount service routines
2393  */
2394 int
2395 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2396 {
2397         int     error;
2398 
2399         if (uap->datalen != sizeof (*args))
2400                 return (EINVAL);
2401 
2402         if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2403                 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2404                     "get user data. error %d\n", error);
2405                 return (EFAULT);
2406         }
2407 
2408         return (0);
2409 }
2410 
/*
 * nextdp(dp): pointer to the dirent64 record located d_reclen bytes
 * after dp.  Any earlier definition of nextdp is discarded first.
 */
#undef nextdp
#define nextdp(dp)      \
        ((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2416 
2417 /*
2418  * readdir helper func
2419  */
2420 int
2421 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2422     int flags)
2423 {
2424         struct sdev_node *ddv = VTOSDEV(vp);
2425         struct sdev_node *dv;
2426         dirent64_t      *dp;
2427         ulong_t         outcount = 0;
2428         size_t          namelen;
2429         ulong_t         alloc_count;
2430         void            *outbuf;
2431         struct iovec    *iovp;
2432         int             error = 0;
2433         size_t          reclen;
2434         offset_t        diroff;
2435         offset_t        soff;
2436         int             this_reclen;
2437         int (*vtor)(struct sdev_node *) = NULL;
2438         struct vattr attr;
2439         timestruc_t now;
2440 
2441         ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2442         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2443 
2444         if (uiop->uio_loffset >= MAXOFF_T) {
2445                 if (eofp)
2446                         *eofp = 1;
2447                 return (0);
2448         }
2449 
2450         if (uiop->uio_iovcnt != 1)
2451                 return (EINVAL);
2452 
2453         if (vp->v_type != VDIR)
2454                 return (ENOTDIR);
2455 
2456         if (ddv->sdev_flags & SDEV_VTOR) {
2457                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2458                 ASSERT(vtor);
2459         }
2460 
2461         if (eofp != NULL)
2462                 *eofp = 0;
2463 
2464         soff = uiop->uio_loffset;
2465         iovp = uiop->uio_iov;
2466         alloc_count = iovp->iov_len;
2467         dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2468         outcount = 0;
2469 
2470         if (ddv->sdev_state == SDEV_ZOMBIE)
2471                 goto get_cache;
2472 
2473         if (SDEV_IS_GLOBAL(ddv)) {
2474 
2475                 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2476                     !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2477                     !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2478                     ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2479                     !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2480                     !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2481                     !sdev_reconfig_disable) {
2482                         /*
2483                          * invoking "devfsadm" to do system device reconfig
2484                          */
2485                         mutex_enter(&ddv->sdev_lookup_lock);
2486                         SDEV_BLOCK_OTHERS(ddv,
2487                             (SDEV_READDIR|SDEV_LGWAITING));
2488                         mutex_exit(&ddv->sdev_lookup_lock);
2489 
2490                         sdcmn_err8(("readdir of %s by %s: reconfig\n",
2491                             ddv->sdev_path, curproc->p_user.u_comm));
2492                         if (sdev_reconfig_verbose) {
2493                                 cmn_err(CE_CONT,
2494                                     "?readdir of %s by %s: reconfig\n",
2495                                     ddv->sdev_path, curproc->p_user.u_comm);
2496                         }
2497 
2498                         sdev_devfsadmd_thread(ddv, NULL, kcred);
2499                 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2500                         /*
2501                          * compensate the "ls" started later than "devfsadm"
2502                          */
2503                         mutex_enter(&ddv->sdev_lookup_lock);
2504                         SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2505                         mutex_exit(&ddv->sdev_lookup_lock);
2506                 }
2507 
2508                 /*
2509                  * release the contents lock so that
2510                  * the cache may be updated by devfsadmd
2511                  */
2512                 rw_exit(&ddv->sdev_contents);
2513                 mutex_enter(&ddv->sdev_lookup_lock);
2514                 if (SDEV_IS_READDIR(ddv))
2515                         (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2516                 mutex_exit(&ddv->sdev_lookup_lock);
2517                 rw_enter(&ddv->sdev_contents, RW_READER);
2518 
2519                 sdcmn_err4(("readdir of directory %s by %s\n",
2520                     ddv->sdev_name, curproc->p_user.u_comm));
2521                 if (ddv->sdev_flags & SDEV_BUILD) {
2522                         if (SDEV_IS_PERSIST(ddv)) {
2523                                 error = sdev_filldir_from_store(ddv,
2524                                     alloc_count, cred);
2525                         }
2526                         ddv->sdev_flags &= ~SDEV_BUILD;
2527                 }
2528         }
2529 
2530 get_cache:
2531         /* handle "." and ".." */
2532         diroff = 0;
2533         if (soff == 0) {
2534                 /* first time */
2535                 this_reclen = DIRENT64_RECLEN(1);
2536                 if (alloc_count < this_reclen) {
2537                         error = EINVAL;
2538                         goto done;
2539                 }
2540 
2541                 dp->d_ino = (ino64_t)ddv->sdev_ino;
2542                 dp->d_off = (off64_t)1;
2543                 dp->d_reclen = (ushort_t)this_reclen;
2544 
2545                 (void) strncpy(dp->d_name, ".",
2546                     DIRENT64_NAMELEN(this_reclen));
2547                 outcount += dp->d_reclen;
2548                 dp = nextdp(dp);
2549         }
2550 
2551         diroff++;
2552         if (soff <= 1) {
2553                 this_reclen = DIRENT64_RECLEN(2);
2554                 if (alloc_count < outcount + this_reclen) {
2555                         error = EINVAL;
2556                         goto done;
2557                 }
2558 
2559                 dp->d_reclen = (ushort_t)this_reclen;
2560                 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2561                 dp->d_off = (off64_t)2;
2562 
2563                 (void) strncpy(dp->d_name, "..",
2564                     DIRENT64_NAMELEN(this_reclen));
2565                 outcount += dp->d_reclen;
2566 
2567                 dp = nextdp(dp);
2568         }
2569 
2570 
2571         /* gets the cache */
2572         diroff++;
2573         for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2574             dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2575                 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2576                     diroff, soff, dv->sdev_name));
2577 
2578                 /* bypassing pre-matured nodes */
2579                 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2580                         sdcmn_err3(("sdev_readdir: pre-mature node  "
2581                             "%s %d\n", dv->sdev_name, dv->sdev_state));
2582                         continue;
2583                 }
2584 
2585                 /*
2586                  * Check validity of node
2587                  * Drop invalid and nodes to be skipped.
2588                  * A node the validator indicates as stale needs
2589                  * to be returned as presumably the node name itself
2590                  * is valid and the node data itself will be refreshed
2591                  * on lookup.  An application performing a readdir then
2592                  * stat on each entry should thus always see consistent
2593                  * data.  In any case, it is not possible to synchronize
2594                  * with dynamic kernel state, and any view we return can
2595                  * never be anything more than a snapshot at a point in time.
2596                  */
2597                 if (vtor) {
2598                         switch (vtor(dv)) {
2599                         case SDEV_VTOR_VALID:
2600                                 break;
2601                         case SDEV_VTOR_INVALID:
2602                         case SDEV_VTOR_SKIP:
2603                                 continue;
2604                         case SDEV_VTOR_STALE:
2605                                 sdcmn_err3(("sdev_readir: %s stale\n",
2606                                     dv->sdev_name));
2607                                 break;
2608                         default:
2609                                 cmn_err(CE_PANIC,
2610                                     "dev fs: validator failed: %s(%p)\n",
2611                                     dv->sdev_name, (void *)dv);
2612                                 break;
2613                         /*NOTREACHED*/
2614                         }
2615                 }
2616 
2617                 namelen = strlen(dv->sdev_name);
2618                 reclen = DIRENT64_RECLEN(namelen);
2619                 if (outcount + reclen > alloc_count) {
2620                         goto full;
2621                 }
2622                 dp->d_reclen = (ushort_t)reclen;
2623                 dp->d_ino = (ino64_t)dv->sdev_ino;
2624                 dp->d_off = (off64_t)diroff + 1;
2625                 (void) strncpy(dp->d_name, dv->sdev_name,
2626                     DIRENT64_NAMELEN(reclen));
2627                 outcount += reclen;
2628                 dp = nextdp(dp);
2629         }
2630 
2631 full:
2632         sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2633             "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2634             (void *)dv));
2635 
2636         if (outcount)
2637                 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2638 
2639         if (!error) {
2640                 uiop->uio_loffset = diroff;
2641                 if (eofp)
2642                         *eofp = dv ? 0 : 1;
2643         }
2644 
2645 
2646         if (ddv->sdev_attrvp) {
2647                 gethrestime(&now);
2648                 attr.va_ctime = now;
2649                 attr.va_atime = now;
2650                 attr.va_mask = AT_CTIME|AT_ATIME;
2651 
2652                 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2653         }
2654 done:
2655         kmem_free(outbuf, alloc_count);
2656         return (error);
2657 }
2658 
2659 static int
2660 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2661 {
2662         vnode_t *vp;
2663         vnode_t *cvp;
2664         struct sdev_node *svp;
2665         char *nm;
2666         struct pathname pn;
2667         int error;
2668         int persisted = 0;
2669 
2670         ASSERT(INGLOBALZONE(curproc));
2671 
2672         if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2673                 return (error);
2674         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2675 
2676         vp = rootdir;
2677         VN_HOLD(vp);
2678 
2679         while (pn_pathleft(&pn)) {
2680                 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2681                 (void) pn_getcomponent(&pn, nm);
2682 
2683                 /*
2684                  * Deal with the .. special case where we may be
2685                  * traversing up across a mount point, to the
2686                  * root of this filesystem or global root.
2687                  */
2688                 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2689 checkforroot:
2690                         if (VN_CMP(vp, rootdir)) {
2691                                 nm[1] = 0;
2692                         } else if (vp->v_flag & VROOT) {
2693                                 vfs_t *vfsp;
2694                                 cvp = vp;
2695                                 vfsp = cvp->v_vfsp;
2696                                 vfs_rlock_wait(vfsp);
2697                                 vp = cvp->v_vfsp->vfs_vnodecovered;
2698                                 if (vp == NULL ||
2699                                     (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2700                                         vfs_unlock(vfsp);
2701                                         VN_RELE(cvp);
2702                                         error = EIO;
2703                                         break;
2704                                 }
2705                                 VN_HOLD(vp);
2706                                 vfs_unlock(vfsp);
2707                                 VN_RELE(cvp);
2708                                 cvp = NULL;
2709                                 goto checkforroot;
2710                         }
2711                 }
2712 
2713                 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2714                     NULL, NULL);
2715                 if (error) {
2716                         VN_RELE(vp);
2717                         break;
2718                 }
2719 
2720                 /* traverse mount points encountered on our journey */
2721                 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2722                         VN_RELE(vp);
2723                         VN_RELE(cvp);
2724                         break;
2725                 }
2726 
2727                 /*
2728                  * symbolic link, can be either relative and absolute
2729                  */
2730                 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2731                         struct pathname linkpath;
2732                         pn_alloc(&linkpath);
2733                         if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2734                                 pn_free(&linkpath);
2735                                 break;
2736                         }
2737                         if (pn_pathleft(&linkpath) == 0)
2738                                 (void) pn_set(&linkpath, ".");
2739                         error = pn_insert(&pn, &linkpath, strlen(nm));
2740                         pn_free(&linkpath);
2741                         if (pn.pn_pathlen == 0) {
2742                                 VN_RELE(vp);
2743                                 return (ENOENT);
2744                         }
2745                         if (pn.pn_path[0] == '/') {
2746                                 pn_skipslash(&pn);
2747                                 VN_RELE(vp);
2748                                 VN_RELE(cvp);
2749                                 vp = rootdir;
2750                                 VN_HOLD(vp);
2751                         } else {
2752                                 VN_RELE(cvp);
2753                         }
2754                         continue;
2755                 }
2756 
2757                 VN_RELE(vp);
2758 
2759                 /*
2760                  * Direct the operation to the persisting filesystem
2761                  * underlying /dev.  Bail if we encounter a
2762                  * non-persistent dev entity here.
2763                  */
2764                 if (cvp->v_vfsp->vfs_fstype == devtype) {
2765 
2766                         if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2767                                 error = ENOENT;
2768                                 VN_RELE(cvp);
2769                                 break;
2770                         }
2771 
2772                         if (VTOSDEV(cvp) == NULL) {
2773                                 error = ENOENT;
2774                                 VN_RELE(cvp);
2775                                 break;
2776                         }
2777                         svp = VTOSDEV(cvp);
2778                         if ((vp = svp->sdev_attrvp) == NULL) {
2779                                 error = ENOENT;
2780                                 VN_RELE(cvp);
2781                                 break;
2782                         }
2783                         persisted = 1;
2784                         VN_HOLD(vp);
2785                         VN_RELE(cvp);
2786                         cvp = vp;
2787                 }
2788 
2789                 vp = cvp;
2790                 pn_skipslash(&pn);
2791         }
2792 
2793         kmem_free(nm, MAXNAMELEN);
2794         pn_free(&pn);
2795 
2796         if (error)
2797                 return (error);
2798 
2799         /*
2800          * Only return persisted nodes in the filesystem underlying /dev.
2801          */
2802         if (!persisted) {
2803                 VN_RELE(vp);
2804                 return (ENOENT);
2805         }
2806 
2807         *r_vp = vp;
2808         return (0);
2809 }
2810 
2811 int
2812 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2813         int *npathsp, int *npathsp_alloc, int checking_empty)
2814 {
2815         char    **pathlist = NULL;
2816         char    **newlist = NULL;
2817         int     npaths = 0;
2818         int     npaths_alloc = 0;
2819         dirent64_t *dbuf = NULL;
2820         int     n;
2821         char    *s;
2822         int error;
2823         vnode_t *vp;
2824         int eof;
2825         struct iovec iov;
2826         struct uio uio;
2827         struct dirent64 *dp;
2828         size_t dlen;
2829         size_t dbuflen;
2830         int ndirents = 64;
2831         char *nm;
2832 
2833         error = sdev_modctl_lookup(dir, &vp);
2834         sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2835             dir, curproc->p_user.u_comm,
2836             (error == 0) ? "ok" : "failed"));
2837         if (error)
2838                 return (error);
2839 
2840         dlen = ndirents * (sizeof (*dbuf));
2841         dbuf = kmem_alloc(dlen, KM_SLEEP);
2842 
2843         uio.uio_iov = &iov;
2844         uio.uio_iovcnt = 1;
2845         uio.uio_segflg = UIO_SYSSPACE;
2846         uio.uio_fmode = 0;
2847         uio.uio_extflg = UIO_COPY_CACHED;
2848         uio.uio_loffset = 0;
2849         uio.uio_llimit = MAXOFFSET_T;
2850 
2851         eof = 0;
2852         error = 0;
2853         while (!error && !eof) {
2854                 uio.uio_resid = dlen;
2855                 iov.iov_base = (char *)dbuf;
2856                 iov.iov_len = dlen;
2857 
2858                 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2859                 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2860                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2861 
2862                 dbuflen = dlen - uio.uio_resid;
2863 
2864                 if (error || dbuflen == 0)
2865                         break;
2866 
2867                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2868                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2869 
2870                         nm = dp->d_name;
2871 
2872                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2873                                 continue;
2874                         if (npaths == npaths_alloc) {
2875                                 npaths_alloc += 64;
2876                                 newlist = (char **)
2877                                     kmem_zalloc((npaths_alloc + 1) *
2878                                     sizeof (char *), KM_SLEEP);
2879                                 if (pathlist) {
2880                                         bcopy(pathlist, newlist,
2881                                             npaths * sizeof (char *));
2882                                         kmem_free(pathlist,
2883                                             (npaths + 1) * sizeof (char *));
2884                                 }
2885                                 pathlist = newlist;
2886                         }
2887                         n = strlen(nm) + 1;
2888                         s = kmem_alloc(n, KM_SLEEP);
2889                         bcopy(nm, s, n);
2890                         pathlist[npaths++] = s;
2891                         sdcmn_err11(("  %s/%s\n", dir, s));
2892 
2893                         /* if checking empty, one entry is as good as many */
2894                         if (checking_empty) {
2895                                 eof = 1;
2896                                 break;
2897                         }
2898                 }
2899         }
2900 
2901 exit:
2902         VN_RELE(vp);
2903 
2904         if (dbuf)
2905                 kmem_free(dbuf, dlen);
2906 
2907         if (error)
2908                 return (error);
2909 
2910         *dirlistp = pathlist;
2911         *npathsp = npaths;
2912         *npathsp_alloc = npaths_alloc;
2913 
2914         return (0);
2915 }
2916 
/*
 * Free a name list produced by sdev_modctl_readdir().
 *
 * pathlist     - array of names, or NULL
 * npaths       - number of names in the array; each was allocated with
 *                strlen(name) + 1 bytes
 * npaths_alloc - slot count reported by sdev_modctl_readdir(); the array
 *                holds npaths_alloc + 1 pointers
 *
 * sdev_modctl_readdir() reports a NULL list with zero counts when the
 * directory has no entries, so there is nothing to free in that case;
 * handing NULL with a non-zero size to kmem_free() is not a valid free.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
        int     i, n;

        if (pathlist == NULL)
                return;

        for (i = 0; i < npaths; i++) {
                n = strlen(pathlist[i]) + 1;
                kmem_free(pathlist[i], n);
        }

        kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2929 
2930 int
2931 sdev_modctl_devexists(const char *path)
2932 {
2933         vnode_t *vp;
2934         int error;
2935 
2936         error = sdev_modctl_lookup(path, &vp);
2937         sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2938             path, curproc->p_user.u_comm,
2939             (error == 0) ? "ok" : "failed"));
2940         if (error == 0)
2941                 VN_RELE(vp);
2942 
2943         return (error);
2944 }
2945 
2946 extern int sdev_vnodeops_tbl_size;
2947 
2948 /*
2949  * construct a new template with overrides from vtab
2950  */
2951 static fs_operation_def_t *
2952 sdev_merge_vtab(const fs_operation_def_t tab[])
2953 {
2954         fs_operation_def_t *new;
2955         const fs_operation_def_t *tab_entry;
2956 
2957         /* make a copy of standard vnode ops table */
2958         new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2959         bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2960 
2961         /* replace the overrides from tab */
2962         for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2963                 fs_operation_def_t *std_entry = new;
2964                 while (std_entry->name) {
2965                         if (strcmp(tab_entry->name, std_entry->name) == 0) {
2966                                 std_entry->func = tab_entry->func;
2967                                 break;
2968                         }
2969                         std_entry++;
2970                 }
2971                 if (std_entry->name == NULL)
2972                         cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2973                             tab_entry->name);
2974         }
2975 
2976         return (new);
2977 }
2978 
2979 /* free memory allocated by sdev_merge_vtab */
2980 static void
2981 sdev_free_vtab(fs_operation_def_t *new)
2982 {
2983         kmem_free(new, sdev_vnodeops_tbl_size);
2984 }
2985 
2986 /*
2987  * a generic setattr() function
2988  *
2989  * note: flags only supports AT_UID and AT_GID.
2990  *       Future enhancements can be done for other types, e.g. AT_MODE
2991  */
2992 int
2993 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2994     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2995     int), int protocol)
2996 {
2997         struct sdev_node        *dv = VTOSDEV(vp);
2998         struct sdev_node        *parent = dv->sdev_dotdot;
2999         struct vattr            *get;
3000         uint_t                  mask = vap->va_mask;
3001         int                     error;
3002 
3003         /* some sanity checks */
3004         if (vap->va_mask & AT_NOSET)
3005                 return (EINVAL);
3006 
3007         if (vap->va_mask & AT_SIZE) {
3008                 if (vp->v_type == VDIR) {
3009                         return (EISDIR);
3010                 }
3011         }
3012 
3013         /* no need to set attribute, but do not fail either */
3014         ASSERT(parent);
3015         rw_enter(&parent->sdev_contents, RW_READER);
3016         if (dv->sdev_state == SDEV_ZOMBIE) {
3017                 rw_exit(&parent->sdev_contents);
3018                 return (0);
3019         }
3020 
3021         /* If backing store exists, just set it. */
3022         if (dv->sdev_attrvp) {
3023                 rw_exit(&parent->sdev_contents);
3024                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3025         }
3026 
3027         /*
3028          * Otherwise, for nodes with the persistence attribute, create it.
3029          */
3030         ASSERT(dv->sdev_attr);
3031         if (SDEV_IS_PERSIST(dv) ||
3032             ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3033                 sdev_vattr_merge(dv, vap);
3034                 rw_enter(&dv->sdev_contents, RW_WRITER);
3035                 error = sdev_shadow_node(dv, cred);
3036                 rw_exit(&dv->sdev_contents);
3037                 rw_exit(&parent->sdev_contents);
3038 
3039                 if (error)
3040                         return (error);
3041                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3042         }
3043 
3044 
3045         /*
3046          * sdev_attr was allocated in sdev_mknode
3047          */
3048         rw_enter(&dv->sdev_contents, RW_WRITER);
3049         error = secpolicy_vnode_setattr(cred, vp, vap,
3050             dv->sdev_attr, flags, sdev_unlocked_access, dv);
3051         if (error) {
3052                 rw_exit(&dv->sdev_contents);
3053                 rw_exit(&parent->sdev_contents);
3054                 return (error);
3055         }
3056 
3057         get = dv->sdev_attr;
3058         if (mask & AT_MODE) {
3059                 get->va_mode &= S_IFMT;
3060                 get->va_mode |= vap->va_mode & ~S_IFMT;
3061         }
3062 
3063         if ((mask & AT_UID) || (mask & AT_GID)) {
3064                 if (mask & AT_UID)
3065                         get->va_uid = vap->va_uid;
3066                 if (mask & AT_GID)
3067                         get->va_gid = vap->va_gid;
3068                 /*
3069                  * a callback must be provided if the protocol is set
3070                  */
3071                 if ((protocol & AT_UID) || (protocol & AT_GID)) {
3072                         ASSERT(callback);
3073                         error = callback(dv, get, protocol);
3074                         if (error) {
3075                                 rw_exit(&dv->sdev_contents);
3076                                 rw_exit(&parent->sdev_contents);
3077                                 return (error);
3078                         }
3079                 }
3080         }
3081 
3082         if (mask & AT_ATIME)
3083                 get->va_atime = vap->va_atime;
3084         if (mask & AT_MTIME)
3085                 get->va_mtime = vap->va_mtime;
3086         if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3087                 gethrestime(&get->va_ctime);
3088         }
3089 
3090         sdev_vattr_merge(dv, get);
3091         rw_exit(&dv->sdev_contents);
3092         rw_exit(&parent->sdev_contents);
3093         return (0);
3094 }
3095 
3096 /*
3097  * a generic inactive() function
3098  */
3099 /*ARGSUSED*/
3100 void
3101 devname_inactive_func(struct vnode *vp, struct cred *cred,
3102     void (*callback)(struct vnode *))
3103 {
3104         int clean;
3105         struct sdev_node *dv = VTOSDEV(vp);
3106         int state;
3107 
3108         mutex_enter(&vp->v_lock);
3109         ASSERT(vp->v_count >= 1);
3110 
3111 
3112         if (vp->v_count == 1 && callback != NULL)
3113                 callback(vp);
3114 
3115         rw_enter(&dv->sdev_contents, RW_WRITER);
3116         state = dv->sdev_state;
3117 
3118         clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3119 
3120         /*
3121          * sdev is a rather bad public citizen. It violates the general
3122          * agreement that in memory nodes should always have a valid reference
3123          * count on their vnode. But that's not the case here. This means that
3124          * we do actually have to distinguish between getting inactive callbacks
3125          * for zombies and otherwise. This should probably be fixed.
3126          */
3127         if (clean) {
3128                 /* Remove the . entry to ourselves */
3129                 if (vp->v_type == VDIR) {
3130                         decr_link(dv);
3131                 }
3132                 VERIFY(dv->sdev_nlink == 1);
3133                 decr_link(dv);
3134                 --vp->v_count;
3135                 rw_exit(&dv->sdev_contents);
3136                 mutex_exit(&vp->v_lock);
3137                 sdev_nodedestroy(dv, 0);
3138         } else {
3139                 --vp->v_count;
3140                 rw_exit(&dv->sdev_contents);
3141                 mutex_exit(&vp->v_lock);
3142         }
3143 }