1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 /*
  41  * VM - segment of a mapped device.
  42  *
  43  * This segment driver is used when mapping character special devices.
  44  */
  45 
  46 #include <sys/types.h>
  47 #include <sys/t_lock.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/vtrace.h>
  50 #include <sys/systm.h>
  51 #include <sys/vmsystm.h>
  52 #include <sys/mman.h>
  53 #include <sys/errno.h>
  54 #include <sys/kmem.h>
  55 #include <sys/cmn_err.h>
  56 #include <sys/vnode.h>
  57 #include <sys/proc.h>
  58 #include <sys/conf.h>
  59 #include <sys/debug.h>
  60 #include <sys/ddidevmap.h>
  61 #include <sys/ddi_implfuncs.h>
  62 #include <sys/lgrp.h>
  63 
  64 #include <vm/page.h>
  65 #include <vm/hat.h>
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_dev.h>
  69 #include <vm/seg_kp.h>
  70 #include <vm/seg_kmem.h>
  71 #include <vm/vpage.h>
  72 
  73 #include <sys/sunddi.h>
  74 #include <sys/esunddi.h>
  75 #include <sys/fs/snode.h>
  76 
  77 
  78 #if DEBUG
  79 int segdev_debug;
  80 #define DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
  81 #else
  82 #define DEBUGF(level, args)
  83 #endif
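     /*
      * segdev_debug can be patched (e.g. from a kernel debugger) to enable
      * DEBUGF output; a message prints when its level is <= segdev_debug.
      */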
  84 
  85 /* Default timeout for devmap context management */
  86 #define CTX_TIMEOUT_VALUE 0
  87 
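     /*
      * dh_lock is only initialized (DEVMAP_LOCK_INITED) for handles created
      * with DEVMAP_ALLOW_REMAP, so these macros are no-ops for all other
      * handles.
      */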
  88 #define HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  89                         { mutex_enter(&dhp->dh_lock); }
  90 
  91 #define RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  92                         { mutex_exit(&dhp->dh_lock); }
  93 
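     /* Power-of-2 rounding helpers; 's' is assumed to be a power of two. */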
  94 #define round_down_p2(a, s)     ((a) & ~((s) - 1))
  95 #define round_up_p2(a, s)       (((a) + (s) - 1) & ~((s) - 1))
  96 
  97 /*
  98  * VA_PA_ALIGNED checks to see if both VA and PA are on a pgsize boundary
  99  * VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t. pgsize
 100  */
 101 #define VA_PA_ALIGNED(uvaddr, paddr, pgsize)            \
 102         (((uvaddr | paddr) & (pgsize - 1)) == 0)
 103 #define VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)     \
 104         (((uvaddr ^ paddr) & (pgsize - 1)) == 0)
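     /*
      * Example: with pgsize = 4M, VA_PA_ALIGNED() requires both the VA and the
      * PA to be on a 4M boundary, while VA_PA_PGSIZE_ALIGNED() only requires
      * that they share the same offset within a 4M page (i.e., VA and PA are
      * congruent modulo pgsize).
      */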
 105 
 106 #define vpgtob(n)       ((n) * sizeof (struct vpage))   /* For brevity */
 107 
 108 #define VTOCVP(vp)      (VTOS(vp)->s_commonvp)       /* we "know" it's an snode */
 109 
 110 static struct devmap_ctx *devmapctx_list = NULL;
 111 static struct devmap_softlock *devmap_slist = NULL;
 112 
 113 /*
  114  * Vnode and page for the page of zeros we use for the trash mappings.
  115  * One trash page is allocated on the first ddi_umem_setup call that uses it.
 116  * XXX Eventually, we may want to combine this with what segnf does when all
 117  * hat layers implement HAT_NOFAULT.
 118  *
 119  * The trash page is used when the backing store for a userland mapping is
 120  * removed but the application semantics do not take kindly to a SIGBUS.
  121  * In that scenario, the application's pages are mapped to some dummy page
  122  * which returns garbage on reads, and writes go into a common place.
  123  * (Perfect for NO_FAULT semantics)
  124  * The device driver is responsible for communicating to the app, through
  125  * some other mechanism, that such a remapping has happened and that the
  126  * app should take corrective action.
  127  * We could also use an anonymous memory page, as there is no requirement to
  128  * keep the page locked; however, this complicates the fault code. RFE.
 129  */
 130 static struct vnode trashvp;
 131 static struct page *trashpp;
 132 
 133 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
 134 static vmem_t *umem_np_arena;
 135 
 136 /* Set the cookie to a value we know will never be a valid umem_cookie */
 137 #define DEVMAP_DEVMEM_COOKIE    ((ddi_umem_cookie_t)0x1)
 138 
 139 /*
  140  * Macros to check the type of a devmap handle
 141  */
 142 #define cookie_is_devmem(c)     \
 143         ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
 144 
 145 #define cookie_is_pmem(c)       \
 146         ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
 147 
 148 #define cookie_is_kpmem(c)      (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
 149         ((c)->type == KMEM_PAGEABLE))
 150 
 151 #define dhp_is_devmem(dhp)      \
 152         (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 153 
 154 #define dhp_is_pmem(dhp)        \
 155         (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 156 
 157 #define dhp_is_kpmem(dhp)       \
 158         (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 159 
 160 /*
 161  * Private seg op routines.
 162  */
 163 static int      segdev_dup(struct seg *, struct seg *);
 164 static int      segdev_unmap(struct seg *, caddr_t, size_t);
 165 static void     segdev_free(struct seg *);
 166 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
 167                     enum fault_type, enum seg_rw);
 168 static faultcode_t segdev_faulta(struct seg *, caddr_t);
 169 static int      segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
 170 static int      segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
 171 static void     segdev_badop(void);
 172 static int      segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
 173 static size_t   segdev_incore(struct seg *, caddr_t, size_t, char *);
 174 static int      segdev_lockop(struct seg *, caddr_t, size_t, int, int,
 175                     ulong_t *, size_t);
 176 static int      segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
 177 static u_offset_t       segdev_getoffset(struct seg *, caddr_t);
 178 static int      segdev_gettype(struct seg *, caddr_t);
 179 static int      segdev_getvp(struct seg *, caddr_t, struct vnode **);
 180 static int      segdev_advise(struct seg *, caddr_t, size_t, uint_t);
 181 static void     segdev_dump(struct seg *);
 182 static int      segdev_pagelock(struct seg *, caddr_t, size_t,
 183                     struct page ***, enum lock_type, enum seg_rw);
 184 static int      segdev_getmemid(struct seg *, caddr_t, memid_t *);
 185 
 186 /*
 187  * XXX  this struct is used by rootnex_map_fault to identify
 188  *      the segment it has been passed. So if you make it
 189  *      "static" you'll need to fix rootnex_map_fault.
 190  */
 191 struct seg_ops segdev_ops = {
 192         .dup            = segdev_dup,
 193         .unmap          = segdev_unmap,
 194         .free           = segdev_free,
 195         .fault          = segdev_fault,
 196         .faulta         = segdev_faulta,
 197         .setprot        = segdev_setprot,
 198         .checkprot      = segdev_checkprot,
 199         .kluster        = (int (*)())segdev_badop,
 200         .sync           = segdev_sync,
 201         .incore         = segdev_incore,
 202         .lockop         = segdev_lockop,
 203         .getprot        = segdev_getprot,
 204         .getoffset      = segdev_getoffset,
 205         .gettype        = segdev_gettype,
 206         .getvp          = segdev_getvp,
 207         .advise         = segdev_advise,
 208         .dump           = segdev_dump,
 209         .pagelock       = segdev_pagelock,
 210         .getmemid       = segdev_getmemid,
 211 };
 212 
 213 /*
 214  * Private segdev support routines
 215  */
 216 static struct segdev_data *sdp_alloc(void);
 217 
 218 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
 219     size_t, enum seg_rw);
 220 
 221 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
 222     struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
 223 
 224 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
 225     size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
 226 
 227 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
 228 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
 229 static void devmap_softlock_rele(devmap_handle_t *);
 230 static void devmap_ctx_rele(devmap_handle_t *);
 231 
 232 static void devmap_ctxto(void *);
 233 
 234 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
 235     caddr_t addr);
 236 
 237 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
 238     ulong_t *opfn, ulong_t *pagesize);
 239 
 240 static void free_devmap_handle(devmap_handle_t *dhp);
 241 
 242 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 243     struct seg *newseg);
 244 
 245 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
 246 
 247 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
 248 
 249 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
 250 
 251 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
 252     offset_t off, size_t len, uint_t flags);
 253 
 254 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
 255     caddr_t addr, size_t *llen, caddr_t *laddr);
 256 
 257 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
 258 
 259 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
 260 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
 261 
 262 static void *devmap_umem_alloc_np(size_t size, size_t flags);
 263 static void devmap_umem_free_np(void *addr, size_t size);
 264 
 265 /*
 266  * routines to lock and unlock underlying segkp segment for
 267  * KMEM_PAGEABLE type cookies.
 268  */
 269 static faultcode_t  acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
 270 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
 271 
 272 /*
 273  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
 274  * drivers with devmap_access callbacks
 275  */
 276 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
 277         enum fault_type);
 278 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
 279         enum fault_type);
 280 
 281 static kmutex_t devmapctx_lock;
 282 
 283 static kmutex_t devmap_slock;
 284 
 285 /*
 286  * Initialize the thread callbacks and thread private data.
 287  */
 288 static struct devmap_ctx *
 289 devmap_ctxinit(dev_t dev, ulong_t id)
 290 {
 291         struct devmap_ctx       *devctx;
 292         struct devmap_ctx       *tmp;
 293         dev_info_t              *dip;
 294 
  295         tmp = kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
 296 
 297         mutex_enter(&devmapctx_lock);
 298 
 299         dip = e_ddi_hold_devi_by_dev(dev, 0);
 300         ASSERT(dip != NULL);
 301         ddi_release_devi(dip);
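             /*
              * dip is used only as a search key for the devctx list below, so
              * the hold taken by e_ddi_hold_devi_by_dev() can be released
              * right away.
              */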
 302 
 303         for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
 304                 if ((devctx->dip == dip) && (devctx->id == id))
 305                         break;
 306 
 307         if (devctx == NULL) {
 308                 devctx = tmp;
 309                 devctx->dip = dip;
 310                 devctx->id = id;
 311                 mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
 312                 cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
 313                 devctx->next = devmapctx_list;
 314                 devmapctx_list = devctx;
 315         } else
 316                 kmem_free(tmp, sizeof (struct devmap_ctx));
 317 
 318         mutex_enter(&devctx->lock);
 319         devctx->refcnt++;
 320         mutex_exit(&devctx->lock);
 321         mutex_exit(&devmapctx_lock);
 322 
 323         return (devctx);
 324 }
 325 
 326 /*
 327  * Timeout callback called if a CPU has not given up the device context
 328  * within dhp->dh_timeout_length ticks
 329  */
 330 static void
 331 devmap_ctxto(void *data)
 332 {
 333         struct devmap_ctx *devctx = data;
 334 
 335         TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
 336             "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
 337         mutex_enter(&devctx->lock);
 338         /*
  339          * Set oncpu = 0 so the next mapping trying to get the device context
  340          * can get it.
 341          */
 342         devctx->oncpu = 0;
 343         devctx->timeout = 0;
 344         cv_signal(&devctx->cv);
 345         mutex_exit(&devctx->lock);
 346 }
 347 
 348 /*
 349  * Create a device segment.
 350  */
 351 int
 352 segdev_create(struct seg *seg, void *argsp)
 353 {
 354         struct segdev_data *sdp;
 355         struct segdev_crargs *a = (struct segdev_crargs *)argsp;
 356         devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
 357         int error;
 358 
 359         /*
 360          * Since the address space is "write" locked, we
 361          * don't need the segment lock to protect "segdev" data.
 362          */
 363         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 364 
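             /*
              * Let the HAT know about the new range so it can set up whatever
              * per-segment resources it needs before translations are loaded.
              */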
 365         hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 366 
 367         sdp = sdp_alloc();
 368 
 369         sdp->mapfunc = a->mapfunc;
 370         sdp->offset = a->offset;
 371         sdp->prot = a->prot;
 372         sdp->maxprot = a->maxprot;
 373         sdp->type = a->type;
 374         sdp->pageprot = 0;
 375         sdp->softlockcnt = 0;
 376         sdp->vpage = NULL;
 377 
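             /*
              * A NULL mapfunc means this is a devmap(9E) framework mapping, so
              * keep the devmap handle chain; otherwise there are no devmap
              * handles to track.
              */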
 378         if (sdp->mapfunc == NULL)
 379                 sdp->devmap_data = dhp;
 380         else
 381                 sdp->devmap_data = dhp = NULL;
 382 
 383         sdp->hat_flags = a->hat_flags;
 384         sdp->hat_attr = a->hat_attr;
 385 
 386         /*
 387          * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
 388          */
 389         ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
 390 
 391         /*
 392          * Hold shadow vnode -- segdev only deals with
 393          * character (VCHR) devices. We use the common
 394          * vp to hang pages on.
 395          */
 396         sdp->vp = specfind(a->dev, VCHR);
 397         ASSERT(sdp->vp != NULL);
 398 
 399         seg->s_ops = &segdev_ops;
 400         seg->s_data = sdp;
 401 
 402         while (dhp != NULL) {
 403                 dhp->dh_seg = seg;
 404                 dhp = dhp->dh_next;
 405         }
 406 
 407         /*
 408          * Inform the vnode of the new mapping.
 409          */
 410         /*
  411          * It is ok to pass sdp->maxprot to ADDMAP rather than to use the
  412          * dhp-specific maxprot because spec_addmap does not use maxprot.
  413          */
 414         error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
 415             seg->s_as, seg->s_base, seg->s_size,
 416             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 417 
 418         if (error != 0) {
 419                 sdp->devmap_data = NULL;
 420                 hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
 421                     HAT_UNLOAD_UNMAP);
 422         } else {
 423                 /*
 424                  * Mappings of /dev/null don't count towards the VSZ of a
 425                  * process.  Mappings of /dev/null have no mapping type.
 426                  */
 427                 if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
 428                     MAP_PRIVATE)) == 0) {
 429                         seg->s_as->a_resvsize -= seg->s_size;
 430                 }
 431         }
 432 
 433         return (error);
 434 }
 435 
 436 static struct segdev_data *
 437 sdp_alloc(void)
 438 {
 439         struct segdev_data *sdp;
 440 
 441         sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
 442         rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
 443 
 444         return (sdp);
 445 }
 446 
 447 /*
 448  * Duplicate seg and return new segment in newseg.
 449  */
 450 static int
 451 segdev_dup(struct seg *seg, struct seg *newseg)
 452 {
 453         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 454         struct segdev_data *newsdp;
 455         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 456         size_t npages;
 457         int ret;
 458 
 459         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
 460             "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);
 461 
 462         DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
 463             (void *)dhp, (void *)seg));
 464 
 465         /*
 466          * Since the address space is "write" locked, we
 467          * don't need the segment lock to protect "segdev" data.
 468          */
 469         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 470 
 471         newsdp = sdp_alloc();
 472 
 473         newseg->s_ops = seg->s_ops;
 474         newseg->s_data = (void *)newsdp;
 475 
 476         VN_HOLD(sdp->vp);
 477         newsdp->vp   = sdp->vp;
 478         newsdp->mapfunc = sdp->mapfunc;
 479         newsdp->offset       = sdp->offset;
 480         newsdp->pageprot = sdp->pageprot;
 481         newsdp->prot = sdp->prot;
 482         newsdp->maxprot = sdp->maxprot;
 483         newsdp->type = sdp->type;
 484         newsdp->hat_attr = sdp->hat_attr;
 485         newsdp->hat_flags = sdp->hat_flags;
 486         newsdp->softlockcnt = 0;
 487 
 488         /*
 489          * Initialize per page data if the segment we are
 490          * dup'ing has per page information.
 491          */
 492         npages = seg_pages(newseg);
 493 
 494         if (sdp->vpage != NULL) {
 495                 size_t nbytes = vpgtob(npages);
 496 
 497                 newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
 498                 bcopy(sdp->vpage, newsdp->vpage, nbytes);
 499         } else
 500                 newsdp->vpage = NULL;
 501 
 502         /*
 503          * duplicate devmap handles
 504          */
 505         if (dhp != NULL) {
 506                 ret = devmap_handle_dup(dhp,
 507                     (devmap_handle_t **)&newsdp->devmap_data, newseg);
 508                 if (ret != 0) {
 509                         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
 510                             "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
 511                             ret, (void *)dhp, (void *)seg);
 512                         DEBUGF(1, (CE_CONT,
 513                             "segdev_dup: ret %x dhp %p seg %p\n",
 514                             ret, (void *)dhp, (void *)seg));
 515                         return (ret);
 516                 }
 517         }
 518 
 519         /*
 520          * Inform the common vnode of the new mapping.
 521          */
 522         return (VOP_ADDMAP(VTOCVP(newsdp->vp),
 523             newsdp->offset, newseg->s_as,
 524             newseg->s_base, newseg->s_size, newsdp->prot,
 525             newsdp->maxprot, sdp->type, CRED(), NULL));
 526 }
 527 
 528 /*
 529  * duplicate devmap handles
 530  */
 531 static int
 532 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 533     struct seg *newseg)
 534 {
 535         devmap_handle_t *newdhp_save = NULL;
 536         devmap_handle_t *newdhp = NULL;
 537         struct devmap_callback_ctl *callbackops;
 538 
 539         while (dhp != NULL) {
 540                 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 541 
 542                 /* Need to lock the original dhp while copying if REMAP */
 543                 HOLD_DHP_LOCK(dhp);
 544                 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 545                 RELE_DHP_LOCK(dhp);
 546                 newdhp->dh_seg = newseg;
 547                 newdhp->dh_next = NULL;
 548                 if (newdhp_save != NULL)
 549                         newdhp_save->dh_next = newdhp;
 550                 else
 551                         *new_dhp = newdhp;
 552                 newdhp_save = newdhp;
 553 
 554                 callbackops = &newdhp->dh_callbackops;
 555 
 556                 if (dhp->dh_softlock != NULL)
 557                         newdhp->dh_softlock = devmap_softlock_init(
 558                             newdhp->dh_dev,
 559                             (ulong_t)callbackops->devmap_access);
 560                 if (dhp->dh_ctx != NULL)
 561                         newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 562                             (ulong_t)callbackops->devmap_access);
 563 
 564                 /*
 565                  * Initialize dh_lock if we want to do remap.
 566                  */
 567                 if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
 568                         mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
 569                         newdhp->dh_flags |= DEVMAP_LOCK_INITED;
 570                 }
 571 
 572                 if (callbackops->devmap_dup != NULL) {
 573                         int ret;
 574 
 575                         /*
 576                          * Call the dup callback so that the driver can
 577                          * duplicate its private data.
 578                          */
 579                         ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
 580                             (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
 581 
 582                         if (ret != 0) {
 583                                 /*
 584                                  * We want to free up this segment as the driver
 585                                  * has indicated that we can't dup it.  But we
  586                          * don't want to call the driver's devmap_unmap
  587                          * callback function as the driver does not
 588                                  * think this segment exists. The caller of
 589                                  * devmap_dup will call seg_free on newseg
 590                                  * as it was the caller that allocated the
 591                                  * segment.
 592                                  */
 593                                 DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
 594                                     "newdhp %p dhp %p\n", (void *)newdhp,
 595                                     (void *)dhp));
 596                                 callbackops->devmap_unmap = NULL;
 597                                 return (ret);
 598                         }
 599                 }
 600 
 601                 dhp = dhp->dh_next;
 602         }
 603 
 604         return (0);
 605 }
 606 
 607 /*
 608  * Split a segment at addr for length len.
 609  */
 610 /*ARGSUSED*/
 611 static int
 612 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
 613 {
 614         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 615         register struct segdev_data *nsdp;
 616         register struct seg *nseg;
 617         register size_t opages;         /* old segment size in pages */
 618         register size_t npages;         /* new segment size in pages */
 619         register size_t dpages;         /* pages being deleted (unmapped) */
 620         register size_t nbytes;
 621         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 622         devmap_handle_t *dhpp;
 623         devmap_handle_t *newdhp;
 624         struct devmap_callback_ctl *callbackops;
 625         caddr_t nbase;
 626         offset_t off;
 627         ulong_t nsize;
 628         size_t mlen, sz;
 629 
 630         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
 631             "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
 632             (void *)dhp, (void *)seg, (void *)addr, len);
 633 
 634         DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
 635             (void *)dhp, (void *)seg, (void *)addr, len));
 636 
 637         /*
 638          * Since the address space is "write" locked, we
 639          * don't need the segment lock to protect "segdev" data.
 640          */
 641         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 642 
 643         if ((sz = sdp->softlockcnt) > 0) {
 644                 /*
 645                  * Fail the unmap if pages are SOFTLOCKed through this mapping.
 646                  * softlockcnt is protected from change by the as write lock.
 647                  */
 648                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
 649                     "segdev_unmap:error softlockcnt = %ld", sz);
 650                 DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
 651                 return (EAGAIN);
 652         }
 653 
 654         /*
 655          * Check for bad sizes
 656          */
 657         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 658             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
 659                 panic("segdev_unmap");
 660 
 661         if (dhp != NULL) {
 662                 devmap_handle_t *tdhp;
 663                 /*
 664                  * If large page size was used in hat_devload(),
 665                  * the same page size must be used in hat_unload().
 666                  */
 667                 dhpp = tdhp = devmap_find_handle(dhp, addr);
 668                 while (tdhp != NULL) {
 669                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
 670                                 break;
 671                         }
 672                         tdhp = tdhp->dh_next;
 673                 }
 674                 if (tdhp != NULL) {     /* found a dhp using large pages */
 675                         size_t slen = len;
 676                         size_t mlen;
 677                         size_t soff;
 678 
 679                         soff = (ulong_t)(addr - dhpp->dh_uvaddr);
 680                         while (slen != 0) {
 681                                 mlen = MIN(slen, (dhpp->dh_len - soff));
 682                                 hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
 683                                     dhpp->dh_len, HAT_UNLOAD_UNMAP);
 684                                 dhpp = dhpp->dh_next;
 685                                 ASSERT(slen >= mlen);
 686                                 slen -= mlen;
 687                                 soff = 0;
 688                         }
 689                 } else
 690                         hat_unload(seg->s_as->a_hat, addr, len,
 691                             HAT_UNLOAD_UNMAP);
 692         } else {
 693                 /*
 694                  * Unload any hardware translations in the range
 695                  * to be taken out.
 696                  */
 697                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 698         }
 699 
 700         /*
  701          * get the user offset which will be used in the driver callbacks
 702          */
 703         off = sdp->offset + (offset_t)(addr - seg->s_base);
 704 
 705         /*
 706          * Inform the vnode of the unmapping.
 707          */
 708         ASSERT(sdp->vp != NULL);
 709         (void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
 710             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 711 
 712         /*
 713          * Check for entire segment
 714          */
 715         if (addr == seg->s_base && len == seg->s_size) {
 716                 seg_free(seg);
 717                 return (0);
 718         }
 719 
 720         opages = seg_pages(seg);
 721         dpages = btop(len);
 722         npages = opages - dpages;
 723 
 724         /*
 725          * Check for beginning of segment
 726          */
 727         if (addr == seg->s_base) {
 728                 if (sdp->vpage != NULL) {
 729                         register struct vpage *ovpage;
 730 
 731                         ovpage = sdp->vpage; /* keep pointer to vpage */
 732 
 733                         nbytes = vpgtob(npages);
 734                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 735                         bcopy(&ovpage[dpages], sdp->vpage, nbytes);
 736 
 737                         /* free up old vpage */
 738                         kmem_free(ovpage, vpgtob(opages));
 739                 }
 740 
 741                 /*
 742                  * free devmap handles from the beginning of the mapping.
 743                  */
 744                 if (dhp != NULL)
 745                         devmap_handle_unmap_head(dhp, len);
 746 
 747                 sdp->offset += (offset_t)len;
 748 
 749                 seg->s_base += len;
 750                 seg->s_size -= len;
 751 
 752                 return (0);
 753         }
 754 
 755         /*
 756          * Check for end of segment
 757          */
 758         if (addr + len == seg->s_base + seg->s_size) {
 759                 if (sdp->vpage != NULL) {
 760                         register struct vpage *ovpage;
 761 
 762                         ovpage = sdp->vpage; /* keep pointer to vpage */
 763 
 764                         nbytes = vpgtob(npages);
 765                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 766                         bcopy(ovpage, sdp->vpage, nbytes);
 767 
 768                         /* free up old vpage */
 769                         kmem_free(ovpage, vpgtob(opages));
 770                 }
 771                 seg->s_size -= len;
 772 
 773                 /*
 774                  * free devmap handles from addr to the end of the mapping.
 775                  */
 776                 if (dhp != NULL)
 777                         devmap_handle_unmap_tail(dhp, addr);
 778 
 779                 return (0);
 780         }
 781 
 782         /*
 783          * The section to go is in the middle of the segment,
 784          * have to make it into two segments.  nseg is made for
 785          * the high end while seg is cut down at the low end.
 786          */
 787         nbase = addr + len;                             /* new seg base */
 788         nsize = (seg->s_base + seg->s_size) - nbase;      /* new seg size */
 789         seg->s_size = addr - seg->s_base;         /* shrink old seg */
 790         nseg = seg_alloc(seg->s_as, nbase, nsize);
 791         if (nseg == NULL)
 792                 panic("segdev_unmap seg_alloc");
 793 
 794         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
 795             "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
 796         DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
 797             (void *)seg, (void *)nseg));
 798         nsdp = sdp_alloc();
 799 
 800         nseg->s_ops = seg->s_ops;
 801         nseg->s_data = (void *)nsdp;
 802 
 803         VN_HOLD(sdp->vp);
 804         nsdp->mapfunc = sdp->mapfunc;
 805         nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
 806         nsdp->vp     = sdp->vp;
 807         nsdp->pageprot = sdp->pageprot;
 808         nsdp->prot   = sdp->prot;
 809         nsdp->maxprot = sdp->maxprot;
 810         nsdp->type = sdp->type;
 811         nsdp->hat_attr = sdp->hat_attr;
 812         nsdp->hat_flags = sdp->hat_flags;
 813         nsdp->softlockcnt = 0;
 814 
 815         /*
  816          * Initialize per page data if the segment we are
  817          * splitting has per page information.
 818          */
 819         if (sdp->vpage != NULL) {
 820                 /* need to split vpage into two arrays */
 821                 register size_t nnbytes;
 822                 register size_t nnpages;
 823                 register struct vpage *ovpage;
 824 
 825                 ovpage = sdp->vpage;         /* keep pointer to vpage */
 826 
 827                 npages = seg_pages(seg);        /* seg has shrunk */
 828                 nbytes = vpgtob(npages);
 829                 nnpages = seg_pages(nseg);
 830                 nnbytes = vpgtob(nnpages);
 831 
 832                 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 833                 bcopy(ovpage, sdp->vpage, nbytes);
 834 
 835                 nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
 836                 bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
 837 
 838                 /* free up old vpage */
 839                 kmem_free(ovpage, vpgtob(opages));
 840         } else
 841                 nsdp->vpage = NULL;
 842 
 843         /*
 844          * unmap dhps.
 845          */
 846         if (dhp == NULL) {
 847                 nsdp->devmap_data = NULL;
 848                 return (0);
 849         }
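             /*
              * Walk the handle chain and adjust each dhp according to how it
              * overlaps the unmapped range <addr, addr+len>: it may end exactly
              * at addr, lie entirely below addr, enclose the range, straddle
              * its start or its end, or be entirely enclosed by it.  Handles
              * that end up past the hole are moved to nseg.
              */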
 850         while (dhp != NULL) {
 851                 callbackops = &dhp->dh_callbackops;
 852                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
 853                     "segdev_unmap: dhp=%p addr=%p", dhp, addr);
 854                 DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
 855                     (void *)dhp, (void *)addr,
 856                     (void *)dhp->dh_uvaddr, dhp->dh_len));
 857 
 858                 if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
 859                         dhpp = dhp->dh_next;
 860                         dhp->dh_next = NULL;
 861                         dhp = dhpp;
 862                 } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
 863                         dhp = dhp->dh_next;
 864                 } else if (addr > dhp->dh_uvaddr &&
 865                     (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
 866                         /*
 867                          * <addr, addr+len> is enclosed by dhp.
 868                          * create a newdhp that begins at addr+len and
 869                          * ends at dhp->dh_uvaddr+dhp->dh_len.
 870                          */
 871                         newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 872                         HOLD_DHP_LOCK(dhp);
 873                         bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 874                         RELE_DHP_LOCK(dhp);
 875                         newdhp->dh_seg = nseg;
 876                         newdhp->dh_next = dhp->dh_next;
 877                         if (dhp->dh_softlock != NULL)
 878                                 newdhp->dh_softlock = devmap_softlock_init(
 879                                     newdhp->dh_dev,
 880                                     (ulong_t)callbackops->devmap_access);
 881                         if (dhp->dh_ctx != NULL)
 882                                 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 883                                     (ulong_t)callbackops->devmap_access);
 884                         if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
 885                                 mutex_init(&newdhp->dh_lock,
 886                                     NULL, MUTEX_DEFAULT, NULL);
 887                         }
 888                         if (callbackops->devmap_unmap != NULL)
 889                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 890                                     off, len, dhp, &dhp->dh_pvtp,
 891                                     newdhp, &newdhp->dh_pvtp);
 892                         mlen = len + (addr - dhp->dh_uvaddr);
 893                         devmap_handle_reduce_len(newdhp, mlen);
 894                         nsdp->devmap_data = newdhp;
 895                         /* XX Changing len should recalculate LARGE flag */
 896                         dhp->dh_len = addr - dhp->dh_uvaddr;
 897                         dhpp = dhp->dh_next;
 898                         dhp->dh_next = NULL;
 899                         dhp = dhpp;
 900                 } else if ((addr > dhp->dh_uvaddr) &&
 901                     ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
 902                         mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
 903                         /*
 904                          * <addr, addr+len> spans over dhps.
 905                          */
 906                         if (callbackops->devmap_unmap != NULL)
 907                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 908                                     off, mlen, (devmap_cookie_t *)dhp,
 909                                     &dhp->dh_pvtp, NULL, NULL);
 910                         /* XX Changing len should recalculate LARGE flag */
 911                         dhp->dh_len = addr - dhp->dh_uvaddr;
 912                         dhpp = dhp->dh_next;
 913                         dhp->dh_next = NULL;
 914                         dhp = dhpp;
 915                         nsdp->devmap_data = dhp;
 916                 } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
 917                         /*
 918                          * dhp is enclosed by <addr, addr+len>.
 919                          */
 920                         dhp->dh_seg = nseg;
 921                         nsdp->devmap_data = dhp;
 922                         dhp = devmap_handle_unmap(dhp);
 923                         nsdp->devmap_data = dhp; /* XX redundant? */
 924                 } else if (((addr + len) > dhp->dh_uvaddr) &&
 925                     ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
 926                         mlen = addr + len - dhp->dh_uvaddr;
 927                         if (callbackops->devmap_unmap != NULL)
 928                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 929                                     dhp->dh_uoff, mlen, NULL,
 930                                     NULL, dhp, &dhp->dh_pvtp);
 931                         devmap_handle_reduce_len(dhp, mlen);
 932                         nsdp->devmap_data = dhp;
 933                         dhp->dh_seg = nseg;
 934                         dhp = dhp->dh_next;
 935                 } else {
 936                         dhp->dh_seg = nseg;
 937                         dhp = dhp->dh_next;
 938                 }
 939         }
 940         return (0);
 941 }
 942 
 943 /*
  944  * Utility function that reduces the length of a devmap handle during unmap.
  945  * Note that it is only used for unmapping the front portion of the handle,
  946  * i.e., we are bumping the offset/pfn etc. up by len.
  947  * Do not use it if reducing the length at the tail.
 948  */
 949 static void
 950 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
 951 {
 952         struct ddi_umem_cookie *cp;
 953         struct devmap_pmem_cookie *pcp;
 954         /*
 955          * adjust devmap handle fields
 956          */
 957         ASSERT(len < dhp->dh_len);
 958 
 959         /* Make sure only page-aligned changes are done */
 960         ASSERT((len & PAGEOFFSET) == 0);
 961 
 962         dhp->dh_len -= len;
 963         dhp->dh_uoff += (offset_t)len;
 964         dhp->dh_roff += (offset_t)len;
 965         dhp->dh_uvaddr += len;
 966         /* Need to grab dhp lock if REMAP */
 967         HOLD_DHP_LOCK(dhp);
 968         cp = dhp->dh_cookie;
 969         if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
 970                 if (cookie_is_devmem(cp)) {
 971                         dhp->dh_pfn += btop(len);
 972                 } else if (cookie_is_pmem(cp)) {
 973                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
 974                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
 975                             dhp->dh_roff < ptob(pcp->dp_npages));
 976                 } else {
 977                         ASSERT(dhp->dh_roff < cp->size);
 978                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
 979                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
 980                         ASSERT((dhp->dh_cvaddr + len) <=
 981                             (cp->cvaddr + cp->size));
 982 
 983                         dhp->dh_cvaddr += len;
 984                 }
 985         }
 986         /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
 987         RELE_DHP_LOCK(dhp);
 988 }
 989 
 990 /*
 991  * Free devmap handle, dhp.
 992  * Return the next devmap handle on the linked list.
 993  */
 994 static devmap_handle_t *
 995 devmap_handle_unmap(devmap_handle_t *dhp)
 996 {
 997         struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
 998         struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
 999         devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;
1000 
1001         ASSERT(dhp != NULL);
1002 
1003         /*
1004          * before we free up dhp, call the driver's devmap_unmap entry point
1005          * to free resources allocated for this dhp.
1006          */
1007         if (callbackops->devmap_unmap != NULL) {
1008                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
1009                     dhp->dh_len, NULL, NULL, NULL, NULL);
1010         }
1011 
1012         if (dhpp == dhp) {      /* releasing first dhp, change sdp data */
1013                 sdp->devmap_data = dhp->dh_next;
1014         } else {
1015                 while (dhpp->dh_next != dhp) {
1016                         dhpp = dhpp->dh_next;
1017                 }
1018                 dhpp->dh_next = dhp->dh_next;
1019         }
1020         dhpp = dhp->dh_next; /* return value is next dhp in chain */
1021 
1022         if (dhp->dh_softlock != NULL)
1023                 devmap_softlock_rele(dhp);
1024 
1025         if (dhp->dh_ctx != NULL)
1026                 devmap_ctx_rele(dhp);
1027 
1028         if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1029                 mutex_destroy(&dhp->dh_lock);
1030         }
1031         kmem_free(dhp, sizeof (devmap_handle_t));
1032 
1033         return (dhpp);
1034 }
1035 
1036 /*
1037  * Free complete devmap handles from dhp for len bytes
1038  * dhp can be either the first handle or a subsequent handle
1039  */
1040 static void
1041 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
1042 {
1043         struct devmap_callback_ctl *callbackops;
1044 
1045         /*
1046          * free the devmap handles covered by len.
1047          */
1048         while (len >= dhp->dh_len) {
1049                 len -= dhp->dh_len;
1050                 dhp = devmap_handle_unmap(dhp);
1051         }
1052         if (len != 0) { /* partial unmap at head of first remaining dhp */
1053                 callbackops = &dhp->dh_callbackops;
1054 
1055                 /*
 1056                  * Call the unmap callback so the driver can make
 1057                  * adjustments to its private data.
1058                  */
1059                 if (callbackops->devmap_unmap != NULL)
1060                         (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
1061                             dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
1062                 devmap_handle_reduce_len(dhp, len);
1063         }
1064 }
1065 
1066 /*
 1067  * Free devmap handles to truncate the mapping after addr
 1068  * RFE: Simpler to pass in a dhp pointing at the correct dhp (avoid the find
 1069  *      again); could then use the routine in the middle unmap case too
1070  */
1071 static void
1072 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
1073 {
1074         register struct seg *seg = dhp->dh_seg;
1075         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1076         register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
1077         struct devmap_callback_ctl *callbackops;
1078         register devmap_handle_t *dhpp;
1079         size_t maplen;
1080         ulong_t off;
1081         size_t len;
1082 
1083         maplen = (size_t)(addr - dhp->dh_uvaddr);
1084         dhph = devmap_find_handle(dhph, addr);
1085 
1086         while (dhph != NULL) {
1087                 if (maplen == 0) {
 1088                         dhph = devmap_handle_unmap(dhph);
1089                 } else {
1090                         callbackops = &dhph->dh_callbackops;
1091                         len = dhph->dh_len - maplen;
1092                         off = (ulong_t)sdp->offset + (addr - seg->s_base);
1093                         /*
1094                          * Call the unmap callback so the driver
 1095                          * can make adjustments to its private data.
1096                          */
1097                         if (callbackops->devmap_unmap != NULL)
1098                                 (*callbackops->devmap_unmap)(dhph,
1099                                     dhph->dh_pvtp, off, len,
1100                                     (devmap_cookie_t *)dhph,
1101                                     &dhph->dh_pvtp, NULL, NULL);
1102                         /* XXX Reducing len needs to recalculate LARGE flag */
1103                         dhph->dh_len = maplen;
1104                         maplen = 0;
1105                         dhpp = dhph->dh_next;
1106                         dhph->dh_next = NULL;
1107                         dhph = dhpp;
1108                 }
1109         } /* end while */
1110 }
1111 
1112 /*
1113  * Free a segment.
1114  */
1115 static void
1116 segdev_free(struct seg *seg)
1117 {
1118         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1119         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
1120 
1121         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FREE,
1122             "segdev_free: dhp=%p seg=%p", (void *)dhp, (void *)seg);
1123         DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
1124             (void *)dhp, (void *)seg));
1125 
1126         /*
1127          * Since the address space is "write" locked, we
1128          * don't need the segment lock to protect "segdev" data.
1129          */
1130         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1131 
1132         while (dhp != NULL)
1133                 dhp = devmap_handle_unmap(dhp);
1134 
1135         VN_RELE(sdp->vp);
1136         if (sdp->vpage != NULL)
1137                 kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
1138 
1139         rw_destroy(&sdp->lock);
1140         kmem_free(sdp, sizeof (*sdp));
1141 }
1142 
1143 static void
1144 free_devmap_handle(devmap_handle_t *dhp)
1145 {
1146         register devmap_handle_t *dhpp;
1147 
1148         /*
1149          * free up devmap handle
1150          */
1151         while (dhp != NULL) {
1152                 dhpp = dhp->dh_next;
1153                 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1154                         mutex_destroy(&dhp->dh_lock);
1155                 }
1156 
1157                 if (dhp->dh_softlock != NULL)
1158                         devmap_softlock_rele(dhp);
1159 
1160                 if (dhp->dh_ctx != NULL)
1161                         devmap_ctx_rele(dhp);
1162 
1163                 kmem_free(dhp, sizeof (devmap_handle_t));
1164                 dhp = dhpp;
1165         }
1166 }
1167 
1168 /*
1169  * routines to lock and unlock underlying segkp segment for
1170  * KMEM_PAGEABLE type cookies.
1171  * segkp only allows a single pending F_SOFTLOCK
1172  * we keep track of number of locks in the cookie so we can
1173  * have multiple pending faults and manage the calls to segkp.
1174  * RFE: if segkp supports either pagelock or can support multiple
1175  * calls to F_SOFTLOCK, then these routines can go away.
 1176  *      If pagelock, segdev_faultpage can fault on a page-by-page basis,
 1177  *              which simplifies the code quite a bit.
 1178  *      If multiple calls are allowed but not partial ranges, then the need for
 1179  *      cookie->lock and the locked count goes away; code can call as_fault directly
1180  */
1181 static faultcode_t
1182 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1183 {
1184         int err = 0;
1185         ASSERT(cookie_is_kpmem(cookie));
1186         /*
1187          * Fault in pages in segkp with F_SOFTLOCK.
1188          * We want to hold the lock until all pages have been loaded.
 1189          * segkp only allows a single caller to hold SOFTLOCK, so the cookie
 1190          * holds a count so we don't call into segkp multiple times
1191          */
1192         mutex_enter(&cookie->lock);
1193 
1194         /*
1195          * Check for overflow in locked field
1196          */
1197         if ((UINT32_MAX - cookie->locked) < npages) {
1198                 err = FC_MAKE_ERR(ENOMEM);
1199         } else if (cookie->locked == 0) {
1200                 /* First time locking */
1201                 err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
1202                     cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
1203         }
1204         if (!err) {
1205                 cookie->locked += npages;
1206         }
1207         mutex_exit(&cookie->lock);
1208         return (err);
1209 }
1210 
1211 static void
1212 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1213 {
1214         mutex_enter(&cookie->lock);
1215         ASSERT(cookie_is_kpmem(cookie));
1216         ASSERT(cookie->locked >= npages);
1217         cookie->locked -= (uint_t)npages;
1218         if (cookie->locked == 0) {
1219                 /* Last unlock */
1220                 if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
1221                     cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
1222                         panic("segdev releasing kpmem lock %p", (void *)cookie);
1223         }
1224         mutex_exit(&cookie->lock);
1225 }
1226 
1227 /*
1228  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
1229  * drivers with devmap_access callbacks
1230  * slock->softlocked basically works like a rw lock
1231  *      -ve counts => F_SOFTLOCK in progress
1232  *      +ve counts => F_INVAL/F_PROT in progress
1233  * We allow only one F_SOFTLOCK at a time
1234  * but can have multiple pending F_INVAL/F_PROT calls
1235  *
1236  * This routine waits using cv_wait_sig so killing processes is more graceful
1237  * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
1238  */
1239 static int devmap_softlock_enter(
1240         struct devmap_softlock *slock,
1241         size_t npages,
1242         enum fault_type type)
1243 {
1244         if (npages == 0)
1245                 return (0);
1246         mutex_enter(&(slock->lock));
1247         switch (type) {
1248         case F_SOFTLOCK :
1249                 while (slock->softlocked) {
1250                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1251                                 /* signalled */
1252                                 mutex_exit(&(slock->lock));
1253                                 return (EINTR);
1254                         }
1255                 }
1256                 slock->softlocked -= npages; /* -ve count => locked */
1257                 break;
1258         case F_INVAL :
1259         case F_PROT :
1260                 while (slock->softlocked < 0)
1261                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1262                                 /* signalled */
1263                                 mutex_exit(&(slock->lock));
1264                                 return (EINTR);
1265                         }
1266                 slock->softlocked += npages; /* +ve count => f_invals */
1267                 break;
1268         default:
1269                 ASSERT(0);
1270         }
1271         mutex_exit(&(slock->lock));
1272         return (0);
1273 }
1274 
1275 static void devmap_softlock_exit(
1276         struct devmap_softlock *slock,
1277         size_t npages,
1278         enum fault_type type)
1279 {
1280         if (slock == NULL)
1281                 return;
1282         mutex_enter(&(slock->lock));
1283         switch (type) {
1284         case F_SOFTLOCK :
1285                 ASSERT(-slock->softlocked >= npages);
1286                 slock->softlocked += npages; /* -ve count is softlocked */
1287                 if (slock->softlocked == 0)
1288                         cv_signal(&slock->cv);
1289                 break;
1290         case F_INVAL :
1291         case F_PROT:
1292                 ASSERT(slock->softlocked >= npages);
1293                 slock->softlocked -= npages;
1294                 if (slock->softlocked == 0)
1295                         cv_signal(&slock->cv);
1296                 break;
1297         default:
1298                 ASSERT(0);
1299         }
1300         mutex_exit(&(slock->lock));
1301 }
1302 
1303 /*
 1304  * Do an F_SOFTUNLOCK call over the range requested.
 1305  * The range must have already been F_SOFTLOCK'ed.
 1306  * The segment lock should be held (but not the segment private lock?)
 1307  *  The softunlock code below does not adjust for large page sizes; it
 1308  *      assumes the caller already did any addr/len adjustments for
 1309  *      pagesize mappings before calling.
1310  */
1311 /*ARGSUSED*/
1312 static void
1313 segdev_softunlock(
1314         struct hat *hat,                /* the hat */
1315         struct seg *seg,                /* seg_dev of interest */
1316         caddr_t addr,                   /* base address of range */
1317         size_t len,                     /* number of bytes */
1318         enum seg_rw rw)                 /* type of access at fault */
1319 {
1320         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1321         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1322 
1323         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SOFTUNLOCK,
1324             "segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
1325             dhp_head, sdp, addr, len);
1326         DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
1327             "addr %p len %lx\n",
1328             (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
1329 
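             /*
              * Drop the translation locks that were taken when this range was
              * F_SOFTLOCKed.
              */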
1330         hat_unlock(hat, addr, len);
1331 
1332         if (dhp_head != NULL) {
1333                 devmap_handle_t *dhp;
1334                 size_t mlen;
1335                 size_t tlen = len;
1336                 ulong_t off;
1337 
1338                 dhp = devmap_find_handle(dhp_head, addr);
1339                 ASSERT(dhp != NULL);
1340 
1341                 off = (ulong_t)(addr - dhp->dh_uvaddr);
1342                 while (tlen != 0) {
1343                         mlen = MIN(tlen, (dhp->dh_len - off));
1344 
1345                         /*
1346                          * unlock segkp memory, locked during F_SOFTLOCK
1347                          */
1348                         if (dhp_is_kpmem(dhp)) {
1349                                 release_kpmem_lock(
1350                                     (struct ddi_umem_cookie *)dhp->dh_cookie,
1351                                     btopr(mlen));
1352                         }
1353 
1354                         /*
1355                          * Do the softlock accounting for devmap_access
1356                          */
1357                         if (dhp->dh_callbackops.devmap_access != NULL) {
1358                                 devmap_softlock_exit(dhp->dh_softlock,
1359                                     btopr(mlen), F_SOFTLOCK);
1360                         }
1361 
1362                         tlen -= mlen;
1363                         dhp = dhp->dh_next;
1364                         off = 0;
1365                 }
1366         }
1367 
1368         mutex_enter(&freemem_lock);
1369         ASSERT(sdp->softlockcnt >= btopr(len));
1370         sdp->softlockcnt -= btopr(len);
1371         mutex_exit(&freemem_lock);
1372         if (sdp->softlockcnt == 0) {
1373                 /*
1374                  * All SOFTLOCKS are gone. Wakeup any waiting
1375                  * unmappers so they can try again to unmap.
1376                  * Check for waiters first without the mutex
1377                  * held so we don't always grab the mutex on
1378                  * softunlocks.
1379                  */
1380                 if (AS_ISUNMAPWAIT(seg->s_as)) {
1381                         mutex_enter(&seg->s_as->a_contents);
1382                         if (AS_ISUNMAPWAIT(seg->s_as)) {
1383                                 AS_CLRUNMAPWAIT(seg->s_as);
1384                                 cv_broadcast(&seg->s_as->a_cv);
1385                         }
1386                         mutex_exit(&seg->s_as->a_contents);
1387                 }
1388         }
1389 
1390 }
1391 
1392 /*
1393  * Handle fault for a single page.
1394  * Done in a separate routine so we can handle errors more easily.
1395  * This routine is called only from segdev_faultpages()
1396  * when looping over the range of addresses requested. The segment lock is held.
1397  */
1398 static faultcode_t
1399 segdev_faultpage(
1400         struct hat *hat,                /* the hat */
1401         struct seg *seg,                /* seg_dev of interest */
1402         caddr_t addr,                   /* address in as */
1403         struct vpage *vpage,            /* pointer to vpage for seg, addr */
1404         enum fault_type type,           /* type of fault */
1405         enum seg_rw rw,                 /* type of access at fault */
1406         devmap_handle_t *dhp)           /* devmap handle if any for this page */
1407 {
1408         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1409         uint_t prot;
1410         pfn_t pfnum = PFN_INVALID;
1411         u_offset_t offset;
1412         uint_t hat_flags;
1413         dev_info_t *dip;
1414 
1415         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE,
1416             "segdev_faultpage: dhp=%p seg=%p addr=%p", dhp, seg, addr);
1417         DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
1418             (void *)dhp, (void *)seg, (void *)addr));
1419 
1420         /*
1421          * Initialize the protection value for this page.
1422          * If we have per-page protection values, check them now.
1423          */
1424         if (sdp->pageprot) {
1425                 uint_t protchk;
1426 
1427                 switch (rw) {
1428                 case S_READ:
1429                         protchk = PROT_READ;
1430                         break;
1431                 case S_WRITE:
1432                         protchk = PROT_WRITE;
1433                         break;
1434                 case S_EXEC:
1435                         protchk = PROT_EXEC;
1436                         break;
1437                 case S_OTHER:
1438                 default:
1439                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1440                         break;
1441                 }
1442 
1443                 prot = VPP_PROT(vpage);
1444                 if ((prot & protchk) == 0)
1445                         return (FC_PROT);       /* illegal access type */
1446         } else {
1447                 prot = sdp->prot;
1448                 /* caller has already done segment level protection check */
1449         }
1450 
1451         if (type == F_SOFTLOCK) {
1452                 mutex_enter(&freemem_lock);
1453                 sdp->softlockcnt++;
1454                 mutex_exit(&freemem_lock);
1455         }
1456 
1457         hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1458         offset = sdp->offset + (u_offset_t)(addr - seg->s_base);
1459         /*
1460          * In the devmap framework, sdp->mapfunc is set to NULL.  We can get
1461          * pfnum from dhp->dh_pfn (at the beginning of the segment) and the
1462          * offset from seg->s_base.
1463          */
1464         if (dhp == NULL) {
1465                 /* If segment has devmap_data, then dhp should be non-NULL */
1466                 ASSERT(sdp->devmap_data == NULL);
1467                 pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
1468                     (off_t)offset, prot);
1469                 prot |= sdp->hat_attr;
1470         } else {
1471                 ulong_t off;
1472                 struct ddi_umem_cookie *cp;
1473                 struct devmap_pmem_cookie *pcp;
1474 
1475                 /* ensure the dhp passed in contains addr. */
1476                 ASSERT(dhp == devmap_find_handle(
1477                     (devmap_handle_t *)sdp->devmap_data, addr));
1478 
1479                 off = addr - dhp->dh_uvaddr;
1480 
1481                 /*
1482                  * This routine assumes that the caller makes sure that the
1483                  * fields in dhp used below are unchanged due to remap during
1484                  * this call. Caller does HOLD_DHP_LOCK if needed.
1485                  */
1486                 cp = dhp->dh_cookie;
1487                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1488                         pfnum = PFN_INVALID;
1489                 } else if (cookie_is_devmem(cp)) {
1490                         pfnum = dhp->dh_pfn + btop(off);
1491                 } else if (cookie_is_pmem(cp)) {
1492                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
1493                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
1494                             dhp->dh_roff < ptob(pcp->dp_npages));
1495                         pfnum = page_pptonum(
1496                             pcp->dp_pparray[btop(off + dhp->dh_roff)]);
1497                 } else {
1498                         ASSERT(dhp->dh_roff < cp->size);
1499                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
1500                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
1501                         ASSERT((dhp->dh_cvaddr + off) <=
1502                             (cp->cvaddr + cp->size));
1503                         ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
1504                             (cp->cvaddr + cp->size));
1505 
1506                         switch (cp->type) {
1507                         case UMEM_LOCKED :
1508                                 if (cp->pparray != NULL) {
1509                                         ASSERT((dhp->dh_roff &
1510                                             PAGEOFFSET) == 0);
1511                                         pfnum = page_pptonum(
1512                                             cp->pparray[btop(off +
1513                                             dhp->dh_roff)]);
1514                                 } else {
1515                                         pfnum = hat_getpfnum(
1516                                             ((proc_t *)cp->procp)->p_as->a_hat,
1517                                             cp->cvaddr + off);
1518                                 }
1519                         break;
1520                         case UMEM_TRASH :
1521                                 pfnum = page_pptonum(trashpp);
1522                                 /*
1523                                  * We should set hat_flags to HAT_NOFAULT as
1524                                  * well; however, not all hat layers implement it.
1525                                  */
1526                                 break;
1527                         case KMEM_PAGEABLE:
1528                         case KMEM_NON_PAGEABLE:
1529                                 pfnum = hat_getpfnum(kas.a_hat,
1530                                     dhp->dh_cvaddr + off);
1531                                 break;
1532                         default :
1533                                 pfnum = PFN_INVALID;
1534                                 break;
1535                         }
1536                 }
1537                 prot |= dhp->dh_hat_attr;
1538         }
1539         if (pfnum == PFN_INVALID) {
1540                 return (FC_MAKE_ERR(EFAULT));
1541         }
1542         /* prot should already be OR'ed in with hat_attributes if needed */
1543 
1544         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE_CK1,
1545             "segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
1546             pfnum, pf_is_memory(pfnum), prot, hat_flags);
1547         DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
1548             "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
1549 
1550         if (pf_is_memory(pfnum) || (dhp != NULL)) {
1551                 /*
1552                  * It's not _really_ required here to pass sdp->hat_flags
1553                  * to hat_devload even though we do it.
1554                  * This is because the hat figures out on its own that DEVMEM
1555                  * mappings are non-consistent anyway.
1556                  */
1557                 hat_devload(hat, addr, PAGESIZE, pfnum,
1558                     prot, hat_flags | sdp->hat_flags);
1559                 return (0);
1560         }
1561 
1562         /*
1563          * Fall through to the case where devmap is not used and we need to
1564          * call up the device tree to set up the mapping.
1565          */
1566 
1567         dip = VTOS(VTOCVP(sdp->vp))->s_dip;
1568         ASSERT(dip);
1569 
1570         /*
1571          * When calling ddi_map_fault, we do not OR in sdp->hat_attr.
1572          * This is because the call goes to drivers which may not expect
1573          * prot to have any values other than PROT_ALL.
1574          * The root nexus driver has a hack to peek into the segment
1575          * structure and then OR in sdp->hat_attr.
1576          * XXX In case the bus_ops interfaces are ever revisited,
1577          * we need to fix this: prot should include other hat attributes.
1578          */
1579         if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
1580             (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
1581                 return (FC_MAKE_ERR(EFAULT));
1582         }
1583         return (0);
1584 }
1585 
1586 static faultcode_t
1587 segdev_fault(
1588         struct hat *hat,                /* the hat */
1589         struct seg *seg,                /* the seg_dev of interest */
1590         caddr_t addr,                   /* the address of the fault */
1591         size_t len,                     /* the length of the range */
1592         enum fault_type type,           /* type of fault */
1593         enum seg_rw rw)                 /* type of access at fault */
1594 {
1595         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1596         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1597         devmap_handle_t *dhp;
1598         struct devmap_softlock *slock = NULL;
1599         ulong_t slpage = 0;
1600         ulong_t off;
1601         caddr_t maddr = addr;
1602         int err;
1603         int err_is_faultcode = 0;
1604 
1605         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_FAULT,
1606             "segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
1607             (void *)dhp_head, (void *)seg, (void *)addr, len, type);
1608         DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
1609             "addr %p len %lx type %x\n",
1610             (void *)dhp_head, (void *)seg, (void *)addr, len, type));
1611 
1612         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1613 
1614         /* Handle non-devmap case */
1615         if (dhp_head == NULL)
1616                 return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
1617 
1618         /* Find devmap handle */
1619         if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
1620                 return (FC_NOMAP);
1621 
1622         /*
1623          * The seg_dev driver does not implement copy-on-write,
1624          * and always loads translations with maximal allowed permissions,
1625          * yet we got a fault trying to access the device.
1626          * Servicing the fault is not going to produce a better outcome.
1627          * RFE: If we want devmap_access callbacks to be involved in F_PROT
1628          *      faults, then the code below is written for that,
1629          *      pending resolution of the following:
1630          *      - determine whether the F_INVAL/F_SOFTLOCK syncing
1631          *      is needed for F_PROT as well. The code below assumes it is.
1632          *      - If a driver sees F_PROT and calls devmap_load with the same
1633          *      type, then segdev_faultpages will fail with FC_PROT anyway; we
1634          *      need to change that so calls from devmap_load to segdev_faultpages
1635          *      for F_PROT type are retagged to F_INVAL.
1636          * RFE: Today we don't have drivers that use devmap and want to handle
1637          *      F_PROT calls. The code in segdev_fault* is written to allow
1638          *      this case but is not tested. A driver that needs this capability
1639          *      should be able to remove the short-circuit case, resolve the
1640          *      above issues, and it "should" work.
1641          */
1642         if (type == F_PROT) {
1643                 return (FC_PROT);
1644         }
1645 
1646         /*
1647          * Loop through the dhp list calling devmap_access or segdev_faultpages
1648          * for each devmap handle.
1649          * Drivers which implement devmap_access can interpose on faults and do
1650          * device-appropriate special actions before calling devmap_load.
1651          */
1652 
1653         /*
1654          * Unfortunately, this simple loop has turned out to expose a variety
1655          * of complex problems which result in the following convoluted code.
1656          *
1657          * First, a desire to handle a serialization of F_SOFTLOCK calls
1658          * to the driver within the framework.
1659          *      This results in a dh_softlock structure that is on a per device
1660          *      (or device instance) basis and serializes devmap_access calls.
1661          *      Ideally we would do this for the underlying
1662          *      memory/device regions being faulted on,
1663          *      but those are hard to identify, and with REMAP, harder still.
1664          * Second, a desire to serialize F_INVAL (and F_PROT) calls w.r.t.
1665          *      F_SOFTLOCK calls to the driver.
1666          * These serializations are to simplify the driver programmer model.
1667          * To support these two features, the code first goes through the
1668          *      devmap handles and counts the pages (slpage) that are covered
1669          *      by devmap_access callbacks.
1670          * This part ends with a devmap_softlock_enter call
1671          *      which allows only one F_SOFTLOCK active on a device instance,
1672          *      but multiple F_INVAL/F_PROTs can be active except when an
1673          *      F_SOFTLOCK is active.
1674          *
1675          * Next, we don't short-circuit the fault code upfront to call
1676          *      segdev_softunlock for F_SOFTUNLOCK, because we must use
1677          *      the same length when we softlock and softunlock.
1678          *
1679          *      -Hat layers may not support softunlocking lengths less than the
1680          *      original length when there is large page support.
1681          *      -kpmem locking is dependent on keeping the lengths the same.
1682          *      -If drivers handle F_SOFTLOCK, they probably also expect to
1683          *              see an F_SOFTUNLOCK of the same length.
1684          *      Hence, if lengths are extended during softlock,
1685          *      softunlock has to make the same adjustments and go through
1686          *      the same loop calling segdev_faultpages/segdev_softunlock,
1687          *      but some of the synchronization and error handling is different.
1688          */
1689 
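        /*
         * Illustrative example (hypothetical numbers, added for clarity):
         * suppose the fault covers three pages, the first two backed by a
         * dhp whose driver supplies a devmap_access callback and the third
         * by a dhp without one.  The counting pass below yields slpage == 2
         * and records that dhp's dh_softlock in 'slock';
         * devmap_softlock_enter(slock, 2, type) is then called once, and the
         * per-dhp loop further down calls the driver's devmap_access for the
         * first handle and segdev_faultpages directly for the second.
         */
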
1690         if (type != F_SOFTUNLOCK) {
1691                 devmap_handle_t *dhpp = dhp;
1692                 size_t slen = len;
1693 
1694                 /*
1695                  * Calculate the count of pages that are:
1696                  * a) within the (potentially extended) fault region
1697                  * b) AND covered by a devmap handle with devmap_access
1698                  */
1699                 off = (ulong_t)(addr - dhpp->dh_uvaddr);
1700                 while (slen != 0) {
1701                         size_t mlen;
1702 
1703                         /*
1704                          * Softlocking on a region that allows remap is
1705                          * unsupported due to unresolved locking issues.
1706                          * XXX: unclear what these are?
1707                          *      One potential issue is that if there is a
1708                          *      pending softlock, then a remap should not be
1709                          *      allowed until the unlock is done. This is easily
1710                          *      fixed by returning an error from devmap*remap
1711                          *      after checking the dh->dh_softlock->softlocked value.
1712                          */
1713                         if ((type == F_SOFTLOCK) &&
1714                             (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
1715                                 return (FC_NOSUPPORT);
1716                         }
1717 
1718                         mlen = MIN(slen, (dhpp->dh_len - off));
1719                         if (dhpp->dh_callbackops.devmap_access) {
1720                                 size_t llen;
1721                                 caddr_t laddr;
1722                                 /*
1723                                  * use extended length for large page mappings
1724                                  */
1725                                 HOLD_DHP_LOCK(dhpp);
1726                                 if ((sdp->pageprot == 0) &&
1727                                     (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
1728                                         devmap_get_large_pgsize(dhpp,
1729                                             mlen, maddr, &llen, &laddr);
1730                                 } else {
1731                                         llen = mlen;
1732                                 }
1733                                 RELE_DHP_LOCK(dhpp);
1734 
1735                                 slpage += btopr(llen);
1736                                 slock = dhpp->dh_softlock;
1737                         }
1738                         maddr += mlen;
1739                         ASSERT(slen >= mlen);
1740                         slen -= mlen;
1741                         dhpp = dhpp->dh_next;
1742                         off = 0;
1743                 }
1744                 /*
1745                  * Synchronize with other faulting threads and wait until safe;
1746                  * devmap_softlock_enter might return due to a signal in cv_wait.
1747                  *
1748                  * devmap_softlock_enter has to be called outside of the while
1749                  * loop to prevent a deadlock if len spans multiple dhps.
1750                  * dh_softlock is based on device instance and if multiple dhps
1751                  * use the same device instance, the second dhp's LOCK call
1752                  * will hang waiting on the first to complete.
1753                  * devmap_setup verifies that the slocks in a dhp_chain are the same.
1754                  * RFE: this deadlock only holds true for F_SOFTLOCK. For
1755                  *      F_INVAL/F_PROT, since we now allow multiple in parallel,
1756                  *      we could have done the softlock_enter inside the loop
1757                  *      and supported multi-dhp mappings with dissimilar devices.
1758                  */
1759                 if (err = devmap_softlock_enter(slock, slpage, type))
1760                         return (FC_MAKE_ERR(err));
1761         }
1762 
1763         /* reset 'maddr' to the start address of the faulting range. */
1764         maddr = addr;
1765 
1766         /* calculate the offset corresponding to 'addr' in the first dhp. */
1767         off = (ulong_t)(addr - dhp->dh_uvaddr);
1768 
1769         /*
1770          * The fault length may span over multiple dhps.
1771          * Loop until the total length is satisfied.
1772          */
1773         while (len != 0) {
1774                 size_t llen;
1775                 size_t mlen;
1776                 caddr_t laddr;
1777 
1778                 /*
1779                  * mlen is the smaller of 'len' and the length
1780                  * from addr to the end of mapping defined by dhp.
1781                  */
1782                 mlen = MIN(len, (dhp->dh_len - off));
1783 
1784                 HOLD_DHP_LOCK(dhp);
1785                 /*
1786                  * Pass the extended length and address to devmap_access
1787                  * if large pagesize is used for loading address translations.
1788                  */
1789                 if ((sdp->pageprot == 0) &&
1790                     (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
1791                         devmap_get_large_pgsize(dhp, mlen, maddr,
1792                             &llen, &laddr);
1793                         ASSERT(maddr == addr || laddr == maddr);
1794                 } else {
1795                         llen = mlen;
1796                         laddr = maddr;
1797                 }
1798 
1799                 if (dhp->dh_callbackops.devmap_access != NULL) {
1800                         offset_t aoff;
1801 
1802                         aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
1803 
1804                         /*
1805                          * call driver's devmap_access entry point which will
1806                          * call devmap_load/contextmgmt to load the translations
1807                          *
1808                          * We drop the dhp_lock before calling access so
1809                          * drivers can call devmap_*_remap within access
1810                          */
1811                         RELE_DHP_LOCK(dhp);
1812 
1813                         err = (*dhp->dh_callbackops.devmap_access)(
1814                             dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
1815                 } else {
1816                         /*
1817                          * If there is no devmap_access entry point, load the
1818                          * mappings; hold dhp_lock across faultpages if REMAP.
1819                          */
1820                         err = segdev_faultpages(hat, seg, laddr, llen,
1821                             type, rw, dhp);
1822                         err_is_faultcode = 1;
1823                         RELE_DHP_LOCK(dhp);
1824                 }
1825 
1826                 if (err) {
1827                         if ((type == F_SOFTLOCK) && (maddr > addr)) {
1828                                 /*
1829                                  * If this is not the first dhp, use
1830                                  * segdev_fault(F_SOFTUNLOCK) for the prior dhps.
1831                                  * While this is recursion, it is incorrect to
1832                                  * call just segdev_softunlock
1833                                  * if we are using either large pages
1834                                  * or devmap_access. It is more correct
1835                                  * to go through the same loop as above
1836                                  * rather than call segdev_softunlock directly;
1837                                  * it will use the right lengths as well as
1838                                  * call into the driver devmap_access routines.
1839                                  */
1840                                 size_t done = (size_t)(maddr - addr);
1841                                 (void) segdev_fault(hat, seg, addr, done,
1842                                     F_SOFTUNLOCK, S_OTHER);
1843                                 /*
1844                                  * reduce slpage by number of pages
1845                                  * released by segdev_softunlock
1846                                  */
1847                                 ASSERT(slpage >= btopr(done));
1848                                 devmap_softlock_exit(slock,
1849                                     slpage - btopr(done), type);
1850                         } else {
1851                                 devmap_softlock_exit(slock, slpage, type);
1852                         }
1853 
1854 
1855                         /*
1856                          * segdev_faultpages() already returns a faultcode;
1857                          * hence, the result from segdev_faultpages() should be
1858                          * returned directly.
1859                          */
1860                         if (err_is_faultcode)
1861                                 return (err);
1862                         return (FC_MAKE_ERR(err));
1863                 }
1864 
1865                 maddr += mlen;
1866                 ASSERT(len >= mlen);
1867                 len -= mlen;
1868                 dhp = dhp->dh_next;
1869                 off = 0;
1870 
1871                 ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1872         }
1873         /*
1874          * Release the softlock count at the end of the fault.
1875          * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK.
1876          */
1877         if ((type == F_INVAL) || (type == F_PROT))
1878                 devmap_softlock_exit(slock, slpage, type);
1879         return (0);
1880 }
1881 
1882 /*
1883  * segdev_faultpages
1884  *
1885  * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load.
1886  * This routine assumes that the caller makes sure that the fields
1887  * in dhp used below are not changed due to remap during this call.
1888  * Caller does HOLD_DHP_LOCK if needed.
1889  * This routine returns a faultcode_t as a return value for segdev_fault.
1890  */
1891 static faultcode_t
1892 segdev_faultpages(
1893         struct hat *hat,                /* the hat */
1894         struct seg *seg,                /* the seg_dev of interest */
1895         caddr_t addr,                   /* the address of the fault */
1896         size_t len,                     /* the length of the range */
1897         enum fault_type type,           /* type of fault */
1898         enum seg_rw rw,                 /* type of access at fault */
1899         devmap_handle_t *dhp)           /* devmap handle */
1900 {
1901         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1902         register caddr_t a;
1903         struct vpage *vpage;
1904         struct ddi_umem_cookie *kpmem_cookie = NULL;
1905         int err;
1906 
1907         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGES,
1908             "segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
1909             (void *)dhp, (void *)seg, (void *)addr, len);
1910         DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1911             "dhp %p seg %p addr %p len %lx\n",
1912             (void *)dhp, (void *)seg, (void *)addr, len));
1913 
1914         /*
1915          * The seg_dev driver does not implement copy-on-write,
1916          * and always loads translations with maximal allowed permissions,
1917          * yet we got a fault trying to access the device.
1918          * Servicing the fault is not going to produce a better outcome.
1919          * XXX: If we want to allow devmap_access to handle F_PROT calls,
1920          * this code should be removed and the normal fault handling should
1921          * take care of finding the error.
1922          */
1923         if (type == F_PROT) {
1924                 return (FC_PROT);
1925         }
1926 
1927         if (type == F_SOFTUNLOCK) {
1928                 segdev_softunlock(hat, seg, addr, len, rw);
1929                 return (0);
1930         }
1931 
1932         /*
1933          * For kernel pageable memory, fault in and lock the segkp pages.
1934          * We hold this lock until the completion of this
1935          * fault (INVAL/PROT) or until unlock (SOFTLOCK).
1936          */
1937         if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1938                 kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1939                 if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1940                         return (err);
1941         }
1942 
1943         /*
1944          * If we have the same protections for the entire segment,
1945          * ensure that the access being attempted is legitimate.
1946          */
1947         rw_enter(&sdp->lock, RW_READER);
1948         if (sdp->pageprot == 0) {
1949                 uint_t protchk;
1950 
1951                 switch (rw) {
1952                 case S_READ:
1953                         protchk = PROT_READ;
1954                         break;
1955                 case S_WRITE:
1956                         protchk = PROT_WRITE;
1957                         break;
1958                 case S_EXEC:
1959                         protchk = PROT_EXEC;
1960                         break;
1961                 case S_OTHER:
1962                 default:
1963                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1964                         break;
1965                 }
1966 
1967                 if ((sdp->prot & protchk) == 0) {
1968                         rw_exit(&sdp->lock);
1969                         /* undo kpmem locking */
1970                         if (kpmem_cookie != NULL) {
1971                                 release_kpmem_lock(kpmem_cookie, btopr(len));
1972                         }
1973                         return (FC_PROT);       /* illegal access type */
1974                 }
1975         }
1976 
1977         /*
1978          * We do a single hat_devload for the range if
1979          *   - the devmap framework is in use (dhp is not NULL),
1980          *   - pageprot == 0, i.e., no per-page protection is set, and
1981          *   - the pages are device pages, regardless of large page use.
1982          */
1983         if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
1984                 pfn_t pfnum;
1985                 uint_t hat_flags;
1986 
1987                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1988                         rw_exit(&sdp->lock);
1989                         return (FC_NOMAP);
1990                 }
1991 
1992                 if (type == F_SOFTLOCK) {
1993                         mutex_enter(&freemem_lock);
1994                         sdp->softlockcnt += btopr(len);
1995                         mutex_exit(&freemem_lock);
1996                 }
1997 
1998                 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1999                 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
2000                 ASSERT(!pf_is_memory(pfnum));
2001 
2002                 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
2003                     hat_flags | sdp->hat_flags);
2004                 rw_exit(&sdp->lock);
2005                 return (0);
2006         }
2007 
2008         /* Handle cases where we have to loop through fault handling per-page */
2009 
2010         if (sdp->vpage == NULL)
2011                 vpage = NULL;
2012         else
2013                 vpage = &sdp->vpage[seg_page(seg, addr)];
2014 
2015         /* loop over the address range handling each fault */
2016         for (a = addr; a < addr + len; a += PAGESIZE) {
2017                 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
2018                         break;
2019                 }
2020                 if (vpage != NULL)
2021                         vpage++;
2022         }
2023         rw_exit(&sdp->lock);
2024         if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
2025                 size_t done = (size_t)(a - addr); /* bytes faulted successfully */
2026                 if (done > 0) {
2027                         /* use softunlock for those pages */
2028                         segdev_softunlock(hat, seg, addr, done, S_OTHER);
2029                 }
2030                 if (kpmem_cookie != NULL) {
2031                         /* release kpmem lock for rest of pages */
2032                         ASSERT(len >= done);
2033                         release_kpmem_lock(kpmem_cookie, btopr(len - done));
2034                 }
2035         } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
2036                 /* for non-SOFTLOCK cases, release kpmem */
2037                 release_kpmem_lock(kpmem_cookie, btopr(len));
2038         }
2039         return (err);
2040 }
2041 
2042 /*
2043  * Asynchronous page fault.  We simply do nothing since this
2044  * entry point is not supposed to load up the translation.
2045  */
2046 /*ARGSUSED*/
2047 static faultcode_t
2048 segdev_faulta(struct seg *seg, caddr_t addr)
2049 {
2050         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FAULTA,
2051             "segdev_faulta: seg=%p addr=%p", (void *)seg, (void *)addr);
2052         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2053 
2054         return (0);
2055 }
2056 
2057 static int
2058 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2059 {
2060         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2061         register devmap_handle_t *dhp;
2062         register struct vpage *vp, *evp;
2063         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
2064         ulong_t off;
2065         size_t mlen, sz;
2066 
2067         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT,
2068             "segdev_setprot:start seg=%p addr=%p len=%lx prot=%x",
2069             (void *)seg, (void *)addr, len, prot);
2070         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2071 
2072         if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
2073                 /*
2074                  * Fail the setprot if pages are SOFTLOCKed through this
2075                  * mapping.
2076                  * Softlockcnt is protected from change by the as read lock.
2077                  */
2078                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT_CK1,
2079                     "segdev_setprot:error softlockcnt=%lx", sz);
2080                 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
2081                 return (EAGAIN);
2082         }
2083 
2084         if (dhp_head != NULL) {
2085                 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
2086                         return (EINVAL);
2087 
2088                 /*
2089                  * Check whether the request violates maxprot.
2090                  */
2091                 off = (ulong_t)(addr - dhp->dh_uvaddr);
2092                 mlen  = len;
2093                 while (dhp) {
2094                         if ((dhp->dh_maxprot & prot) != prot)
2095                                 return (EACCES);        /* violated maxprot */
2096 
2097                         if (mlen > (dhp->dh_len - off)) {
2098                                 mlen -= dhp->dh_len - off;
2099                                 dhp = dhp->dh_next;
2100                                 off = 0;
2101                         } else
2102                                 break;
2103                 }
2104         } else {
2105                 if ((sdp->maxprot & prot) != prot)
2106                         return (EACCES);
2107         }
2108 
2109         rw_enter(&sdp->lock, RW_WRITER);
2110         if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
2111                 if (sdp->prot == prot) {
2112                         rw_exit(&sdp->lock);
2113                         return (0);                     /* all done */
2114                 }
2115                 sdp->prot = (uchar_t)prot;
2116         } else {
2117                 sdp->pageprot = 1;
2118                 if (sdp->vpage == NULL) {
2119                         /*
2120                          * First time through setting per-page permissions;
2121                          * initialize all the vpage structures to prot.
2122                          */
2123                         sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
2124                             KM_SLEEP);
2125                         evp = &sdp->vpage[seg_pages(seg)];
2126                         for (vp = sdp->vpage; vp < evp; vp++)
2127                                 VPP_SETPROT(vp, sdp->prot);
2128                 }
2129                 /*
2130                  * Now go change the protections of the needed vpages.
2131                  */
2132                 evp = &sdp->vpage[seg_page(seg, addr + len)];
2133                 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
2134                         VPP_SETPROT(vp, prot);
2135         }
2136         rw_exit(&sdp->lock);
2137 
2138         if (dhp_head != NULL) {
2139                 devmap_handle_t *tdhp;
2140                 /*
2141                  * If large page size was used in hat_devload(),
2142                  * the same page size must be used in hat_unload().
2143                  */
2144                 dhp = tdhp = devmap_find_handle(dhp_head, addr);
2145                 while (tdhp != NULL) {
2146                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
2147                                 break;
2148                         }
2149                         tdhp = tdhp->dh_next;
2150                 }
2151                 if (tdhp) {
2152                         size_t slen = len;
2153                         size_t mlen;
2154                         size_t soff;
2155 
2156                         soff = (ulong_t)(addr - dhp->dh_uvaddr);
2157                         while (slen != 0) {
2158                                 mlen = MIN(slen, (dhp->dh_len - soff));
2159                                 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
2160                                     dhp->dh_len, HAT_UNLOAD);
2161                                 dhp = dhp->dh_next;
2162                                 ASSERT(slen >= mlen);
2163                                 slen -= mlen;
2164                                 soff = 0;
2165                         }
2166                         return (0);
2167                 }
2168         }
2169 
2170         if ((prot & ~PROT_USER) == PROT_NONE) {
2171                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
2172         } else {
2173                 /*
2174                  * RFE: the segment should keep track of all attributes
2175                  * allowing us to remove the deprecated hat_chgprot
2176                  * and use hat_chgattr.
2177                  */
2178                 hat_chgprot(seg->s_as->a_hat, addr, len, prot);
2179         }
2180 
2181         return (0);
2182 }
2183 
2184 static int
2185 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2186 {
2187         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2188         struct vpage *vp, *evp;
2189 
2190         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_CHECKPROT,
2191             "segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x",
2192             (void *)seg, (void *)addr, len, prot);
2193         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2194 
2195         /*
2196          * If the segment protections can be used, simply check against them.
2197          */
2198         rw_enter(&sdp->lock, RW_READER);
2199         if (sdp->pageprot == 0) {
2200                 register int err;
2201 
2202                 err = ((sdp->prot & prot) != prot) ? EACCES : 0;
2203                 rw_exit(&sdp->lock);
2204                 return (err);
2205         }
2206 
2207         /*
2208          * Have to check down to the vpage level
2209          */
2210         evp = &sdp->vpage[seg_page(seg, addr + len)];
2211         for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
2212                 if ((VPP_PROT(vp) & prot) != prot) {
2213                         rw_exit(&sdp->lock);
2214                         return (EACCES);
2215                 }
2216         }
2217         rw_exit(&sdp->lock);
2218         return (0);
2219 }
2220 
2221 static int
2222 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2223 {
2224         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2225         size_t pgno;
2226 
2227         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_GETPROT,
2228             "segdev_getprot:start seg=%p addr=%p len=%lx protv=%p",
2229             (void *)seg, (void *)addr, len, (void *)protv);
2230         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2231 
2232         pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
2233         if (pgno != 0) {
2234                 rw_enter(&sdp->lock, RW_READER);
2235                 if (sdp->pageprot == 0) {
2236                         do {
2237                                 protv[--pgno] = sdp->prot;
2238                         } while (pgno != 0);
2239                 } else {
2240                         size_t pgoff = seg_page(seg, addr);
2241 
2242                         do {
2243                                 pgno--;
2244                                 protv[pgno] =
2245                                     VPP_PROT(&sdp->vpage[pgno + pgoff]);
2246                         } while (pgno != 0);
2247                 }
2248                 rw_exit(&sdp->lock);
2249         }
2250         return (0);
2251 }
2252 
2253 static u_offset_t
2254 segdev_getoffset(register struct seg *seg, caddr_t addr)
2255 {
2256         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2257 
2258         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETOFFSET,
2259             "segdev_getoffset:start seg=%p addr=%p", (void *)seg, (void *)addr);
2260 
2261         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2262 
2263         return ((u_offset_t)sdp->offset + (addr - seg->s_base));
2264 }
2265 
2266 /*ARGSUSED*/
2267 static int
2268 segdev_gettype(register struct seg *seg, caddr_t addr)
2269 {
2270         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2271 
2272         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETTYPE,
2273             "segdev_gettype:start seg=%p addr=%p", (void *)seg, (void *)addr);
2274 
2275         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2276 
2277         return (sdp->type);
2278 }
2279 
2280 
2281 /*ARGSUSED*/
2282 static int
2283 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2284 {
2285         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2286 
2287         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETVP,
2288             "segdev_getvp:start seg=%p addr=%p", (void *)seg, (void *)addr);
2289 
2290         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2291 
2292         /*
2293          * Note that this vp is the common_vp of the device, where the
2294          * pages are hung.
2295          */
2296         *vpp = VTOCVP(sdp->vp);
2297 
2298         return (0);
2299 }
2300 
2301 static void
2302 segdev_badop(void)
2303 {
2304         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGDEV_BADOP,
2305             "segdev_badop:start");
2306         panic("segdev_badop");
2307         /*NOTREACHED*/
2308 }
2309 
2310 /*
2311  * segdev pages are not in the cache, and thus can't really be controlled.
2312  * Hence, syncs are simply always successful.
2313  */
2314 /*ARGSUSED*/
2315 static int
2316 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2317 {
2318         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SYNC, "segdev_sync:start");
2319 
2320         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2321 
2322         return (0);
2323 }
2324 
2325 /*
2326  * segdev pages are always "in core".
2327  */
2328 /*ARGSUSED*/
2329 static size_t
2330 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2331 {
2332         size_t v = 0;
2333 
2334         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_INCORE, "segdev_incore:start");
2335 
2336         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2337 
2338         for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2339             v += PAGESIZE)
2340                 *vec++ = 1;
2341         return (v);
2342 }
2343 
2344 /*
2345  * segdev pages are not in the cache, and thus can't really be controlled.
2346  * Hence, locks are simply always successful.
2347  */
2348 /*ARGSUSED*/
2349 static int
2350 segdev_lockop(struct seg *seg, caddr_t addr,
2351     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2352 {
2353         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_LOCKOP, "segdev_lockop:start");
2354 
2355         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2356 
2357         return (0);
2358 }
2359 
2360 /*
2361  * segdev pages are not in the cache, and thus can't really be controlled.
2362  * Hence, advise is simply always successful.
2363  */
2364 /*ARGSUSED*/
2365 static int
2366 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2367 {
2368         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_ADVISE, "segdev_advise:start");
2369 
2370         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2371 
2372         return (0);
2373 }
2374 
2375 /*
2376  * segdev pages are not dumped, so we just return
2377  */
2378 /*ARGSUSED*/
2379 static void
2380 segdev_dump(struct seg *seg)
2381 {}
2382 
2383 /*
2384  * ddi_segmap_setup:    Used by drivers who wish to specify mapping attributes
2385  *                      for a segment.  Called from a driver's segmap(9E)
2386  *                      routine.
2387  */
2388 /*ARGSUSED*/
2389 int
2390 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
2391     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
2392     ddi_device_acc_attr_t *accattrp, uint_t rnumber)
2393 {
2394         struct segdev_crargs dev_a;
2395         int (*mapfunc)(dev_t dev, off_t off, int prot);
2396         uint_t hat_attr;
2397         pfn_t pfn;
2398         int     error, i;
2399 
2400         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP_SETUP,
2401             "ddi_segmap_setup:start");
2402 
2403         if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2404                 return (ENODEV);
2405 
2406         /*
2407          * Character devices that support the d_mmap
2408          * interface can only be mmap'ed shared.
2409          */
2410         if ((flags & MAP_TYPE) != MAP_SHARED)
2411                 return (EINVAL);
2412 
2413         /*
2414          * Check that this region is indeed mappable on this platform.
2415          * Use the mapping function.
2416          */
2417         if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
2418                 return (ENXIO);
2419 
2420         /*
2421          * Check to ensure that the entire range is
2422          * legal and we are not trying to map in
2423          * more than the device will let us.
2424          */
2425         for (i = 0; i < len; i += PAGESIZE) {
2426                 if (i == 0) {
2427                         /*
2428                          * Save the pfn at offset here. This pfn will be
2429                          * used later to get the user address.
2430                          */
2431                         if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
2432                             maxprot)) == PFN_INVALID)
2433                                 return (ENXIO);
2434                 } else {
2435                         if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
2436                             PFN_INVALID)
2437                                 return (ENXIO);
2438                 }
2439         }
2440 
2441         as_rangelock(as);
2442         /* Pick an address w/o worrying about any vac alignment constraints. */
2443         error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
2444         if (error != 0) {
2445                 as_rangeunlock(as);
2446                 return (error);
2447         }
2448 
2449         dev_a.mapfunc = mapfunc;
2450         dev_a.dev = dev;
2451         dev_a.offset = (offset_t)offset;
2452         dev_a.type = flags & MAP_TYPE;
2453         dev_a.prot = (uchar_t)prot;
2454         dev_a.maxprot = (uchar_t)maxprot;
2455         dev_a.hat_attr = hat_attr;
2456         dev_a.hat_flags = 0;
2457         dev_a.devmap_data = NULL;
2458 
2459         error = as_map(as, *addrp, len, segdev_create, &dev_a);
2460         as_rangeunlock(as);
2461         return (error);
2462 
2463 }
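
/*
 * Illustrative sketch (added; not part of this driver): a character driver's
 * segmap(9E) entry point that wants specific access attributes would
 * typically just hand the request to ddi_segmap_setup().  The driver name
 * "xx", its register number 0, and the chosen attributes below are
 * hypothetical.
 *
 *	static int
 *	xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
 *	    off_t len, uint_t prot, uint_t maxprot, uint_t flags,
 *	    cred_t *credp)
 *	{
 *		ddi_device_acc_attr_t attr;
 *
 *		attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
 *		attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
 *		attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
 *
 *		return (ddi_segmap_setup(dev, off, as, addrp, len, prot,
 *		    maxprot, flags, credp, &attr, 0));
 *	}
 */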
2464 
2465 /*ARGSUSED*/
2466 static int
2467 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
2468     struct page ***ppp, enum lock_type type, enum seg_rw rw)
2469 {
2470         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_PAGELOCK,
2471             "segdev_pagelock:start");
2472         return (ENOTSUP);
2473 }
2474 
2475 /*
2476  * devmap_device: Used by the devmap framework to establish a mapping.
2477  *                Called by devmap_setup(9F) during map setup time.
2478  */
2479 /*ARGSUSED*/
2480 static int
2481 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
2482     offset_t off, size_t len, uint_t flags)
2483 {
2484         devmap_handle_t *rdhp, *maxdhp;
2485         struct segdev_crargs dev_a;
2486         int     err;
2487         uint_t maxprot = PROT_ALL;
2488         offset_t offset = 0;
2489         pfn_t pfn;
2490         struct devmap_pmem_cookie *pcp;
2491 
2492         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVICE,
2493             "devmap_device:start dhp=%p addr=%p off=%llx, len=%lx",
2494             (void *)dhp, (void *)addr, off, len);
2495 
2496         DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
2497             (void *)dhp, (void *)addr, off, len));
2498 
2499         as_rangelock(as);
2500         if ((flags & MAP_FIXED) == 0) {
2501                 offset_t aligned_off;
2502 
2503                 rdhp = maxdhp = dhp;
2504                 while (rdhp != NULL) {
2505                         maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
2506                             maxdhp : rdhp;
2507                         rdhp = rdhp->dh_next;
2508                         maxprot |= dhp->dh_maxprot;
2509                 }
2510                 offset = maxdhp->dh_uoff - dhp->dh_uoff;
2511 
2512                 /*
2513                  * Use the dhp that has the
2514                  * largest len to get the user address.
2515                  */
2516                 /*
2517                  * If MAPPING_INVALID, we cannot use dh_pfn/dh_cvaddr;
2518                  * use 0, which is as good as any other.
2519                  */
2520                 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
2521                         aligned_off = (offset_t)0;
2522                 } else if (dhp_is_devmem(maxdhp)) {
2523                         aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
2524                 } else if (dhp_is_pmem(maxdhp)) {
2525                         pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
2526                         pfn = page_pptonum(
2527                             pcp->dp_pparray[btop(maxdhp->dh_roff)]);
2528                         aligned_off = (offset_t)ptob(pfn) - offset;
2529                 } else {
2530                         aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
2531                             offset;
2532                 }
2533 
2534                 /*
2535                  * Pick an address aligned to dh_cookie.
2536                  * For kernel memory and user memory, the cookie is cvaddr.
2537                  * For device memory, the cookie is the physical address.
2538                  */
2539                 map_addr(addr, len, aligned_off, 1, flags);
2540                 if (*addr == NULL) {
2541                         as_rangeunlock(as);
2542                         return (ENOMEM);
2543                 }
2544         } else {
2545                 /*
2546                  * User-specified address; blow away any previous mappings.
2547                  */
2548                 (void) as_unmap(as, *addr, len);
2549         }
2550 
2551         dev_a.mapfunc = NULL;
2552         dev_a.dev = dhp->dh_dev;
2553         dev_a.type = flags & MAP_TYPE;
2554         dev_a.offset = off;
2555         /*
2556          * sdp->maxprot has the least restrictive protection of all dhps.
2557          */
2558         dev_a.maxprot = maxprot;
2559         dev_a.prot = dhp->dh_prot;
2560         /*
2561          * devmap uses dhp->dh_hat_attr for hat.
2562          */
2563         dev_a.hat_flags = 0;
2564         dev_a.hat_attr = 0;
2565         dev_a.devmap_data = (void *)dhp;
2566 
2567         err = as_map(as, *addr, len, segdev_create, &dev_a);
2568         as_rangeunlock(as);
2569         return (err);
2570 }
2571 
2572 int
2573 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
2574     uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
2575     size_t, uint_t, uint_t))
2576 {
2577         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2578         struct devmap_ctx *devctx;
2579         int do_timeout = 0;
2580         int ret;
2581 
2582 #ifdef lint
2583         pvtp = pvtp;
2584 #endif
2585 
2586         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT,
2587             "devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx",
2588             (void *)dhp, off, len);
2589         DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
2590             (void *)dhp, off, len));
2591 
2592         if (ctxmgt == NULL)
2593                 return (FC_HWERR);
2594 
2595         devctx = dhp->dh_ctx;
2596 
2597         /*
2598          * If we are on an MP system with more than one CPU running
2599          * and if a thread on some CPU already has the context, wait
2600          * for it to finish if there is a hysteresis timeout.
2601          *
2602          * We call cv_wait() instead of cv_wait_sig() because
2603          * it does not matter much whether it returned due to a signal
2604          * or due to a cv_signal() or cv_broadcast().  In either event
2605          * we need to complete the mapping, otherwise the process
2606          * will die with a SEGV.
2607          */
2608         if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
2609                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK1,
2610                     "devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p",
2611                     devctx, dhp);
2612                 do_timeout = 1;
2613                 mutex_enter(&devctx->lock);
2614                 while (devctx->oncpu)
2615                         cv_wait(&devctx->cv, &devctx->lock);
2616                 devctx->oncpu = 1;
2617                 mutex_exit(&devctx->lock);
2618         }
2619 
2620         /*
2621          * Call the contextmgt callback so that the driver can handle
2622          * the fault.
2623          */
2624         ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);
2625 
2626         /*
2627          * If devmap_access() returned -1, then there was a hardware
2628          * error so we need to convert the return value to something
2629          * that trap() will understand.  Otherwise, the return value
2630          * is already a fault code generated by devmap_unload()
2631          * or devmap_load().
2632          */
2633         if (ret) {
2634                 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK2,
2635                     "devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p",
2636                     ret, dhp, devctx);
2637                 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
2638                     ret, (void *)dhp));
2639                 if (devctx->oncpu) {
2640                         mutex_enter(&devctx->lock);
2641                         devctx->oncpu = 0;
2642                         cv_signal(&devctx->cv);
2643                         mutex_exit(&devctx->lock);
2644                 }
2645                 return (FC_HWERR);
2646         }
2647 
2648         /*
2649          * Set up the timeout if we need to.
2650          */
2651         if (do_timeout) {
2652                 mutex_enter(&devctx->lock);
2653                 if (dhp->dh_timeout_length > 0) {
2654                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK3,
2655                             "devmap_do_ctxmgt:timeout set");
2656                         devctx->timeout = timeout(devmap_ctxto,
2657                             devctx, dhp->dh_timeout_length);
2658                 } else {
2659                         /*
2660                          * We don't want to wait so set oncpu to
2661                          * 0 and wake up anyone waiting.
2662                          */
2663                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK4,
2664                             "devmap_do_ctxmgt:timeout not set");
2665                         devctx->oncpu = 0;
2666                         cv_signal(&devctx->cv);
2667                 }
2668                 mutex_exit(&devctx->lock);
2669         }
2670 
2671         return (DDI_SUCCESS);
2672 }
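/*
 * The ctxmgt callback invoked above is normally the driver's
 * devmap_access(9E) entry point.  A minimal sketch of such a callback,
 * assuming a hypothetical driver "xx" with no device context of its own
 * to manage (illustrative only, not part of this file):
 *
 *        static int
 *        xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
 *            size_t len, uint_t type, uint_t rw)
 *        {
 *                return (devmap_default_access(dhp, pvtp, off, len, type, rw));
 *        }
 *
 * A driver that does manage a device context instead unloads the
 * translations of the mapping that currently owns the context and then
 * validates the faulting range with devmap_load(); see the sketch that
 * follows devmap_load() below.
 */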
2673 
2674 /*
2675  *                                       end of mapping
2676  *                    poff   fault_offset         |
2677  *            base     |        |                 |
2678  *              |      |        |                 |
2679  *              V      V        V                 V
2680  *  +-----------+---------------+-------+---------+-------+
2681  *              ^               ^       ^         ^
2682  *              |<--- offset--->|<-len->|         |
2683  *              |<--- dh_len(size of mapping) --->|
2684  *                     |<--  pg -->|
2685  *                              -->|rlen|<--
2686  */
2687 static ulong_t
2688 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2689     ulong_t *opfn, ulong_t *pagesize)
2690 {
2691         register int level;
2692         ulong_t pg;
2693         ulong_t poff;
2694         ulong_t base;
2695         caddr_t uvaddr;
2696         long rlen;
2697 
2698         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP,
2699             "devmap_roundup:start dhp=%p off=%lx len=%lx",
2700             (void *)dhp, offset, len);
2701         DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2702             (void *)dhp, offset, len));
2703 
2704         /*
2705          * Get the largest page size whose page containing (base + offset)
2706          * lies entirely within the mapping <dh_pfn, dh_pfn + btopr(dh_len)>.
2707          *
2708          * The calculations below use the physical address to determine
2709          * the page size to use.  The same calculations could equally use
2710          * the virtual address to determine the page size.
2711          */
2712         base = (ulong_t)ptob(dhp->dh_pfn);
2713         for (level = dhp->dh_mmulevel; level >= 0; level--) {
2714                 pg = page_get_pagesize(level);
2715                 poff = ((base + offset) & ~(pg - 1));
2716                 uvaddr = dhp->dh_uvaddr + (poff - base);
2717                 if ((poff >= base) &&
2718                     ((poff + pg) <= (base + dhp->dh_len)) &&
2719                     VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2720                         break;
2721         }
2722 
2723         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK1,
2724             "devmap_roundup: base=%lx poff=%lx dhp=%p",
2725             base, poff, dhp);
2726         DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2727             base, poff, dhp->dh_pfn));
2728 
2729         ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2730         ASSERT(level >= 0);
2731 
2732         *pagesize = pg;
2733         *opfn = dhp->dh_pfn + btop(poff - base);
2734 
2735         rlen = len + offset - (poff - base + pg);
2736 
2737         ASSERT(rlen < (long)len);
2738 
2739         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK2,
2740             "devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
2741             (void *)dhp, level, rlen, pagesize, opfn);
2742         DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2743             "level %x rlen %lx psize %lx opfn %lx\n",
2744             (void *)dhp, level, rlen, *pagesize, *opfn));
2745 
2746         return ((ulong_t)((rlen > 0) ? rlen : 0));
2747 }
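/*
 * Worked example (assumed values, for illustration only): with 8K base
 * pages, dh_mmulevel selecting a 4MB maximum page size, base =
 * ptob(dh_pfn) = 0x40000000, dh_len = 0x800000 and a 4MB-aligned
 * dh_uvaddr, a call with offset = 0x10000 and len = 0x500000 picks
 * pg = 0x400000 and poff = 0x40000000.  Thus *opfn = dh_pfn,
 * *pagesize = 0x400000, and the residue returned is
 * 0x500000 + 0x10000 - 0x400000 = 0x110000 bytes, which the caller
 * (devmap_get_large_pgsize()) feeds into the next round.
 */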
2748 
2749 /*
2750  * find the dhp that contains addr.
2751  */
2752 static devmap_handle_t *
2753 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2754 {
2755         devmap_handle_t *dhp;
2756 
2757         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_FIND_HANDLE,
2758             "devmap_find_handle:start");
2759 
2760         dhp = dhp_head;
2761         while (dhp) {
2762                 if (addr >= dhp->dh_uvaddr &&
2763                     addr < (dhp->dh_uvaddr + dhp->dh_len))
2764                         return (dhp);
2765                 dhp = dhp->dh_next;
2766         }
2767 
2768         return ((devmap_handle_t *)NULL);
2769 }
2770 
2771 /*
2772  * devmap_unload:
2773  *                      Marks a segdev segment, or just its pages if
2774  *                      offset -> offset+len is not the entire segment, as
2775  *                      intercept, and unloads the pages in that range.
2776  */
2777 int
2778 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
2779 {
2780         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2781         caddr_t addr;
2782         ulong_t size;
2783         ssize_t soff;
2784 
2785         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_UNLOAD,
2786             "devmap_unload:start dhp=%p offset=%llx len=%lx",
2787             (void *)dhp, offset, len);
2788         DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
2789             (void *)dhp, offset, len));
2790 
2791         soff = (ssize_t)(offset - dhp->dh_uoff);
2792         soff = round_down_p2(soff, PAGESIZE);
2793         if (soff < 0 || soff >= dhp->dh_len)
2794                 return (FC_MAKE_ERR(EINVAL));
2795 
2796         /*
2797          * Address and size must be page aligned.  Len is rounded up
2798          * to the number of bytes in the whole pages required to cover
2799          * len.  The offset is rounded down to the byte offset of the
2800          * first byte of the page that contains offset.
2801          */
2802         len = round_up_p2(len, PAGESIZE);
2803 
2804         /*
2805          * If len == 0, calculate the size as the number of bytes
2806          * from offset to the end of the segment.
2807          */
2808         if (len == 0)
2809                 size = dhp->dh_len - soff;
2810         else {
2811                 size = len;
2812                 if ((soff + size) > dhp->dh_len)
2813                         return (FC_MAKE_ERR(EINVAL));
2814         }
2815 
2816         /*
2817          * The address is offset bytes from the base address of
2818          * the dhp.
2819          */
2820         addr = (caddr_t)(soff + dhp->dh_uvaddr);
2821 
2822         /*
2823          * If large page size was used in hat_devload(),
2824          * the same page size must be used in hat_unload().
2825          */
2826         if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
2827                 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
2828                     dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
2829         } else {
2830                 hat_unload(dhp->dh_seg->s_as->a_hat,  addr, size,
2831                     HAT_UNLOAD|HAT_UNLOAD_OTHER);
2832         }
2833 
2834         return (0);
2835 }
2836 
2837 /*
2838  * Calculates the optimal large-page address and length for hat_devload().
2839  */
2840 static void
2841 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
2842     size_t *llen, caddr_t *laddr)
2843 {
2844         ulong_t off;
2845         ulong_t pfn;
2846         ulong_t pgsize;
2847         uint_t first = 1;
2848 
2849         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GET_LARGE_PGSIZE,
2850             "devmap_get_large_pgsize:start");
2851 
2852         /*
2853          * RFE - This code only supports large page mappings for devmem.
2854          * It could be changed in the future if we want to support
2855          * large page mappings for kernel exported memory as well.
2856          */
2857         ASSERT(dhp_is_devmem(dhp));
2858         ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
2859 
2860         *llen = 0;
2861         off = (ulong_t)(addr - dhp->dh_uvaddr);
2862         while ((long)len > 0) {
2863                 /*
2864                  * Get the optimal pfn to minimize address translations.
2865                  * devmap_roundup() returns the residue bytes for the
2866                  * next round of calculations.
2867                  */
2868                 len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
2869 
2870                 if (first) {
2871                         *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
2872                         first = 0;
2873                 }
2874 
2875                 *llen += pgsize;
2876                 off = ptob(pfn - dhp->dh_pfn) + pgsize;
2877         }
2878         /* Large page mapping len/addr cover more range than original fault */
2879         ASSERT(*llen >= len && *laddr <= addr);
2880         ASSERT((*laddr + *llen) >= (addr + len));
2881 }
2882 
2883 /*
2884  * Initialize the devmap_softlock structure.
2885  */
2886 static struct devmap_softlock *
2887 devmap_softlock_init(dev_t dev, ulong_t id)
2888 {
2889         struct devmap_softlock *slock;
2890         struct devmap_softlock *tmp;
2891 
2892         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_INIT,
2893             "devmap_softlock_init:start");
2894 
2895         tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
2896         mutex_enter(&devmap_slock);
2897 
2898         for (slock = devmap_slist; slock != NULL; slock = slock->next)
2899                 if ((slock->dev == dev) && (slock->id == id))
2900                         break;
2901 
2902         if (slock == NULL) {
2903                 slock = tmp;
2904                 slock->dev = dev;
2905                 slock->id = id;
2906                 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
2907                 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
2908                 slock->next = devmap_slist;
2909                 devmap_slist = slock;
2910         } else
2911                 kmem_free(tmp, sizeof (struct devmap_softlock));
2912 
2913         mutex_enter(&slock->lock);
2914         slock->refcnt++;
2915         mutex_exit(&slock->lock);
2916         mutex_exit(&devmap_slock);
2917 
2918         return (slock);
2919 }
2920 
2921 /*
2922  * Wake up processes that are sleeping on the softlock.
2923  * Free dh_softlock if refcnt is 0.
2924  */
2925 static void
2926 devmap_softlock_rele(devmap_handle_t *dhp)
2927 {
2928         struct devmap_softlock *slock = dhp->dh_softlock;
2929         struct devmap_softlock *tmp;
2930         struct devmap_softlock *parent;
2931 
2932         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_RELE,
2933             "devmap_softlock_rele:start");
2934 
2935         mutex_enter(&devmap_slock);
2936         mutex_enter(&slock->lock);
2937 
2938         ASSERT(slock->refcnt > 0);
2939 
2940         slock->refcnt--;
2941 
2942         /*
2943          * If no one is using the device, free up the slock data.
2944          */
2945         if (slock->refcnt == 0) {
2946                 slock->softlocked = 0;
2947                 cv_signal(&slock->cv);
2948 
2949                 if (devmap_slist == slock)
2950                         devmap_slist = slock->next;
2951                 else {
2952                         parent = devmap_slist;
2953                         for (tmp = devmap_slist->next; tmp != NULL;
2954                             tmp = tmp->next) {
2955                                 if (tmp == slock) {
2956                                         parent->next = tmp->next;
2957                                         break;
2958                                 }
2959                                 parent = tmp;
2960                         }
2961                 }
2962                 mutex_exit(&slock->lock);
2963                 mutex_destroy(&slock->lock);
2964                 cv_destroy(&slock->cv);
2965                 kmem_free(slock, sizeof (struct devmap_softlock));
2966         } else
2967                 mutex_exit(&slock->lock);
2968 
2969         mutex_exit(&devmap_slock);
2970 }
2971 
2972 /*
2973  * Wake up threads that are waiting on the devmap context (dh_ctx).
2974  * Free dh_ctx if refcnt is 0.
2975  */
2976 static void
2977 devmap_ctx_rele(devmap_handle_t *dhp)
2978 {
2979         struct devmap_ctx *devctx = dhp->dh_ctx;
2980         struct devmap_ctx *tmp;
2981         struct devmap_ctx *parent;
2982         timeout_id_t tid;
2983 
2984         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE,
2985             "devmap_ctx_rele:start");
2986 
2987         mutex_enter(&devmapctx_lock);
2988         mutex_enter(&devctx->lock);
2989 
2990         ASSERT(devctx->refcnt > 0);
2991 
2992         devctx->refcnt--;
2993 
2994         /*
2995          * If no one is using the device, free up the devctx data.
2996          */
2997         if (devctx->refcnt == 0) {
2998                 /*
2999                  * Cancel any pending timeout on this mapping, since the
3000                  * context is about to go away.
3001                  */
3002                 if (devctx->timeout != 0) {
3003                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE_CK1,
3004                             "devmap_ctx_rele:untimeout ctx->timeout");
3005 
3006                         tid = devctx->timeout;
3007                         mutex_exit(&devctx->lock);
3008                         (void) untimeout(tid);
3009                         mutex_enter(&devctx->lock);
3010                 }
3011 
3012                 devctx->oncpu = 0;
3013                 cv_signal(&devctx->cv);
3014 
3015                 if (devmapctx_list == devctx)
3016                         devmapctx_list = devctx->next;
3017                 else {
3018                         parent = devmapctx_list;
3019                         for (tmp = devmapctx_list->next; tmp != NULL;
3020                             tmp = tmp->next) {
3021                                 if (tmp == devctx) {
3022                                         parent->next = tmp->next;
3023                                         break;
3024                                 }
3025                                 parent = tmp;
3026                         }
3027                 }
3028                 mutex_exit(&devctx->lock);
3029                 mutex_destroy(&devctx->lock);
3030                 cv_destroy(&devctx->cv);
3031                 kmem_free(devctx, sizeof (struct devmap_ctx));
3032         } else
3033                 mutex_exit(&devctx->lock);
3034 
3035         mutex_exit(&devmapctx_lock);
3036 }
3037 
3038 /*
3039  * devmap_load:
3040  *                      Marks a segdev segment, or just its pages if
3041  *                      offset -> offset+len is not the entire segment, as
3042  *                      nointercept, and faults in the pages in that range.
3043  */
3044 int
3045 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
3046     uint_t rw)
3047 {
3048         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3049         struct as *asp = dhp->dh_seg->s_as;
3050         caddr_t addr;
3051         ulong_t size;
3052         ssize_t soff;   /* offset from the beginning of the segment */
3053         int rc;
3054 
3055         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_LOAD,
3056             "devmap_load:start dhp=%p offset=%llx len=%lx",
3057             (void *)dhp, offset, len);
3058 
3059         DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
3060             (void *)dhp, offset, len));
3061 
3062         /*
3063          *      The hat layer only supports devload to a process context
3064          *      for which the as lock is held.  Verify that here and return
3065          *      an error if a driver calls devmap_load() on the wrong handle.
3066          */
3067         if ((asp != &kas) && !AS_LOCK_HELD(asp, &asp->a_lock))
3068                 return (FC_MAKE_ERR(EINVAL));
3069 
3070         soff = (ssize_t)(offset - dhp->dh_uoff);
3071         soff = round_down_p2(soff, PAGESIZE);
3072         if (soff < 0 || soff >= dhp->dh_len)
3073                 return (FC_MAKE_ERR(EINVAL));
3074 
3075         /*
3076          * Address and size must be page aligned.  Len is rounded up
3077          * to the number of bytes in the whole pages required to cover
3078          * len.  The offset is rounded down to the byte offset of the
3079          * first byte of the page that contains offset.
3080          */
3081         len = round_up_p2(len, PAGESIZE);
3082 
3083         /*
3084          * If len == 0, then calculate the size by getting
3085          * the number of bytes from offset to the end of the segment.
3086          */
3087         if (len == 0)
3088                 size = dhp->dh_len - soff;
3089         else {
3090                 size = len;
3091                 if ((soff + size) > dhp->dh_len)
3092                         return (FC_MAKE_ERR(EINVAL));
3093         }
3094 
3095         /*
3096          * The address is offset bytes from the base address of
3097          * the segment.
3098          */
3099         addr = (caddr_t)(soff + dhp->dh_uvaddr);
3100 
3101         HOLD_DHP_LOCK(dhp);
3102         rc = segdev_faultpages(asp->a_hat,
3103             dhp->dh_seg, addr, size, type, rw, dhp);
3104         RELE_DHP_LOCK(dhp);
3105         return (rc);
3106 }
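/*
 * Together, devmap_unload() and devmap_load() let a driver's
 * devmap_access(9E) callback serialize access to a device that supports
 * only one active mapping context at a time.  A minimal sketch, assuming
 * a hypothetical driver "xx" whose private data (xx_ctx and its members
 * are assumptions, not part of this file) records which devmap handle
 * currently owns the device context:
 *
 *        static int
 *        xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
 *            size_t len, uint_t type, uint_t rw)
 *        {
 *                struct xx_ctx *ctx = pvtp;
 *                int err;
 *
 *                mutex_enter(&ctx->mu);
 *                if (ctx->cur_dhp != NULL && ctx->cur_dhp != dhp) {
 *                        err = devmap_unload(ctx->cur_dhp, ctx->cur_off,
 *                            ctx->cur_len);
 *                        if (err != 0) {
 *                                mutex_exit(&ctx->mu);
 *                                return (err);
 *                        }
 *                }
 *                ctx->cur_dhp = dhp;
 *                ctx->cur_off = off;
 *                ctx->cur_len = len;
 *                mutex_exit(&ctx->mu);
 *
 *                return (devmap_load(dhp, off, len, type, rw));
 *        }
 */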
3107 
3108 int
3109 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
3110     size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3111 {
3112         register devmap_handle_t *dhp;
3113         int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
3114             size_t *, uint_t);
3115         int (*mmap)(dev_t, off_t, int);
3116         struct devmap_callback_ctl *callbackops;
3117         devmap_handle_t *dhp_head = NULL;
3118         devmap_handle_t *dhp_prev = NULL;
3119         devmap_handle_t *dhp_curr;
3120         caddr_t addr;
3121         int map_flag;
3122         int ret;
3123         ulong_t total_len;
3124         size_t map_len;
3125         size_t resid_len = len;
3126         offset_t map_off = off;
3127         struct devmap_softlock *slock = NULL;
3128 
3129 #ifdef lint
3130         cred = cred;
3131 #endif
3132 
3133         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SETUP,
3134             "devmap_setup:start off=%llx len=%lx", off, len);
3135         DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
3136             off, len));
3137 
3138         devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
3139         mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
3140 
3141         /*
3142          * The driver must provide a devmap(9E) entry point in its cb_ops
3143          * to use the devmap framework.
3144          */
3145         if (devmap == NULL || devmap == nulldev || devmap == nodev)
3146                 return (EINVAL);
3147 
3148         /*
3149          * To protect against an inadvertent entry through a non-NULL devmap
3150          * entry point, return an error if the D_DEVMAP bit is not set in
3151          * cb_flag and mmap is NULL.
3152          */
3153         map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
3154         if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
3155                 return (EINVAL);
3156 
3157         /*
3158          * devmap allows mmap(2) to map multiple registers.
3159          * one devmap_handle is created for each register mapped.
3160          */
3161         for (total_len = 0; total_len < len; total_len += map_len) {
3162                 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
3163 
3164                 if (dhp_prev != NULL)
3165                         dhp_prev->dh_next = dhp;
3166                 else
3167                         dhp_head = dhp;
3168                 dhp_prev = dhp;
3169 
3170                 dhp->dh_prot = prot;
3171                 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
3172                 dhp->dh_dev = dev;
3173                 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
3174                 dhp->dh_uoff = map_off;
3175 
3176                 /*
3177                  * Get mapping specific info from
3178                  * the driver, such as rnumber, roff, len, callbackops,
3179                  * accattrp and, if the mapping is for kernel memory,
3180                  * ddi_umem_cookie.
3181                  */
3182                 if ((ret = cdev_devmap(dev, dhp, map_off,
3183                     resid_len, &map_len, get_udatamodel())) != 0) {
3184                         free_devmap_handle(dhp_head);
3185                         return (ENXIO);
3186                 }
3187 
3188                 if (map_len & PAGEOFFSET) {
3189                         free_devmap_handle(dhp_head);
3190                         return (EINVAL);
3191                 }
3192 
3193                 callbackops = &dhp->dh_callbackops;
3194 
3195                 if ((callbackops->devmap_access == NULL) ||
3196                     (callbackops->devmap_access == nulldev) ||
3197                     (callbackops->devmap_access == nodev)) {
3198                         /*
3199                          * Normally devmap does not support MAP_PRIVATE unless
3200                          * the driver provides a valid devmap_access routine.
3201                          */
3202                         if ((flags & MAP_PRIVATE) != 0) {
3203                                 free_devmap_handle(dhp_head);
3204                                 return (EINVAL);
3205                         }
3206                 } else {
3207                         /*
3208                          * Initialize dh_softlock and dh_ctx if the driver
3209                          * provides devmap_access.
3210                          */
3211                         dhp->dh_softlock = devmap_softlock_init(dev,
3212                             (ulong_t)callbackops->devmap_access);
3213                         dhp->dh_ctx = devmap_ctxinit(dev,
3214                             (ulong_t)callbackops->devmap_access);
3215 
3216                         /*
3217                          * segdev_fault() can only work when all
3218                          * dh_softlocks in a multi-dhp mapping are
3219                          * the same; see the comments in segdev_fault().
3220                          * This code records the first dh_softlock
3221                          * allocated in slock, compares every later
3222                          * allocation against it, and returns an
3223                          * error if they differ.
3224                          */
3225                         if (slock == NULL)
3226                                 slock = dhp->dh_softlock;
3227                         if (slock != dhp->dh_softlock) {
3228                                 free_devmap_handle(dhp_head);
3229                                 return (ENOTSUP);
3230                         }
3231                 }
3232 
3233                 map_off += map_len;
3234                 resid_len -= map_len;
3235         }
3236 
3237         /*
3238          * get the user virtual address and establish the mapping between
3239          * uvaddr and device physical address.
3240          */
3241         if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
3242             != 0) {
3243                 /*
3244                  * Free the devmap handles if an error occurred during mapping.
3245                  */
3246                 free_devmap_handle(dhp_head);
3247 
3248                 return (ret);
3249         }
3250 
3251         /*
3252          * Call the driver's devmap_map callback to do more after the mapping,
3253          * such as allocating driver private data for context management.
3254          */
3255         dhp = dhp_head;
3256         map_off = off;
3257         addr = *addrp;
3258         while (dhp != NULL) {
3259                 callbackops = &dhp->dh_callbackops;
3260                 dhp->dh_uvaddr = addr;
3261                 dhp_curr = dhp;
3262                 if (callbackops->devmap_map != NULL) {
3263                         ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
3264                             dev, flags, map_off,
3265                             dhp->dh_len, &dhp->dh_pvtp);
3266                         if (ret != 0) {
3267                                 struct segdev_data *sdp;
3268 
3269                                 /*
3270                                  * call driver's devmap_unmap entry point
3271                                  * to free driver resources.
3272                                  */
3273                                 dhp = dhp_head;
3274                                 map_off = off;
3275                                 while (dhp != dhp_curr) {
3276                                         callbackops = &dhp->dh_callbackops;
3277                                         if (callbackops->devmap_unmap != NULL) {
3278                                                 (*callbackops->devmap_unmap)(
3279                                                     dhp, dhp->dh_pvtp,
3280                                                     map_off, dhp->dh_len,
3281                                                     NULL, NULL, NULL, NULL);
3282                                         }
3283                                         map_off += dhp->dh_len;
3284                                         dhp = dhp->dh_next;
3285                                 }
3286                                 sdp = dhp_head->dh_seg->s_data;
3287                                 sdp->devmap_data = NULL;
3288                                 free_devmap_handle(dhp_head);
3289                                 return (ENXIO);
3290                         }
3291                 }
3292                 map_off += dhp->dh_len;
3293                 addr += dhp->dh_len;
3294                 dhp = dhp->dh_next;
3295         }
3296 
3297         return (0);
3298 }
3299 
3300 int
3301 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
3302     off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3303 {
3304         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP,
3305             "devmap_segmap:start");
3306         return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
3307             (size_t)len, prot, maxprot, flags, cred));
3308 }
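/*
 * From user context, the path into devmap_setup() above is simply an
 * mmap(2) of the character device.  For a hypothetical device node
 * /dev/xx (an assumption for illustration):
 *
 *        int fd = open("/dev/xx", O_RDWR);
 *        volatile uint32_t *regs = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
 *            MAP_SHARED, fd, 0);
 *
 *        if (regs == MAP_FAILED)
 *                err(1, "mmap");
 *
 * Each register set covered by the request results in one devmap_handle
 * in the chain built by devmap_setup().
 */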
3309 
3310 /*
3311  * Called from devmap_devmem_setup()/devmap_devmem_remap() to see whether
3312  * large pages can be used for this device mapping.
3313  * Also calculates the maximum page size for this mapping.
3314  * This page size will be used by the fault routine for
3315  * optimal page size calculations.
3316  */
3317 static void
3318 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
3319 {
3320         ASSERT(dhp_is_devmem(dhp));
3321         dhp->dh_mmulevel = 0;
3322 
3323         /*
3324          * Use a large page size only if:
3325          *  1. this is device memory,
3326          *  2. the mmu supports multiple page sizes,
3327          *  3. the driver did not disallow it,
3328          *  4. the dhp length is at least as big as the large pagesize, and
3329          *  5. the uvaddr and pfn are large pagesize aligned.
3330          */
3331         if (page_num_pagesizes() > 1 &&
3332             !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
3333                 ulong_t base;
3334                 int level;
3335 
3336                 base = (ulong_t)ptob(dhp->dh_pfn);
3337                 for (level = 1; level < page_num_pagesizes(); level++) {
3338                         size_t pgsize = page_get_pagesize(level);
3339                         if ((dhp->dh_len < pgsize) ||
3340                             (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
3341                             base, pgsize))) {
3342                                 break;
3343                         }
3344                 }
3345                 dhp->dh_mmulevel = level - 1;
3346         }
3347         if (dhp->dh_mmulevel > 0) {
3348                 dhp->dh_flags |= DEVMAP_FLAG_LARGE;
3349         } else {
3350                 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3351         }
3352 }
3353 
3354 /*
3355  * Called by the driver's devmap(9E) routine to pass device-specific info
3356  * to the framework.  Used for device memory mappings only.
3357  */
3358 int
3359 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3360     struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
3361     size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
3362 {
3363         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3364         ddi_acc_handle_t handle;
3365         ddi_map_req_t mr;
3366         ddi_acc_hdl_t *hp;
3367         int err;
3368 
3369         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_SETUP,
3370             "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
3371             (void *)dhp, roff, rnumber, (uint_t)len);
3372         DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
3373             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3374 
3375         /*
3376          * First, check if this function has already been called for this dhp.
3377          */
3378         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3379                 return (DDI_FAILURE);
3380 
3381         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3382                 return (DDI_FAILURE);
3383 
3384         if (flags & DEVMAP_MAPPING_INVALID) {
3385                 /*
3386                  * Don't go up the tree to get the pfn if the driver specifies
3387                  * DEVMAP_MAPPING_INVALID in flags.
3388                  *
3389                  * If DEVMAP_MAPPING_INVALID is specified, the driver must also
3390                  * have granted remap permission (DEVMAP_ALLOW_REMAP).
3391                  */
3392                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3393                         return (DDI_FAILURE);
3394                 }
3395                 dhp->dh_pfn = PFN_INVALID;
3396         } else {
3397                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3398                 if (handle == NULL)
3399                         return (DDI_FAILURE);
3400 
3401                 hp = impl_acc_hdl_get(handle);
3402                 hp->ah_vers = VERS_ACCHDL;
3403                 hp->ah_dip = dip;
3404                 hp->ah_rnumber = rnumber;
3405                 hp->ah_offset = roff;
3406                 hp->ah_len = len;
3407                 if (accattrp != NULL)
3408                         hp->ah_acc = *accattrp;
3409 
3410                 mr.map_op = DDI_MO_MAP_LOCKED;
3411                 mr.map_type = DDI_MT_RNUMBER;
3412                 mr.map_obj.rnumber = rnumber;
3413                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3414                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3415                 mr.map_handlep = hp;
3416                 mr.map_vers = DDI_MAP_VERSION;
3417 
3418                 /*
3419                  * Go up the device tree to get the pfn.
3420                  * The rootnex_map_regspec() routine in nexus drivers has been
3421                  * modified to return the pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3422                  */
3423                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
3424                 dhp->dh_hat_attr = hp->ah_hat_flags;
3425                 impl_acc_hdl_free(handle);
3426 
3427                 if (err)
3428                         return (DDI_FAILURE);
3429         }
3430         /* Should not be using devmem setup for memory pages */
3431         ASSERT(!pf_is_memory(dhp->dh_pfn));
3432 
3433         /* Only some of the flags bits are settable by the driver */
3434         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3435         dhp->dh_len = ptob(btopr(len));
3436 
3437         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3438         dhp->dh_roff = ptob(btop(roff));
3439 
3440         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3441         devmap_devmem_large_page_setup(dhp);
3442         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3443         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3444 
3445 
3446         if (callbackops != NULL) {
3447                 bcopy(callbackops, &dhp->dh_callbackops,
3448                     sizeof (struct devmap_callback_ctl));
3449         }
3450 
3451         /*
3452          * Initialize dh_lock if we want to do remap.
3453          */
3454         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3455                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3456                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3457         }
3458 
3459         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3460 
3461         return (DDI_SUCCESS);
3462 }
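/*
 * A minimal sketch of a driver devmap(9E) entry point that uses
 * devmap_devmem_setup() to export register set 0 of a hypothetical
 * device "xx" (xx_dip, the callback table contents and the single
 * register set are assumptions made only for illustration).  The
 * devmap_callback_ctl fields are, in order, devmap_rev, devmap_map,
 * devmap_access, devmap_dup and devmap_unmap:
 *
 *        static struct devmap_callback_ctl xx_callbackops = {
 *                DEVMAP_OPS_REV,
 *                NULL,
 *                devmap_default_access,
 *                NULL,
 *                NULL
 *        };
 *
 *        static int
 *        xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 *            size_t *maplen, uint_t model)
 *        {
 *                ddi_device_acc_attr_t acc = {
 *                        DDI_DEVICE_ATTR_V0,
 *                        DDI_NEVERSWAP_ACC,
 *                        DDI_STRICTORDER_ACC
 *                };
 *                size_t length = ptob(btopr(len));
 *
 *                if (devmap_devmem_setup(dhp, xx_dip, &xx_callbackops, 0,
 *                    off, length, PROT_ALL, 0, &acc) != DDI_SUCCESS)
 *                        return (ENXIO);
 *
 *                *maplen = length;
 *                return (0);
 *        }
 */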
3463 
3464 int
3465 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3466     uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
3467     uint_t flags, ddi_device_acc_attr_t *accattrp)
3468 {
3469         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3470         ddi_acc_handle_t handle;
3471         ddi_map_req_t mr;
3472         ddi_acc_hdl_t *hp;
3473         pfn_t   pfn;
3474         uint_t  hat_flags;
3475         int     err;
3476 
3477         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_REMAP,
3478             "devmap_devmem_remap:start dhp=%p offset=%llx rnum=%d len=%lx",
3479             (void *)dhp, roff, rnumber, (uint_t)len);
3480         DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
3481             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3482 
3483         /*
3484          * Return failure if setup has not been done or no remap permission
3485          * has been granted during the setup.
3486          */
3487         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3488             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3489                 return (DDI_FAILURE);
3490 
3491         /* Only DEVMAP_MAPPING_INVALID flag supported for remap */
3492         if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
3493                 return (DDI_FAILURE);
3494 
3495         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3496                 return (DDI_FAILURE);
3497 
3498         if (!(flags & DEVMAP_MAPPING_INVALID)) {
3499                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3500                 if (handle == NULL)
3501                         return (DDI_FAILURE);
3502         }
3503 
3504         HOLD_DHP_LOCK(dhp);
3505 
3506         /*
3507          * Unload the old mapping so the next fault will set up the new
3508          * mappings.  Do this while holding the dhp lock so other faults
3509          * don't reestablish the mappings.
3510          */
3511         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3512             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3513 
3514         if (flags & DEVMAP_MAPPING_INVALID) {
3515                 dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
3516                 dhp->dh_pfn = PFN_INVALID;
3517         } else {
3518                 /* clear any prior DEVMAP_MAPPING_INVALID flag */
3519                 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
3520                 hp = impl_acc_hdl_get(handle);
3521                 hp->ah_vers = VERS_ACCHDL;
3522                 hp->ah_dip = dip;
3523                 hp->ah_rnumber = rnumber;
3524                 hp->ah_offset = roff;
3525                 hp->ah_len = len;
3526                 if (accattrp != NULL)
3527                         hp->ah_acc = *accattrp;
3528 
3529                 mr.map_op = DDI_MO_MAP_LOCKED;
3530                 mr.map_type = DDI_MT_RNUMBER;
3531                 mr.map_obj.rnumber = rnumber;
3532                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3533                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3534                 mr.map_handlep = hp;
3535                 mr.map_vers = DDI_MAP_VERSION;
3536 
3537                 /*
3538                  * Go up the device tree to get the pfn.
3539                  * The rootnex_map_regspec() routine in nexus drivers has been
3540                  * modified to return the pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3541                  */
3542                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
3543                 hat_flags = hp->ah_hat_flags;
3544                 impl_acc_hdl_free(handle);
3545                 if (err) {
3546                         RELE_DHP_LOCK(dhp);
3547                         return (DDI_FAILURE);
3548                 }
3549                 /*
3550                  * Store result of ddi_map first in local variables, as we do
3551                  * not want to overwrite the existing dhp with wrong data.
3552                  */
3553                 dhp->dh_pfn = pfn;
3554                 dhp->dh_hat_attr = hat_flags;
3555         }
3556 
3557         /* clear the large page size flag */
3558         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3559 
3560         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3561         dhp->dh_roff = ptob(btop(roff));
3562 
3563         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3564         devmap_devmem_large_page_setup(dhp);
3565         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3566         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3567 
3568         RELE_DHP_LOCK(dhp);
3569         return (DDI_SUCCESS);
3570 }
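/*
 * devmap_devmem_remap() is typically called from a driver routine (an
 * ioctl handler, for example) to retarget an existing user mapping at a
 * different register set without tearing the segment down.  A minimal
 * sketch, assuming a hypothetical driver "xx" whose softstate (xsp, its
 * dhp and dip fields, and the register geometry are assumptions) saved
 * the devmap handle at setup time:
 *
 *        static int
 *        xx_switch_regset(struct xx_state *xsp, uint_t rnum, size_t len)
 *        {
 *                ddi_device_acc_attr_t acc = {
 *                        DDI_DEVICE_ATTR_V0,
 *                        DDI_NEVERSWAP_ACC,
 *                        DDI_STRICTORDER_ACC
 *                };
 *
 *                if (devmap_devmem_remap(xsp->dhp, xsp->dip, rnum, 0,
 *                    ptob(btopr(len)), PROT_ALL, 0, &acc) != DDI_SUCCESS)
 *                        return (EIO);
 *                return (0);
 *        }
 *
 * The earlier setup call for xsp->dhp must have passed
 * DEVMAP_ALLOW_REMAP, or the remap fails as checked above.
 */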
3571 
3572 /*
3573  * Called by the driver's devmap(9E) routine to pass kernel virtual address
3574  * mapping info to the framework.  Used only for kernel memory
3575  * allocated from ddi_umem_alloc().
3576  */
3577 int
3578 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3579     struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3580     offset_t off, size_t len, uint_t maxprot, uint_t flags,
3581     ddi_device_acc_attr_t *accattrp)
3582 {
3583         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3584         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3585 
3586 #ifdef lint
3587         dip = dip;
3588 #endif
3589 
3590         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_SETUP,
3591             "devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
3592             (void *)dhp, off, cookie, len);
3593         DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3594             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3595 
3596         if (cookie == NULL)
3597                 return (DDI_FAILURE);
3598 
3599         /* For UMEM_TRASH, this restriction is not needed */
3600         if ((off + len) > cp->size)
3601                 return (DDI_FAILURE);
3602 
3603         /* check if the cache attributes are supported */
3604         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3605                 return (DDI_FAILURE);
3606 
3607         /*
3608          * First, check if this function has already been called for this dhp.
3609          */
3610         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3611                 return (DDI_FAILURE);
3612 
3613         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3614                 return (DDI_FAILURE);
3615 
3616         if (flags & DEVMAP_MAPPING_INVALID) {
3617                 /*
3618                  * If DEVMAP_MAPPING_INVALID is specified, the driver must also
3619                  * have granted remap permission (DEVMAP_ALLOW_REMAP).
3620                  */
3621                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3622                         return (DDI_FAILURE);
3623                 }
3624         } else {
3625                 dhp->dh_cookie = cookie;
3626                 dhp->dh_roff = ptob(btop(off));
3627                 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3628                 /* set HAT cache attributes */
3629                 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3630                 /* set HAT endianness attributes */
3631                 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3632         }
3633 
3634         /*
3635          * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3636          * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
3637          * create consistent mappings but our intention was to create
3638          * non-consistent mappings.
3639          *
3640          * DEVMEM: hat figures out it's DEVMEM and creates non-consistent
3641          * mappings.
3642          *
3643          * kernel exported memory: hat figures out it's memory and always
3644          * creates consistent mappings.
3645          *
3646          * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3647          *
3648          * /dev/kmem: consistent mappings are created unless they are
3649          * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
3650          * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3651          * mappings of /dev/kmem. See common/io/mem.c
3652          */
3653 
3654         /* Only some of the flags bits are settable by the driver */
3655         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3656 
3657         dhp->dh_len = ptob(btopr(len));
3658         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3659         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3660 
3661         if (callbackops != NULL) {
3662                 bcopy(callbackops, &dhp->dh_callbackops,
3663                     sizeof (struct devmap_callback_ctl));
3664         }
3665         /*
3666          * Initialize dh_lock if we want to do remap.
3667          */
3668         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3669                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3670                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3671         }
3672 
3673         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3674 
3675         return (DDI_SUCCESS);
3676 }
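/*
 * A minimal sketch of a devmap(9E) entry point that exports kernel
 * memory allocated with ddi_umem_alloc() rather than device registers
 * (the xx_* names, the cookie kept in the softstate and the buffer size
 * are assumptions made only for illustration):
 *
 *        static int
 *        xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 *            size_t *maplen, uint_t model)
 *        {
 *                struct xx_state *xsp = xx_get_state(dev);
 *                size_t length = ptob(btopr(len));
 *
 *                if (xsp == NULL || off + length > xsp->umem_size)
 *                        return (ENXIO);
 *
 *                if (devmap_umem_setup(dhp, xsp->dip, NULL, xsp->umem_cookie,
 *                    off, length, PROT_ALL, 0, NULL) != DDI_SUCCESS)
 *                        return (ENXIO);
 *
 *                *maplen = length;
 *                return (0);
 *        }
 *
 * Passing NULL for callbackops and accattrp accepts the framework
 * defaults; a driver that needs context management would pass a
 * devmap_callback_ctl as in the devmem sketch above.
 */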
3677 
3678 int
3679 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3680     ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3681     uint_t flags, ddi_device_acc_attr_t *accattrp)
3682 {
3683         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3684         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3685 
3686         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_REMAP,
3687             "devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
3688             (void *)dhp, off, cookie, len);
3689         DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3690             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3691 
3692 #ifdef lint
3693         dip = dip;
3694         accattrp = accattrp;
3695 #endif
3696         /*
3697          * Return failure if setup has not been done or no remap permission
3698          * has been granted during the setup.
3699          */
3700         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3701             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3702                 return (DDI_FAILURE);
3703 
3704         /* No flags supported for remap yet */
3705         if (flags != 0)
3706                 return (DDI_FAILURE);
3707 
3708         /* check if the cache attributes are supported */
3709         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3710                 return (DDI_FAILURE);
3711 
3712         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3713                 return (DDI_FAILURE);
3714 
3715         /* For UMEM_TRASH, this restriction is not needed */
3716         if ((off + len) > cp->size)
3717                 return (DDI_FAILURE);
3718 
3719         HOLD_DHP_LOCK(dhp);
3720         /*
3721          * Unload the old mapping so the next fault will set up the new
3722          * mappings.  Do this while holding the dhp lock so other faults
3723          * don't reestablish the mappings.
3724          */
3725         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3726             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3727 
3728         dhp->dh_cookie = cookie;
3729         dhp->dh_roff = ptob(btop(off));
3730         dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3731         /* set HAT cache attributes */
3732         i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3733         /* set HAT endianness attributes */
3734         i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3735 
3736         /* clear the large page size flag */
3737         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3738 
3739         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3740         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3741         RELE_DHP_LOCK(dhp);
3742         return (DDI_SUCCESS);
3743 }
3744 
3745 /*
3746  * Set the timeout value for the driver's context management callback,
3747  * e.g. devmap_access().
3748  */
3749 void
3750 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3751 {
3752         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3753 
3754         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SET_CTX_TIMEOUT,
3755             "devmap_set_ctx_timeout:start dhp=%p ticks=%x",
3756             (void *)dhp, ticks);
3757         dhp->dh_timeout_length = ticks;
3758 }
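/*
 * A driver typically calls this from its devmap_map(9E) callback, once
 * its private context has been created, so that a thread holding the
 * device context keeps it for a short while before another mapping can
 * steal it back.  For example (the value is only an assumption):
 *
 *        devmap_set_ctx_timeout(dhp, drv_usectohz(1000));
 */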
3759 
3760 int
3761 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3762     size_t len, uint_t type, uint_t rw)
3763 {
3764 #ifdef lint
3765         pvtp = pvtp;
3766 #endif
3767 
3768         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DEFAULT_ACCESS,
3769             "devmap_default_access:start");
3770         return (devmap_load(dhp, off, len, type, rw));
3771 }
3772 
3773 /*
3774  * segkmem_alloc() wrapper to allocate memory which is both
3775  * non-relocatable (for DR) and sharelocked, since the rest
3776  * of this segment driver requires it.
3777  */
3778 static void *
3779 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
3780 {
3781         ASSERT(vmp != NULL);
3782         ASSERT(kvseg.s_base != NULL);
3783         vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
3784         return (segkmem_alloc(vmp, size, vmflag));
3785 }
3786 
3787 /*
3788  * This is where things are a bit incestuous with seg_kmem: unlike
3789  * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
3790  * we need to do a bit of a dance around that to prevent duplication of
3791  * code until we decide to bite the bullet and implement a new kernel
3792  * segment for driver-allocated memory that is exported to user space.
3793  */
3794 static void
3795 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
3796 {
3797         page_t *pp;
3798         caddr_t addr = inaddr;
3799         caddr_t eaddr;
3800         pgcnt_t npages = btopr(size);
3801 
3802         ASSERT(vmp != NULL);
3803         ASSERT(kvseg.s_base != NULL);
3804         ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
3805 
3806         hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
3807 
3808         for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
3809                 /*
3810                  * Use page_find() instead of page_lookup() to find the page
3811                  * since we know that it is hashed and has a shared lock.
3812                  */
3813                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
3814 
3815                 if (pp == NULL)
3816                         panic("devmap_free_pages: page not found");
3817                 if (!page_tryupgrade(pp)) {
3818                         page_unlock(pp);
3819                         pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
3820                             SE_EXCL);
3821                         if (pp == NULL)
3822                                 panic("devmap_free_pages: page already freed");
3823                 }
3824                 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
3825                 pp->p_lckcnt = 0;
3826                 page_destroy(pp, 0);
3827         }
3828         page_unresv(npages);
3829 
3830         if (vmp != NULL)
3831                 vmem_free(vmp, inaddr, size);
3832 }
3833 
3834 /*
3835  * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
3836  * allocating non-pageable kmem in response to a ddi_umem_alloc()
3837  * default request. For now we allocate our own pages and we keep
3838  * them long-term sharelocked, since: A) the fault routines expect the
3839  * memory to already be locked; B) pageable umem is already long-term
3840  * locked; C) it's a lot of work to make it otherwise, particularly
3841  * since the nexus layer expects the pages to never fault. An RFE is to
3842  * not keep the pages long-term locked, but instead to be able to
3843  * take faults on them and simply look them up in kvp in case we
3844  * fault on them. Even then, we must take care not to let pageout
3845  * steal them from us since the data must remain resident; if we
3846  * do this we must come up with some way to pin the pages to prevent
3847  * faults while a driver is doing DMA to/from them.
3848  */
3849 static void *
3850 devmap_umem_alloc_np(size_t size, size_t flags)
3851 {
3852         void *buf;
3853         int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
3854 
3855         buf = vmem_alloc(umem_np_arena, size, vmflags);
3856         if (buf != NULL)
3857                 bzero(buf, size);
3858         return (buf);
3859 }
3860 
3861 static void
3862 devmap_umem_free_np(void *addr, size_t size)
3863 {
3864         vmem_free(umem_np_arena, addr, size);
3865 }
3866 
3867 /*
3868  * Allocate page-aligned kernel memory for exporting to userland.
3869  * The devmap framework will use the cookie allocated by ddi_umem_alloc()
3870  * to find a user virtual address that is of the same color as the address
3871  * allocated here.
3872  */
3873 void *
3874 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
3875 {
3876         register size_t len = ptob(btopr(size));
3877         void *buf = NULL;
3878         struct ddi_umem_cookie *cp;
3879         int iflags = 0;
3880 
3881         *cookie = NULL;
3882 
3883         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_ALLOC,
3884             "devmap_umem_alloc:start");
3885         if (len == 0)
3886                 return ((void *)NULL);
3887 
3888         /*
3889          * allocate cookie
3890          */
3891         if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
3892             flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
3893                 ASSERT(flags & DDI_UMEM_NOSLEEP);
3894                 return ((void *)NULL);
3895         }
3896 
3897         if (flags & DDI_UMEM_PAGEABLE) {
3898                 /* Only one of the flags is allowed */
3899                 ASSERT(!(flags & DDI_UMEM_TRASH));
3900                 /* initialize resource with 0 */
3901                 iflags = KPD_ZERO;
3902 
3903                 /*
3904                  * To allocate unlocked pageable memory, use segkp_get() to
3905                  * create a segkp segment.  Since segkp can only service kas,
3906                  * other segment drivers such as segdev have to do
3907                  * as_fault(segkp, SOFTLOCK) in their fault routines.
3908                  */
3909                 if (flags & DDI_UMEM_NOSLEEP)
3910                         iflags |= KPD_NOWAIT;
3911 
3912                 if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
3913                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3914                         return ((void *)NULL);
3915                 }
3916                 cp->type = KMEM_PAGEABLE;
3917                 mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
3918                 cp->locked = 0;
3919         } else if (flags & DDI_UMEM_TRASH) {
3920                 /* Only one of the flags is allowed */
3921                 ASSERT(!(flags & DDI_UMEM_PAGEABLE));
3922                 cp->type = UMEM_TRASH;
3923                 buf = NULL;
3924         } else {
3925                 if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
3926                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3927                         return ((void *)NULL);
3928                 }
3929 
3930                 cp->type = KMEM_NON_PAGEABLE;
3931         }
3932 
3933         /*
3934          * Need to save the size here; it is used later when
3935          * the memory is freed by ddi_umem_free().
3936          */
3937         cp->size = len;
3938         cp->cvaddr = (caddr_t)buf;
3939 
3940         *cookie =  (void *)cp;
3941         return (buf);
3942 }
3943 
3944 void
3945 ddi_umem_free(ddi_umem_cookie_t cookie)
3946 {
3947         struct ddi_umem_cookie *cp;
3948 
3949         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_FREE,
3950             "devmap_umem_free:start");
3951 
3952         /*
3953          * If cookie is NULL, this call has no effect.
3954          */
3955         if (cookie == NULL)
3956                 return;
3957 
3958         cp = (struct ddi_umem_cookie *)cookie;
3959 
3960         switch (cp->type) {
3961         case KMEM_PAGEABLE :
3962                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3963                 /*
3964                  * Check if there are still any pending faults on the cookie
3965                  * while the driver is deleting it.
3966                  * XXX - could change to an ASSERT but won't catch errant drivers.
3967                  */
3968                 mutex_enter(&cp->lock);
3969                 if (cp->locked) {
3970                         mutex_exit(&cp->lock);
3971                         panic("ddi_umem_free for cookie with pending faults %p",
3972                             (void *)cp);
3973                         return;
3974                 }
3975 
3976                 segkp_release(segkp, cp->cvaddr);
3977 
3978                 /*
3979                  * Destroy the mutex associated with this cookie.
3980                  */
3981                 mutex_destroy(&cp->lock);
3982                 break;
3983         case KMEM_NON_PAGEABLE :
3984                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3985                 devmap_umem_free_np(cp->cvaddr, cp->size);
3986                 break;
3987         case UMEM_TRASH :
3988                 break;
3989         case UMEM_LOCKED :
3990                 /* Callers should use ddi_umem_unlock for this type */
3991                 ddi_umem_unlock(cookie);
3992                 /* Frees the cookie too */
3993                 return;
3994         default:
3995                 /* panic so we can diagnose the underlying cause */
3996                 panic("ddi_umem_free: illegal cookie type 0x%x\n",
3997                     cp->type);
3998         }
3999 
4000         kmem_free(cookie, sizeof (struct ddi_umem_cookie));
4001 }
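/*
 * ddi_umem_alloc() and ddi_umem_free() are normally paired across a
 * driver's attach(9E) and detach(9E) routines, with the cookie kept in
 * the softstate so that devmap_umem_setup() can refer to it later.  A
 * minimal sketch, assuming a hypothetical "xx" softstate with umem_kva,
 * umem_size and umem_cookie fields:
 *
 *        In attach(9E):
 *                xsp->umem_size = ptob(4);
 *                xsp->umem_kva = ddi_umem_alloc(xsp->umem_size,
 *                    DDI_UMEM_NOSLEEP, &xsp->umem_cookie);
 *                if (xsp->umem_kva == NULL)
 *                        return (DDI_FAILURE);
 *
 *        In detach(9E):
 *                ddi_umem_free(xsp->umem_cookie);
 *                xsp->umem_cookie = NULL;
 */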
4002 
4003 
4004 static int
4005 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
4006 {
4007         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4008 
4009         /*
4010          * It looks as if it is always mapped shared
4011          */
4012         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GETMEMID,
4013             "segdev_getmemid:start");
4014         memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
4015         memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
4016         return (0);
4017 }
4018 
4019 /*
4020  * ddi_umem_alloc() non-pageable quantum cache max size.
4021  * This is just a SWAG.
4022  */
4023 #define DEVMAP_UMEM_QUANTUM     (8*PAGESIZE)
4024 
4025 /*
4026  * Initialize seg_dev from boot. This routine sets up the trash page
4027  * and creates the umem_np_arena used to back non-pageable memory
4028  * requests.
4029  */
4030 void
4031 segdev_init(void)
4032 {
4033         struct seg kseg;
4034 
4035         umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
4036             devmap_alloc_pages, devmap_free_pages, heap_arena,
4037             DEVMAP_UMEM_QUANTUM, VM_SLEEP);
4038 
4039         kseg.s_as = &kas;
4040         trashpp = page_create_va(&trashvp, 0, PAGESIZE,
4041             PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
4042         if (trashpp == NULL)
4043                 panic("segdev_init: failed to create trash page");
4044         pagezero(trashpp, 0, PAGESIZE);
4045         page_downgrade(trashpp);
4046 }
4047 
4048 /*
4049  * Invoke platform-dependent support routines so that /proc can have
4050  * the platform code deal with curious hardware.
4051  */
4052 int
4053 segdev_copyfrom(struct seg *seg,
4054     caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
4055 {
4056         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4057         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4058 
4059         return (e_ddi_copyfromdev(sp->s_dip,
4060             (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
4061 }
4062 
4063 int
4064 segdev_copyto(struct seg *seg,
4065     caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
4066 {
4067         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4068         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4069 
4070         return (e_ddi_copytodev(sp->s_dip,
4071             (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));
4072 }