1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 /*
  41  * VM - segment of a mapped device.
  42  *
  43  * This segment driver is used when mapping character special devices.
  44  */
  45 
  46 #include <sys/types.h>
  47 #include <sys/t_lock.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/vtrace.h>
  50 #include <sys/systm.h>
  51 #include <sys/vmsystm.h>
  52 #include <sys/mman.h>
  53 #include <sys/errno.h>
  54 #include <sys/kmem.h>
  55 #include <sys/cmn_err.h>
  56 #include <sys/vnode.h>
  57 #include <sys/proc.h>
  58 #include <sys/conf.h>
  59 #include <sys/debug.h>
  60 #include <sys/ddidevmap.h>
  61 #include <sys/ddi_implfuncs.h>
  62 #include <sys/lgrp.h>
  63 
  64 #include <vm/page.h>
  65 #include <vm/hat.h>
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_dev.h>
  69 #include <vm/seg_kp.h>
  70 #include <vm/seg_kmem.h>
  71 #include <vm/vpage.h>
  72 
  73 #include <sys/sunddi.h>
  74 #include <sys/esunddi.h>
  75 #include <sys/fs/snode.h>
  76 
  77 
  78 #if DEBUG
  79 int segdev_debug;
  80 #define DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
  81 #else
  82 #define DEBUGF(level, args)
  83 #endif
  84 
  85 /* Default timeout for devmap context management */
  86 #define CTX_TIMEOUT_VALUE 0
  87 
  88 #define HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  89                         { mutex_enter(&dhp->dh_lock); }
  90 
  91 #define RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  92                         { mutex_exit(&dhp->dh_lock); }
  93 
  94 #define round_down_p2(a, s)     ((a) & ~((s) - 1))
  95 #define round_up_p2(a, s)       (((a) + (s) - 1) & ~((s) - 1))
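
     /*
      * Illustrative example (values assumed, not taken from the code above):
      * with s = 0x2000, round_down_p2(0x4567, 0x2000) yields 0x4000 and
      * round_up_p2(0x4567, 0x2000) yields 0x6000.  Both macros assume that
      * s is a power of two.
      */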
  96 
  97 /*
  98  * VA_PA_ALIGNED checks to see if both VA and PA are on a pgsize boundary
  99  * VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t. pgsize
 100  */
 101 #define VA_PA_ALIGNED(uvaddr, paddr, pgsize)            \
 102         (((uvaddr | paddr) & (pgsize - 1)) == 0)
 103 #define VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)     \
 104         (((uvaddr ^ paddr) & (pgsize - 1)) == 0)
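
     /*
      * Illustrative example (values assumed): with pgsize = 0x400000 (4M),
      * uvaddr = 0x80400000 and paddr = 0x10400000 satisfy both macros;
      * uvaddr = 0x80500000 and paddr = 0x10100000 fail VA_PA_ALIGNED but
      * satisfy VA_PA_PGSIZE_ALIGNED, since both are offset from a 4M
      * boundary by the same amount (0x100000).
      */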
 105 
 106 #define vpgtob(n)       ((n) * sizeof (struct vpage))   /* For brevity */
 107 
 108 #define VTOCVP(vp)      (VTOS(vp)->s_commonvp)       /* we "know" it's an snode */
 109 
 110 static struct devmap_ctx *devmapctx_list = NULL;
 111 static struct devmap_softlock *devmap_slist = NULL;
 112 
 113 /*
 114  * mutex, vnode and page for the page of zeros we use for the trash mappings.
 115  * One trash page is allocated on the first ddi_umem_setup call that uses it.
 116  * XXX Eventually, we may want to combine this with what segnf does when all
 117  * hat layers implement HAT_NOFAULT.
 118  *
 119  * The trash page is used when the backing store for a userland mapping is
 120  * removed but the application semantics do not take kindly to a SIGBUS.
 121  * In that scenario, the application's pages are mapped to some dummy page
 122  * which returns garbage on reads, while writes go to a common place.
 123  * (Perfect for NO_FAULT semantics)
 124  * The device driver is responsible for communicating to the app with some
 125  * other mechanism that such remapping has happened and the app should take
 126  * corrective action.
 127  * We can also use an anonymous memory page, as there is no requirement to
 128  * keep the page locked; however, this complicates the fault code. RFE.
 129  */
 130 static struct vnode trashvp;
 131 static struct page *trashpp;
 132 
 133 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
 134 static vmem_t *umem_np_arena;
 135 
 136 /* Set the cookie to a value we know will never be a valid umem_cookie */
 137 #define DEVMAP_DEVMEM_COOKIE    ((ddi_umem_cookie_t)0x1)
 138 
 139 /*
 140  * Macros to check the type of a devmap handle
 141  */
 142 #define cookie_is_devmem(c)     \
 143         ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
 144 
 145 #define cookie_is_pmem(c)       \
 146         ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
 147 
 148 #define cookie_is_kpmem(c)      (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
 149         ((c)->type == KMEM_PAGEABLE))
 150 
 151 #define dhp_is_devmem(dhp)      \
 152         (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 153 
 154 #define dhp_is_pmem(dhp)        \
 155         (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 156 
 157 #define dhp_is_kpmem(dhp)       \
 158         (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
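
     /*
      * To summarize the checks above: dh_cookie is either one of the two
      * sentinel cookie values (devmem or pmem) or a pointer to a real
      * struct ddi_umem_cookie.  Only in the latter case is the cookie
      * dereferenced, and a type of KMEM_PAGEABLE marks pageable kernel
      * memory backed by segkp (see the kpmem lock routines below).
      */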
 159 
 160 /*
 161  * Private seg op routines.
 162  */
 163 static int      segdev_dup(struct seg *, struct seg *);
 164 static int      segdev_unmap(struct seg *, caddr_t, size_t);
 165 static void     segdev_free(struct seg *);
 166 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
 167                     enum fault_type, enum seg_rw);
 168 static faultcode_t segdev_faulta(struct seg *, caddr_t);
 169 static int      segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
 170 static int      segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
 171 static void     segdev_badop(void);
 172 static int      segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
 173 static size_t   segdev_incore(struct seg *, caddr_t, size_t, char *);
 174 static int      segdev_lockop(struct seg *, caddr_t, size_t, int, int,
 175                     ulong_t *, size_t);
 176 static int      segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
 177 static u_offset_t       segdev_getoffset(struct seg *, caddr_t);
 178 static int      segdev_gettype(struct seg *, caddr_t);
 179 static int      segdev_getvp(struct seg *, caddr_t, struct vnode **);
 180 static int      segdev_advise(struct seg *, caddr_t, size_t, uint_t);
 181 static int      segdev_pagelock(struct seg *, caddr_t, size_t,
 182                     struct page ***, enum lock_type, enum seg_rw);
 183 static int      segdev_getmemid(struct seg *, caddr_t, memid_t *);
 184 
 185 /*
 186  * XXX  this struct is used by rootnex_map_fault to identify
 187  *      the segment it has been passed. So if you make it
 188  *      "static" you'll need to fix rootnex_map_fault.
 189  */
 190 struct seg_ops segdev_ops = {
 191         .dup            = segdev_dup,
 192         .unmap          = segdev_unmap,
 193         .free           = segdev_free,
 194         .fault          = segdev_fault,
 195         .faulta         = segdev_faulta,
 196         .setprot        = segdev_setprot,
 197         .checkprot      = segdev_checkprot,
 198         .kluster        = (int (*)())segdev_badop,
 199         .sync           = segdev_sync,
 200         .incore         = segdev_incore,
 201         .lockop         = segdev_lockop,
 202         .getprot        = segdev_getprot,
 203         .getoffset      = segdev_getoffset,
 204         .gettype        = segdev_gettype,
 205         .getvp          = segdev_getvp,
 206         .advise         = segdev_advise,
 207         .pagelock       = segdev_pagelock,
 208         .getmemid       = segdev_getmemid,
 209 };
 210 
 211 /*
 212  * Private segdev support routines
 213  */
 214 static struct segdev_data *sdp_alloc(void);
 215 
 216 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
 217     size_t, enum seg_rw);
 218 
 219 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
 220     struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
 221 
 222 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
 223     size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
 224 
 225 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
 226 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
 227 static void devmap_softlock_rele(devmap_handle_t *);
 228 static void devmap_ctx_rele(devmap_handle_t *);
 229 
 230 static void devmap_ctxto(void *);
 231 
 232 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
 233     caddr_t addr);
 234 
 235 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
 236     ulong_t *opfn, ulong_t *pagesize);
 237 
 238 static void free_devmap_handle(devmap_handle_t *dhp);
 239 
 240 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 241     struct seg *newseg);
 242 
 243 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
 244 
 245 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
 246 
 247 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
 248 
 249 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
 250     offset_t off, size_t len, uint_t flags);
 251 
 252 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
 253     caddr_t addr, size_t *llen, caddr_t *laddr);
 254 
 255 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
 256 
 257 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
 258 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
 259 
 260 static void *devmap_umem_alloc_np(size_t size, size_t flags);
 261 static void devmap_umem_free_np(void *addr, size_t size);
 262 
 263 /*
 264  * routines to lock and unlock underlying segkp segment for
 265  * KMEM_PAGEABLE type cookies.
 266  */
 267 static faultcode_t  acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
 268 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
 269 
 270 /*
 271  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
 272  * drivers with devmap_access callbacks
 273  */
 274 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
 275         enum fault_type);
 276 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
 277         enum fault_type);
 278 
 279 static kmutex_t devmapctx_lock;
 280 
 281 static kmutex_t devmap_slock;
 282 
 283 /*
 284  * Initialize the thread callbacks and thread private data.
 285  */
 286 static struct devmap_ctx *
 287 devmap_ctxinit(dev_t dev, ulong_t id)
 288 {
 289         struct devmap_ctx       *devctx;
 290         struct devmap_ctx       *tmp;
 291         dev_info_t              *dip;
 292 
 293         tmp =  kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
 294 
 295         mutex_enter(&devmapctx_lock);
 296 
 297         dip = e_ddi_hold_devi_by_dev(dev, 0);
 298         ASSERT(dip != NULL);
 299         ddi_release_devi(dip);
 300 
 301         for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
 302                 if ((devctx->dip == dip) && (devctx->id == id))
 303                         break;
 304 
 305         if (devctx == NULL) {
 306                 devctx = tmp;
 307                 devctx->dip = dip;
 308                 devctx->id = id;
 309                 mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
 310                 cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
 311                 devctx->next = devmapctx_list;
 312                 devmapctx_list = devctx;
 313         } else
 314                 kmem_free(tmp, sizeof (struct devmap_ctx));
 315 
 316         mutex_enter(&devctx->lock);
 317         devctx->refcnt++;
 318         mutex_exit(&devctx->lock);
 319         mutex_exit(&devmapctx_lock);
 320 
 321         return (devctx);
 322 }
 323 
 324 /*
 325  * Timeout callback called if a CPU has not given up the device context
 326  * within dhp->dh_timeout_length ticks
 327  */
 328 static void
 329 devmap_ctxto(void *data)
 330 {
 331         struct devmap_ctx *devctx = data;
 332 
 333         TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
 334             "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
 335         mutex_enter(&devctx->lock);
 336         /*
 337          * Set oncpu = 0 so the next mapping trying to get the device
 338          * context can get it.
 339          */
 340         devctx->oncpu = 0;
 341         devctx->timeout = 0;
 342         cv_signal(&devctx->cv);
 343         mutex_exit(&devctx->lock);
 344 }
 345 
 346 /*
 347  * Create a device segment.
 348  */
 349 int
 350 segdev_create(struct seg *seg, void *argsp)
 351 {
 352         struct segdev_data *sdp;
 353         struct segdev_crargs *a = (struct segdev_crargs *)argsp;
 354         devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
 355         int error;
 356 
 357         /*
 358          * Since the address space is "write" locked, we
 359          * don't need the segment lock to protect "segdev" data.
 360          */
 361         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 362 
 363         hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 364 
 365         sdp = sdp_alloc();
 366 
 367         sdp->mapfunc = a->mapfunc;
 368         sdp->offset = a->offset;
 369         sdp->prot = a->prot;
 370         sdp->maxprot = a->maxprot;
 371         sdp->type = a->type;
 372         sdp->pageprot = 0;
 373         sdp->softlockcnt = 0;
 374         sdp->vpage = NULL;
 375 
 376         if (sdp->mapfunc == NULL)
 377                 sdp->devmap_data = dhp;
 378         else
 379                 sdp->devmap_data = dhp = NULL;
 380 
 381         sdp->hat_flags = a->hat_flags;
 382         sdp->hat_attr = a->hat_attr;
 383 
 384         /*
 385          * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
 386          */
 387         ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
 388 
 389         /*
 390          * Hold shadow vnode -- segdev only deals with
 391          * character (VCHR) devices. We use the common
 392          * vp to hang pages on.
 393          */
 394         sdp->vp = specfind(a->dev, VCHR);
 395         ASSERT(sdp->vp != NULL);
 396 
 397         seg->s_ops = &segdev_ops;
 398         seg->s_data = sdp;
 399 
 400         while (dhp != NULL) {
 401                 dhp->dh_seg = seg;
 402                 dhp = dhp->dh_next;
 403         }
 404 
 405         /*
 406          * Inform the vnode of the new mapping.
 407          */
 408         /*
 409          * It is ok to pass sdp->maxprot to ADDMAP rather than the
 410          * dhp-specific maxprot because spec_addmap does not use maxprot.
 411          */
 412         error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
 413             seg->s_as, seg->s_base, seg->s_size,
 414             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 415 
 416         if (error != 0) {
 417                 sdp->devmap_data = NULL;
 418                 hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
 419                     HAT_UNLOAD_UNMAP);
 420         } else {
 421                 /*
 422                  * Mappings of /dev/null don't count towards the VSZ of a
 423                  * process.  Mappings of /dev/null have no mapping type.
 424                  */
 425                 if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
 426                     MAP_PRIVATE)) == 0) {
 427                         seg->s_as->a_resvsize -= seg->s_size;
 428                 }
 429         }
 430 
 431         return (error);
 432 }
 433 
 434 static struct segdev_data *
 435 sdp_alloc(void)
 436 {
 437         struct segdev_data *sdp;
 438 
 439         sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
 440         rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
 441 
 442         return (sdp);
 443 }
 444 
 445 /*
 446  * Duplicate seg and return new segment in newseg.
 447  */
 448 static int
 449 segdev_dup(struct seg *seg, struct seg *newseg)
 450 {
 451         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 452         struct segdev_data *newsdp;
 453         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 454         size_t npages;
 455         int ret;
 456 
 457         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
 458             "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);
 459 
 460         DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
 461             (void *)dhp, (void *)seg));
 462 
 463         /*
 464          * Since the address space is "write" locked, we
 465          * don't need the segment lock to protect "segdev" data.
 466          */
 467         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 468 
 469         newsdp = sdp_alloc();
 470 
 471         newseg->s_ops = seg->s_ops;
 472         newseg->s_data = (void *)newsdp;
 473 
 474         VN_HOLD(sdp->vp);
 475         newsdp->vp   = sdp->vp;
 476         newsdp->mapfunc = sdp->mapfunc;
 477         newsdp->offset       = sdp->offset;
 478         newsdp->pageprot = sdp->pageprot;
 479         newsdp->prot = sdp->prot;
 480         newsdp->maxprot = sdp->maxprot;
 481         newsdp->type = sdp->type;
 482         newsdp->hat_attr = sdp->hat_attr;
 483         newsdp->hat_flags = sdp->hat_flags;
 484         newsdp->softlockcnt = 0;
 485 
 486         /*
 487          * Initialize per page data if the segment we are
 488          * dup'ing has per page information.
 489          */
 490         npages = seg_pages(newseg);
 491 
 492         if (sdp->vpage != NULL) {
 493                 size_t nbytes = vpgtob(npages);
 494 
 495                 newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
 496                 bcopy(sdp->vpage, newsdp->vpage, nbytes);
 497         } else
 498                 newsdp->vpage = NULL;
 499 
 500         /*
 501          * duplicate devmap handles
 502          */
 503         if (dhp != NULL) {
 504                 ret = devmap_handle_dup(dhp,
 505                     (devmap_handle_t **)&newsdp->devmap_data, newseg);
 506                 if (ret != 0) {
 507                         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
 508                             "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
 509                             ret, (void *)dhp, (void *)seg);
 510                         DEBUGF(1, (CE_CONT,
 511                             "segdev_dup: ret %x dhp %p seg %p\n",
 512                             ret, (void *)dhp, (void *)seg));
 513                         return (ret);
 514                 }
 515         }
 516 
 517         /*
 518          * Inform the common vnode of the new mapping.
 519          */
 520         return (VOP_ADDMAP(VTOCVP(newsdp->vp),
 521             newsdp->offset, newseg->s_as,
 522             newseg->s_base, newseg->s_size, newsdp->prot,
 523             newsdp->maxprot, sdp->type, CRED(), NULL));
 524 }
 525 
 526 /*
 527  * duplicate devmap handles
 528  */
 529 static int
 530 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 531     struct seg *newseg)
 532 {
 533         devmap_handle_t *newdhp_save = NULL;
 534         devmap_handle_t *newdhp = NULL;
 535         struct devmap_callback_ctl *callbackops;
 536 
 537         while (dhp != NULL) {
 538                 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 539 
 540                 /* Need to lock the original dhp while copying if REMAP */
 541                 HOLD_DHP_LOCK(dhp);
 542                 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 543                 RELE_DHP_LOCK(dhp);
 544                 newdhp->dh_seg = newseg;
 545                 newdhp->dh_next = NULL;
 546                 if (newdhp_save != NULL)
 547                         newdhp_save->dh_next = newdhp;
 548                 else
 549                         *new_dhp = newdhp;
 550                 newdhp_save = newdhp;
 551 
 552                 callbackops = &newdhp->dh_callbackops;
 553 
 554                 if (dhp->dh_softlock != NULL)
 555                         newdhp->dh_softlock = devmap_softlock_init(
 556                             newdhp->dh_dev,
 557                             (ulong_t)callbackops->devmap_access);
 558                 if (dhp->dh_ctx != NULL)
 559                         newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 560                             (ulong_t)callbackops->devmap_access);
 561 
 562                 /*
 563                  * Initialize dh_lock if we want to do remap.
 564                  */
 565                 if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
 566                         mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
 567                         newdhp->dh_flags |= DEVMAP_LOCK_INITED;
 568                 }
 569 
 570                 if (callbackops->devmap_dup != NULL) {
 571                         int ret;
 572 
 573                         /*
 574                          * Call the dup callback so that the driver can
 575                          * duplicate its private data.
 576                          */
 577                         ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
 578                             (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
 579 
 580                         if (ret != 0) {
 581                                 /*
 582                                  * We want to free up this segment as the driver
 583                                  * has indicated that we can't dup it.  But we
 584                                  * don't want to call the driver's devmap_unmap
 585                                  * callback function, as the driver does not
 586                                  * think this segment exists. The caller of
 587                                  * devmap_dup will call seg_free on newseg
 588                                  * as it was the caller that allocated the
 589                                  * segment.
 590                                  */
 591                                 DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
 592                                     "newdhp %p dhp %p\n", (void *)newdhp,
 593                                     (void *)dhp));
 594                                 callbackops->devmap_unmap = NULL;
 595                                 return (ret);
 596                         }
 597                 }
 598 
 599                 dhp = dhp->dh_next;
 600         }
 601 
 602         return (0);
 603 }
 604 
 605 /*
 606  * Split a segment at addr for length len.
 607  */
 608 /*ARGSUSED*/
 609 static int
 610 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
 611 {
 612         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 613         register struct segdev_data *nsdp;
 614         register struct seg *nseg;
 615         register size_t opages;         /* old segment size in pages */
 616         register size_t npages;         /* new segment size in pages */
 617         register size_t dpages;         /* pages being deleted (unmapped) */
 618         register size_t nbytes;
 619         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 620         devmap_handle_t *dhpp;
 621         devmap_handle_t *newdhp;
 622         struct devmap_callback_ctl *callbackops;
 623         caddr_t nbase;
 624         offset_t off;
 625         ulong_t nsize;
 626         size_t mlen, sz;
 627 
 628         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
 629             "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
 630             (void *)dhp, (void *)seg, (void *)addr, len);
 631 
 632         DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
 633             (void *)dhp, (void *)seg, (void *)addr, len));
 634 
 635         /*
 636          * Since the address space is "write" locked, we
 637          * don't need the segment lock to protect "segdev" data.
 638          */
 639         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 640 
 641         if ((sz = sdp->softlockcnt) > 0) {
 642                 /*
 643                  * Fail the unmap if pages are SOFTLOCKed through this mapping.
 644                  * softlockcnt is protected from change by the as write lock.
 645                  */
 646                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
 647                     "segdev_unmap:error softlockcnt = %ld", sz);
 648                 DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
 649                 return (EAGAIN);
 650         }
 651 
 652         /*
 653          * Check for bad sizes
 654          */
 655         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 656             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
 657                 panic("segdev_unmap");
 658 
 659         if (dhp != NULL) {
 660                 devmap_handle_t *tdhp;
 661                 /*
 662                  * If large page size was used in hat_devload(),
 663                  * the same page size must be used in hat_unload().
 664                  */
 665                 dhpp = tdhp = devmap_find_handle(dhp, addr);
 666                 while (tdhp != NULL) {
 667                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
 668                                 break;
 669                         }
 670                         tdhp = tdhp->dh_next;
 671                 }
 672                 if (tdhp != NULL) {     /* found a dhp using large pages */
 673                         size_t slen = len;
 674                         size_t mlen;
 675                         size_t soff;
 676 
 677                         soff = (ulong_t)(addr - dhpp->dh_uvaddr);
 678                         while (slen != 0) {
 679                                 mlen = MIN(slen, (dhpp->dh_len - soff));
 680                                 hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
 681                                     dhpp->dh_len, HAT_UNLOAD_UNMAP);
 682                                 dhpp = dhpp->dh_next;
 683                                 ASSERT(slen >= mlen);
 684                                 slen -= mlen;
 685                                 soff = 0;
 686                         }
 687                 } else
 688                         hat_unload(seg->s_as->a_hat, addr, len,
 689                             HAT_UNLOAD_UNMAP);
 690         } else {
 691                 /*
 692                  * Unload any hardware translations in the range
 693                  * to be taken out.
 694                  */
 695                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 696         }
 697 
 698         /*
 699          * get the user offset which will be used in the driver callbacks
 700          */
 701         off = sdp->offset + (offset_t)(addr - seg->s_base);
 702 
 703         /*
 704          * Inform the vnode of the unmapping.
 705          */
 706         ASSERT(sdp->vp != NULL);
 707         (void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
 708             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 709 
 710         /*
 711          * Check for entire segment
 712          */
 713         if (addr == seg->s_base && len == seg->s_size) {
 714                 seg_free(seg);
 715                 return (0);
 716         }
 717 
 718         opages = seg_pages(seg);
 719         dpages = btop(len);
 720         npages = opages - dpages;
 721 
 722         /*
 723          * Check for beginning of segment
 724          */
 725         if (addr == seg->s_base) {
 726                 if (sdp->vpage != NULL) {
 727                         register struct vpage *ovpage;
 728 
 729                         ovpage = sdp->vpage; /* keep pointer to vpage */
 730 
 731                         nbytes = vpgtob(npages);
 732                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 733                         bcopy(&ovpage[dpages], sdp->vpage, nbytes);
 734 
 735                         /* free up old vpage */
 736                         kmem_free(ovpage, vpgtob(opages));
 737                 }
 738 
 739                 /*
 740                  * free devmap handles from the beginning of the mapping.
 741                  */
 742                 if (dhp != NULL)
 743                         devmap_handle_unmap_head(dhp, len);
 744 
 745                 sdp->offset += (offset_t)len;
 746 
 747                 seg->s_base += len;
 748                 seg->s_size -= len;
 749 
 750                 return (0);
 751         }
 752 
 753         /*
 754          * Check for end of segment
 755          */
 756         if (addr + len == seg->s_base + seg->s_size) {
 757                 if (sdp->vpage != NULL) {
 758                         register struct vpage *ovpage;
 759 
 760                         ovpage = sdp->vpage; /* keep pointer to vpage */
 761 
 762                         nbytes = vpgtob(npages);
 763                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 764                         bcopy(ovpage, sdp->vpage, nbytes);
 765 
 766                         /* free up old vpage */
 767                         kmem_free(ovpage, vpgtob(opages));
 768                 }
 769                 seg->s_size -= len;
 770 
 771                 /*
 772                  * free devmap handles from addr to the end of the mapping.
 773                  */
 774                 if (dhp != NULL)
 775                         devmap_handle_unmap_tail(dhp, addr);
 776 
 777                 return (0);
 778         }
 779 
 780         /*
 781          * The section to go is in the middle of the segment,
 782          * so we have to make it into two segments.  nseg is made for
 783          * the high end while seg is cut down at the low end.
 784          */
 785         nbase = addr + len;                             /* new seg base */
 786         nsize = (seg->s_base + seg->s_size) - nbase;      /* new seg size */
 787         seg->s_size = addr - seg->s_base;         /* shrink old seg */
 788         nseg = seg_alloc(seg->s_as, nbase, nsize);
 789         if (nseg == NULL)
 790                 panic("segdev_unmap seg_alloc");
 791 
 792         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
 793             "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
 794         DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
 795             (void *)seg, (void *)nseg));
 796         nsdp = sdp_alloc();
 797 
 798         nseg->s_ops = seg->s_ops;
 799         nseg->s_data = (void *)nsdp;
 800 
 801         VN_HOLD(sdp->vp);
 802         nsdp->mapfunc = sdp->mapfunc;
 803         nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
 804         nsdp->vp     = sdp->vp;
 805         nsdp->pageprot = sdp->pageprot;
 806         nsdp->prot   = sdp->prot;
 807         nsdp->maxprot = sdp->maxprot;
 808         nsdp->type = sdp->type;
 809         nsdp->hat_attr = sdp->hat_attr;
 810         nsdp->hat_flags = sdp->hat_flags;
 811         nsdp->softlockcnt = 0;
 812 
 813         /*
 814          * Initialize per page data if the segment we are
 815          * dup'ing has per page information.
 816          */
 817         if (sdp->vpage != NULL) {
 818                 /* need to split vpage into two arrays */
 819                 register size_t nnbytes;
 820                 register size_t nnpages;
 821                 register struct vpage *ovpage;
 822 
 823                 ovpage = sdp->vpage;         /* keep pointer to vpage */
 824 
 825                 npages = seg_pages(seg);        /* seg has shrunk */
 826                 nbytes = vpgtob(npages);
 827                 nnpages = seg_pages(nseg);
 828                 nnbytes = vpgtob(nnpages);
 829 
 830                 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 831                 bcopy(ovpage, sdp->vpage, nbytes);
 832 
 833                 nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
 834                 bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
 835 
 836                 /* free up old vpage */
 837                 kmem_free(ovpage, vpgtob(opages));
 838         } else
 839                 nsdp->vpage = NULL;
 840 
 841         /*
 842          * unmap dhps.
 843          */
 844         if (dhp == NULL) {
 845                 nsdp->devmap_data = NULL;
 846                 return (0);
 847         }
 848         while (dhp != NULL) {
 849                 callbackops = &dhp->dh_callbackops;
 850                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
 851                     "segdev_unmap: dhp=%p addr=%p", dhp, addr);
 852                 DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
 853                     (void *)dhp, (void *)addr,
 854                     (void *)dhp->dh_uvaddr, dhp->dh_len));
 855 
 856                 if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
 857                         dhpp = dhp->dh_next;
 858                         dhp->dh_next = NULL;
 859                         dhp = dhpp;
 860                 } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
 861                         dhp = dhp->dh_next;
 862                 } else if (addr > dhp->dh_uvaddr &&
 863                     (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
 864                         /*
 865                          * <addr, addr+len> is enclosed by dhp.
 866                          * create a newdhp that begins at addr+len and
 867                          * ends at dhp->dh_uvaddr+dhp->dh_len.
 868                          */
 869                         newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 870                         HOLD_DHP_LOCK(dhp);
 871                         bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 872                         RELE_DHP_LOCK(dhp);
 873                         newdhp->dh_seg = nseg;
 874                         newdhp->dh_next = dhp->dh_next;
 875                         if (dhp->dh_softlock != NULL)
 876                                 newdhp->dh_softlock = devmap_softlock_init(
 877                                     newdhp->dh_dev,
 878                                     (ulong_t)callbackops->devmap_access);
 879                         if (dhp->dh_ctx != NULL)
 880                                 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 881                                     (ulong_t)callbackops->devmap_access);
 882                         if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
 883                                 mutex_init(&newdhp->dh_lock,
 884                                     NULL, MUTEX_DEFAULT, NULL);
 885                         }
 886                         if (callbackops->devmap_unmap != NULL)
 887                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 888                                     off, len, dhp, &dhp->dh_pvtp,
 889                                     newdhp, &newdhp->dh_pvtp);
 890                         mlen = len + (addr - dhp->dh_uvaddr);
 891                         devmap_handle_reduce_len(newdhp, mlen);
 892                         nsdp->devmap_data = newdhp;
 893                         /* XX Changing len should recalculate LARGE flag */
 894                         dhp->dh_len = addr - dhp->dh_uvaddr;
 895                         dhpp = dhp->dh_next;
 896                         dhp->dh_next = NULL;
 897                         dhp = dhpp;
 898                 } else if ((addr > dhp->dh_uvaddr) &&
 899                     ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
 900                         mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
 901                         /*
 902                          * <addr, addr+len> spans over dhps.
 903                          */
 904                         if (callbackops->devmap_unmap != NULL)
 905                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 906                                     off, mlen, (devmap_cookie_t *)dhp,
 907                                     &dhp->dh_pvtp, NULL, NULL);
 908                         /* XX Changing len should recalculate LARGE flag */
 909                         dhp->dh_len = addr - dhp->dh_uvaddr;
 910                         dhpp = dhp->dh_next;
 911                         dhp->dh_next = NULL;
 912                         dhp = dhpp;
 913                         nsdp->devmap_data = dhp;
 914                 } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
 915                         /*
 916                          * dhp is enclosed by <addr, addr+len>.
 917                          */
 918                         dhp->dh_seg = nseg;
 919                         nsdp->devmap_data = dhp;
 920                         dhp = devmap_handle_unmap(dhp);
 921                         nsdp->devmap_data = dhp; /* XX redundant? */
 922                 } else if (((addr + len) > dhp->dh_uvaddr) &&
 923                     ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
 924                         mlen = addr + len - dhp->dh_uvaddr;
 925                         if (callbackops->devmap_unmap != NULL)
 926                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 927                                     dhp->dh_uoff, mlen, NULL,
 928                                     NULL, dhp, &dhp->dh_pvtp);
 929                         devmap_handle_reduce_len(dhp, mlen);
 930                         nsdp->devmap_data = dhp;
 931                         dhp->dh_seg = nseg;
 932                         dhp = dhp->dh_next;
 933                 } else {
 934                         dhp->dh_seg = nseg;
 935                         dhp = dhp->dh_next;
 936                 }
 937         }
 938         return (0);
 939 }
 940 
 941 /*
 942  * Utility function handles reducing the length of a devmap handle during unmap
 943  * Note that this is only used for unmapping the front portion of the handle,
 944  * i.e., we are bumping the offset/pfn etc. up by len.
 945  * Do not use if reducing length at the tail.
 946  */
 947 static void
 948 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
 949 {
 950         struct ddi_umem_cookie *cp;
 951         struct devmap_pmem_cookie *pcp;
 952         /*
 953          * adjust devmap handle fields
 954          */
 955         ASSERT(len < dhp->dh_len);
 956 
 957         /* Make sure only page-aligned changes are done */
 958         ASSERT((len & PAGEOFFSET) == 0);
 959 
 960         dhp->dh_len -= len;
 961         dhp->dh_uoff += (offset_t)len;
 962         dhp->dh_roff += (offset_t)len;
 963         dhp->dh_uvaddr += len;
 964         /* Need to grab dhp lock if REMAP */
 965         HOLD_DHP_LOCK(dhp);
 966         cp = dhp->dh_cookie;
 967         if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
 968                 if (cookie_is_devmem(cp)) {
 969                         dhp->dh_pfn += btop(len);
 970                 } else if (cookie_is_pmem(cp)) {
 971                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
 972                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
 973                             dhp->dh_roff < ptob(pcp->dp_npages));
 974                 } else {
 975                         ASSERT(dhp->dh_roff < cp->size);
 976                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
 977                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
 978                         ASSERT((dhp->dh_cvaddr + len) <=
 979                             (cp->cvaddr + cp->size));
 980 
 981                         dhp->dh_cvaddr += len;
 982                 }
 983         }
 984         /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
 985         RELE_DHP_LOCK(dhp);
 986 }
 987 
 988 /*
 989  * Free devmap handle, dhp.
 990  * Return the next devmap handle on the linked list.
 991  */
 992 static devmap_handle_t *
 993 devmap_handle_unmap(devmap_handle_t *dhp)
 994 {
 995         struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
 996         struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
 997         devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;
 998 
 999         ASSERT(dhp != NULL);
1000 
1001         /*
1002          * before we free up dhp, call the driver's devmap_unmap entry point
1003          * to free resources allocated for this dhp.
1004          */
1005         if (callbackops->devmap_unmap != NULL) {
1006                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
1007                     dhp->dh_len, NULL, NULL, NULL, NULL);
1008         }
1009 
1010         if (dhpp == dhp) {      /* releasing first dhp, change sdp data */
1011                 sdp->devmap_data = dhp->dh_next;
1012         } else {
1013                 while (dhpp->dh_next != dhp) {
1014                         dhpp = dhpp->dh_next;
1015                 }
1016                 dhpp->dh_next = dhp->dh_next;
1017         }
1018         dhpp = dhp->dh_next; /* return value is next dhp in chain */
1019 
1020         if (dhp->dh_softlock != NULL)
1021                 devmap_softlock_rele(dhp);
1022 
1023         if (dhp->dh_ctx != NULL)
1024                 devmap_ctx_rele(dhp);
1025 
1026         if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1027                 mutex_destroy(&dhp->dh_lock);
1028         }
1029         kmem_free(dhp, sizeof (devmap_handle_t));
1030 
1031         return (dhpp);
1032 }
1033 
1034 /*
1035  * Free complete devmap handles from dhp for len bytes
1036  * dhp can be either the first handle or a subsequent handle
1037  */
1038 static void
1039 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
1040 {
1041         struct devmap_callback_ctl *callbackops;
1042 
1043         /*
1044          * free the devmap handles covered by len.
1045          */
1046         while (len >= dhp->dh_len) {
1047                 len -= dhp->dh_len;
1048                 dhp = devmap_handle_unmap(dhp);
1049         }
1050         if (len != 0) { /* partial unmap at head of first remaining dhp */
1051                 callbackops = &dhp->dh_callbackops;
1052 
1053                 /*
1054                  * Call the unmap callback so the driver can make
1055                  * adjustments to its private data.
1056                  */
1057                 if (callbackops->devmap_unmap != NULL)
1058                         (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
1059                             dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
1060                 devmap_handle_reduce_len(dhp, len);
1061         }
1062 }
1063 
1064 /*
1065  * Free devmap handles to truncate the mapping after addr.
1066  * RFE: Simpler to pass in a dhp pointing at the correct dhp (avoid find again);
1067  *      also could then use this routine in the middle unmap case too.
1068  */
1069 static void
1070 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
1071 {
1072         register struct seg *seg = dhp->dh_seg;
1073         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1074         register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
1075         struct devmap_callback_ctl *callbackops;
1076         register devmap_handle_t *dhpp;
1077         size_t maplen;
1078         ulong_t off;
1079         size_t len;
1080 
1081         maplen = (size_t)(addr - dhp->dh_uvaddr);
1082         dhph = devmap_find_handle(dhph, addr);
1083 
1084         while (dhph != NULL) {
1085                 if (maplen == 0) {
1086                         dhph =  devmap_handle_unmap(dhph);
1087                 } else {
1088                         callbackops = &dhph->dh_callbackops;
1089                         len = dhph->dh_len - maplen;
1090                         off = (ulong_t)sdp->offset + (addr - seg->s_base);
1091                         /*
1092                          * Call the unmap callback so the driver
1093                          * can make adjustments on its private data.
1094                          */
1095                         if (callbackops->devmap_unmap != NULL)
1096                                 (*callbackops->devmap_unmap)(dhph,
1097                                     dhph->dh_pvtp, off, len,
1098                                     (devmap_cookie_t *)dhph,
1099                                     &dhph->dh_pvtp, NULL, NULL);
1100                         /* XXX Reducing len needs to recalculate LARGE flag */
1101                         dhph->dh_len = maplen;
1102                         maplen = 0;
1103                         dhpp = dhph->dh_next;
1104                         dhph->dh_next = NULL;
1105                         dhph = dhpp;
1106                 }
1107         } /* end while */
1108 }
1109 
1110 /*
1111  * Free a segment.
1112  */
1113 static void
1114 segdev_free(struct seg *seg)
1115 {
1116         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1117         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
1118 
1119         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FREE,
1120             "segdev_free: dhp=%p seg=%p", (void *)dhp, (void *)seg);
1121         DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
1122             (void *)dhp, (void *)seg));
1123 
1124         /*
1125          * Since the address space is "write" locked, we
1126          * don't need the segment lock to protect "segdev" data.
1127          */
1128         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1129 
1130         while (dhp != NULL)
1131                 dhp = devmap_handle_unmap(dhp);
1132 
1133         VN_RELE(sdp->vp);
1134         if (sdp->vpage != NULL)
1135                 kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
1136 
1137         rw_destroy(&sdp->lock);
1138         kmem_free(sdp, sizeof (*sdp));
1139 }
1140 
1141 static void
1142 free_devmap_handle(devmap_handle_t *dhp)
1143 {
1144         register devmap_handle_t *dhpp;
1145 
1146         /*
1147          * free up devmap handle
1148          */
1149         while (dhp != NULL) {
1150                 dhpp = dhp->dh_next;
1151                 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1152                         mutex_destroy(&dhp->dh_lock);
1153                 }
1154 
1155                 if (dhp->dh_softlock != NULL)
1156                         devmap_softlock_rele(dhp);
1157 
1158                 if (dhp->dh_ctx != NULL)
1159                         devmap_ctx_rele(dhp);
1160 
1161                 kmem_free(dhp, sizeof (devmap_handle_t));
1162                 dhp = dhpp;
1163         }
1164 }
1165 
1166 /*
1167  * routines to lock and unlock underlying segkp segment for
1168  * KMEM_PAGEABLE type cookies.
1169  * segkp only allows a single pending F_SOFTLOCK
1170  * We keep track of the number of locks in the cookie so we can
1171  * have multiple pending faults and manage the calls to segkp.
1172  * RFE: if segkp supports either pagelock or can support multiple
1173  * calls to F_SOFTLOCK, then these routines can go away.
1174  *      If pagelock, segdev_faultpage can fault on a page-by-page basis,
1175  *              which simplifies the code quite a bit.
1176  *      If multiple calls are allowed but not partial ranges, then the need for
1177  *      cookie->lock and the locked count goes away; code can call as_fault directly.
1178  */
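     /*
      * Illustrative example (page counts assumed): the first
      * acquire_kpmem_lock() call, say for 4 pages, faults the entire cookie
      * range in with F_SOFTLOCK and sets cookie->locked to 4; a later call
      * for 2 pages only bumps the count to 6.  Each release_kpmem_lock()
      * decrements the count, and only the final release (count reaching 0)
      * issues the F_SOFTUNLOCK.
      */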
1179 static faultcode_t
1180 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1181 {
1182         int err = 0;
1183         ASSERT(cookie_is_kpmem(cookie));
1184         /*
1185          * Fault in pages in segkp with F_SOFTLOCK.
1186          * We want to hold the lock until all pages have been loaded.
1187          * segkp only allows a single caller to hold SOFTLOCK, so the cookie
1188          * holds a count so we don't call into segkp multiple times.
1189          */
1190         mutex_enter(&cookie->lock);
1191 
1192         /*
1193          * Check for overflow in locked field
1194          */
1195         if ((UINT32_MAX - cookie->locked) < npages) {
1196                 err = FC_MAKE_ERR(ENOMEM);
1197         } else if (cookie->locked == 0) {
1198                 /* First time locking */
1199                 err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
1200                     cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
1201         }
1202         if (!err) {
1203                 cookie->locked += npages;
1204         }
1205         mutex_exit(&cookie->lock);
1206         return (err);
1207 }
1208 
1209 static void
1210 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1211 {
1212         mutex_enter(&cookie->lock);
1213         ASSERT(cookie_is_kpmem(cookie));
1214         ASSERT(cookie->locked >= npages);
1215         cookie->locked -= (uint_t)npages;
1216         if (cookie->locked == 0) {
1217                 /* Last unlock */
1218                 if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
1219                     cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
1220                         panic("segdev releasing kpmem lock %p", (void *)cookie);
1221         }
1222         mutex_exit(&cookie->lock);
1223 }
1224 
1225 /*
1226  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
1227  * drivers with devmap_access callbacks
1228  * slock->softlocked basically works like a rw lock
1229  *      -ve counts => F_SOFTLOCK in progress
1230  *      +ve counts => F_INVAL/F_PROT in progress
1231  * We allow only one F_SOFTLOCK at a time
1232  * but can have multiple pending F_INVAL/F_PROT calls
1233  *
1234  * This routine waits using cv_wait_sig so killing processes is more graceful
1235  * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
1236  */
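     /*
      * Illustrative example (counts assumed): softlocked == -8 means a
      * single F_SOFTLOCK covering 8 pages is in progress; softlocked == 5
      * means F_INVAL/F_PROT faults covering a total of 5 pages are in
      * progress.
      */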
1237 static int devmap_softlock_enter(
1238         struct devmap_softlock *slock,
1239         size_t npages,
1240         enum fault_type type)
1241 {
1242         if (npages == 0)
1243                 return (0);
1244         mutex_enter(&(slock->lock));
1245         switch (type) {
1246         case F_SOFTLOCK :
1247                 while (slock->softlocked) {
1248                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1249                                 /* signalled */
1250                                 mutex_exit(&(slock->lock));
1251                                 return (EINTR);
1252                         }
1253                 }
1254                 slock->softlocked -= npages; /* -ve count => locked */
1255                 break;
1256         case F_INVAL :
1257         case F_PROT :
1258                 while (slock->softlocked < 0)
1259                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1260                                 /* signalled */
1261                                 mutex_exit(&(slock->lock));
1262                                 return (EINTR);
1263                         }
1264                 slock->softlocked += npages; /* +ve count => f_invals */
1265                 break;
1266         default:
1267                 ASSERT(0);
1268         }
1269         mutex_exit(&(slock->lock));
1270         return (0);
1271 }
1272 
1273 static void devmap_softlock_exit(
1274         struct devmap_softlock *slock,
1275         size_t npages,
1276         enum fault_type type)
1277 {
1278         if (slock == NULL)
1279                 return;
1280         mutex_enter(&(slock->lock));
1281         switch (type) {
1282         case F_SOFTLOCK :
1283                 ASSERT(-slock->softlocked >= npages);
1284                 slock->softlocked += npages; /* -ve count is softlocked */
1285                 if (slock->softlocked == 0)
1286                         cv_signal(&slock->cv);
1287                 break;
1288         case F_INVAL :
1289         case F_PROT:
1290                 ASSERT(slock->softlocked >= npages);
1291                 slock->softlocked -= npages;
1292                 if (slock->softlocked == 0)
1293                         cv_signal(&slock->cv);
1294                 break;
1295         default:
1296                 ASSERT(0);
1297         }
1298         mutex_exit(&(slock->lock));
1299 }
1300 
1301 /*
1302  * Do an F_SOFTUNLOCK call over the range requested.
1303  * The range must have already been F_SOFTLOCK'ed.
1304  * The segment lock should be held (but not the segment private lock?).
1305  * The softunlock code below does not adjust for large page sizes;
1306  *      it assumes the caller already did any addr/len adjustments for
1307  *      pagesize mappings before calling.
1308  */
1309 /*ARGSUSED*/
1310 static void
1311 segdev_softunlock(
1312         struct hat *hat,                /* the hat */
1313         struct seg *seg,                /* seg_dev of interest */
1314         caddr_t addr,                   /* base address of range */
1315         size_t len,                     /* number of bytes */
1316         enum seg_rw rw)                 /* type of access at fault */
1317 {
1318         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1319         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1320 
1321         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SOFTUNLOCK,
1322             "segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
1323             dhp_head, sdp, addr, len);
1324         DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
1325             "addr %p len %lx\n",
1326             (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
1327 
1328         hat_unlock(hat, addr, len);
1329 
1330         if (dhp_head != NULL) {
1331                 devmap_handle_t *dhp;
1332                 size_t mlen;
1333                 size_t tlen = len;
1334                 ulong_t off;
1335 
1336                 dhp = devmap_find_handle(dhp_head, addr);
1337                 ASSERT(dhp != NULL);
1338 
1339                 off = (ulong_t)(addr - dhp->dh_uvaddr);
1340                 while (tlen != 0) {
1341                         mlen = MIN(tlen, (dhp->dh_len - off));
1342 
1343                         /*
1344                          * unlock segkp memory, locked during F_SOFTLOCK
1345                          */
1346                         if (dhp_is_kpmem(dhp)) {
1347                                 release_kpmem_lock(
1348                                     (struct ddi_umem_cookie *)dhp->dh_cookie,
1349                                     btopr(mlen));
1350                         }
1351 
1352                         /*
1353                          * Do the softlock accounting for devmap_access
1354                          */
1355                         if (dhp->dh_callbackops.devmap_access != NULL) {
1356                                 devmap_softlock_exit(dhp->dh_softlock,
1357                                     btopr(mlen), F_SOFTLOCK);
1358                         }
1359 
1360                         tlen -= mlen;
1361                         dhp = dhp->dh_next;
1362                         off = 0;
1363                 }
1364         }
1365 
1366         mutex_enter(&freemem_lock);
1367         ASSERT(sdp->softlockcnt >= btopr(len));
1368         sdp->softlockcnt -= btopr(len);
1369         mutex_exit(&freemem_lock);
1370         if (sdp->softlockcnt == 0) {
1371                 /*
1372                  * All SOFTLOCKS are gone. Wakeup any waiting
1373                  * unmappers so they can try again to unmap.
1374                  * Check for waiters first without the mutex
1375                  * held so we don't always grab the mutex on
1376                  * softunlocks.
1377                  */
1378                 if (AS_ISUNMAPWAIT(seg->s_as)) {
1379                         mutex_enter(&seg->s_as->a_contents);
1380                         if (AS_ISUNMAPWAIT(seg->s_as)) {
1381                                 AS_CLRUNMAPWAIT(seg->s_as);
1382                                 cv_broadcast(&seg->s_as->a_cv);
1383                         }
1384                         mutex_exit(&seg->s_as->a_contents);
1385                 }
1386         }
1387 
1388 }
1389 
1390 /*
1391  * Handle fault for a single page.
1392  * Done in a separate routine so we can handle errors more easily.
1393  * This routine is called only from segdev_faultpages()
1394  * when looping over the range of addresses requested. The segment lock is held.
1395  */
1396 static faultcode_t
1397 segdev_faultpage(
1398         struct hat *hat,                /* the hat */
1399         struct seg *seg,                /* seg_dev of interest */
1400         caddr_t addr,                   /* address in as */
1401         struct vpage *vpage,            /* pointer to vpage for seg, addr */
1402         enum fault_type type,           /* type of fault */
1403         enum seg_rw rw,                 /* type of access at fault */
1404         devmap_handle_t *dhp)           /* devmap handle if any for this page */
1405 {
1406         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1407         uint_t prot;
1408         pfn_t pfnum = PFN_INVALID;
1409         u_offset_t offset;
1410         uint_t hat_flags;
1411         dev_info_t *dip;
1412 
1413         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE,
1414             "segdev_faultpage: dhp=%p seg=%p addr=%p", dhp, seg, addr);
1415         DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
1416             (void *)dhp, (void *)seg, (void *)addr));
1417 
1418         /*
1419          * Initialize protection value for this page.
1420          * If we have per page protection values check it now.
1421          */
1422         if (sdp->pageprot) {
1423                 uint_t protchk;
1424 
1425                 switch (rw) {
1426                 case S_READ:
1427                         protchk = PROT_READ;
1428                         break;
1429                 case S_WRITE:
1430                         protchk = PROT_WRITE;
1431                         break;
1432                 case S_EXEC:
1433                         protchk = PROT_EXEC;
1434                         break;
1435                 case S_OTHER:
1436                 default:
1437                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1438                         break;
1439                 }
1440 
1441                 prot = VPP_PROT(vpage);
1442                 if ((prot & protchk) == 0)
1443                         return (FC_PROT);       /* illegal access type */
1444         } else {
1445                 prot = sdp->prot;
1446                 /* caller has already done segment level protection check */
1447         }
1448 
1449         if (type == F_SOFTLOCK) {
1450                 mutex_enter(&freemem_lock);
1451                 sdp->softlockcnt++;
1452                 mutex_exit(&freemem_lock);
1453         }
1454 
1455         hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1456         offset = sdp->offset + (u_offset_t)(addr - seg->s_base);
1457         /*
1458          * In the devmap framework, sdp->mapfunc is set to NULL.  We can get
1459          * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
1460          * seg->s_base.
1461          */
1462         if (dhp == NULL) {
1463                 /* If segment has devmap_data, then dhp should be non-NULL */
1464                 ASSERT(sdp->devmap_data == NULL);
1465                 pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
1466                     (off_t)offset, prot);
1467                 prot |= sdp->hat_attr;
1468         } else {
1469                 ulong_t off;
1470                 struct ddi_umem_cookie *cp;
1471                 struct devmap_pmem_cookie *pcp;
1472 
1473                 /* ensure the dhp passed in contains addr. */
1474                 ASSERT(dhp == devmap_find_handle(
1475                     (devmap_handle_t *)sdp->devmap_data, addr));
1476 
1477                 off = addr - dhp->dh_uvaddr;
1478 
1479                 /*
1480                  * This routine assumes that the caller makes sure that the
1481                  * fields in dhp used below are unchanged due to remap during
1482                  * this call. Caller does HOLD_DHP_LOCK if needed.
1483                  */
1484                 cp = dhp->dh_cookie;
1485                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1486                         pfnum = PFN_INVALID;
1487                 } else if (cookie_is_devmem(cp)) {
1488                         pfnum = dhp->dh_pfn + btop(off);
1489                 } else if (cookie_is_pmem(cp)) {
1490                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
1491                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
1492                             dhp->dh_roff < ptob(pcp->dp_npages));
1493                         pfnum = page_pptonum(
1494                             pcp->dp_pparray[btop(off + dhp->dh_roff)]);
1495                 } else {
1496                         ASSERT(dhp->dh_roff < cp->size);
1497                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
1498                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
1499                         ASSERT((dhp->dh_cvaddr + off) <=
1500                             (cp->cvaddr + cp->size));
1501                         ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
1502                             (cp->cvaddr + cp->size));
1503 
1504                         switch (cp->type) {
1505                         case UMEM_LOCKED :
1506                                 if (cp->pparray != NULL) {
1507                                         ASSERT((dhp->dh_roff &
1508                                             PAGEOFFSET) == 0);
1509                                         pfnum = page_pptonum(
1510                                             cp->pparray[btop(off +
1511                                             dhp->dh_roff)]);
1512                                 } else {
1513                                         pfnum = hat_getpfnum(
1514                                             ((proc_t *)cp->procp)->p_as->a_hat,
1515                                             cp->cvaddr + off);
1516                                 }
1517                                 break;
1518                         case UMEM_TRASH :
1519                                 pfnum = page_pptonum(trashpp);
1520                                 /*
1521                                  * We should also set hat_flags to HAT_NOFAULT.
1522                                  * However, not all hat layers implement this.
1523                                  */
1524                                 break;
1525                         case KMEM_PAGEABLE:
1526                         case KMEM_NON_PAGEABLE:
1527                                 pfnum = hat_getpfnum(kas.a_hat,
1528                                     dhp->dh_cvaddr + off);
1529                                 break;
1530                         default :
1531                                 pfnum = PFN_INVALID;
1532                                 break;
1533                         }
1534                 }
1535                 prot |= dhp->dh_hat_attr;
1536         }
1537         if (pfnum == PFN_INVALID) {
1538                 return (FC_MAKE_ERR(EFAULT));
1539         }
1540         /* prot should already be OR'ed in with hat_attributes if needed */
1541 
1542         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE_CK1,
1543             "segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
1544             pfnum, pf_is_memory(pfnum), prot, hat_flags);
1545         DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
1546             "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
1547 
1548         if (pf_is_memory(pfnum) || (dhp != NULL)) {
1549                 /*
1550                  * It's not _really_ required here to pass sdp->hat_flags
1551                  * to hat_devload even though we do it.
1552                  * This is because the hat layer figures out that DEVMEM
1553                  * mappings are non-consistent anyway.
1554                  */
1555                 hat_devload(hat, addr, PAGESIZE, pfnum,
1556                     prot, hat_flags | sdp->hat_flags);
1557                 return (0);
1558         }
1559 
1560         /*
1561          * Fall through to the case where devmap is not used and we need to
1562          * call up the device tree to set up the mapping.
1563          */
1564 
1565         dip = VTOS(VTOCVP(sdp->vp))->s_dip;
1566         ASSERT(dip);
1567 
1568         /*
1569          * When calling ddi_map_fault, we do not OR in sdp->hat_attr.
1570          * This is because the call goes down to drivers which may not expect
1571          * prot to have any values other than PROT_ALL.
1572          * The root nexus driver has a hack to peek into the segment
1573          * structure and then OR in sdp->hat_attr.
1574          * XXX: In case the bus_ops interfaces are ever revisited,
1575          * we need to fix this; prot should include other hat attributes.
1576          */
1577         if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
1578             (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
1579                 return (FC_MAKE_ERR(EFAULT));
1580         }
1581         return (0);
1582 }
1583 
1584 static faultcode_t
1585 segdev_fault(
1586         struct hat *hat,                /* the hat */
1587         struct seg *seg,                /* the seg_dev of interest */
1588         caddr_t addr,                   /* the address of the fault */
1589         size_t len,                     /* the length of the range */
1590         enum fault_type type,           /* type of fault */
1591         enum seg_rw rw)                 /* type of access at fault */
1592 {
1593         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1594         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1595         devmap_handle_t *dhp;
1596         struct devmap_softlock *slock = NULL;
1597         ulong_t slpage = 0;
1598         ulong_t off;
1599         caddr_t maddr = addr;
1600         int err;
1601         int err_is_faultcode = 0;
1602 
1603         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_FAULT,
1604             "segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
1605             (void *)dhp_head, (void *)seg, (void *)addr, len, type);
1606         DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
1607             "addr %p len %lx type %x\n",
1608             (void *)dhp_head, (void *)seg, (void *)addr, len, type));
1609 
1610         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1611 
1612         /* Handle non-devmap case */
1613         if (dhp_head == NULL)
1614                 return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
1615 
1616         /* Find devmap handle */
1617         if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
1618                 return (FC_NOMAP);
1619 
1620         /*
1621          * The seg_dev driver does not implement copy-on-write,
1622          * and always loads translations with maximal allowed permissions,
1623          * but we got a fault trying to access the device.
1624          * Servicing the fault is not going to produce a better result.
1625          * RFE: If we want devmap_access callbacks to be involved in F_PROT
1626          *      faults, then the code below is written for that, pending
1627          *      resolution of the following:
1628          *      - determine whether the F_INVAL/F_SOFTLOCK syncing
1629          *      is also needed for F_PROT. The code below assumes it is.
1630          *      - If the driver sees F_PROT and calls devmap_load with the
1631          *      same type, then segdev_faultpages will fail with FC_PROT
1632          *      anyway; we need to change that so calls from devmap_load to
1633          *      segdev_faultpages for F_PROT type are retagged to F_INVAL.
1634          * RFE: Today we don't have drivers that use devmap and want to handle
1635          *      F_PROT calls. The code in segdev_fault* is written to allow
1636          *      this case but is not tested. A driver that needs this capability
1637          *      should be able to remove the short-circuit case, resolve the
1638          *      above issues, and "should" work.
1639          */
1640         if (type == F_PROT) {
1641                 return (FC_PROT);
1642         }
1643 
1644         /*
1645          * Loop through dhp list calling devmap_access or segdev_faultpages for
1646          * each devmap handle.
1647          * Drivers which implement devmap_access can interpose on faults and do
1648          * device-appropriate special actions before calling devmap_load.
1649          */
1650 
1651         /*
1652          * Unfortunately, this simple loop has turned out to expose a variety
1653          * of complex problems, which result in the following convoluted code.
1654          *
1655          * First, a desire to handle a serialization of F_SOFTLOCK calls
1656          * to the driver within the framework.
1657          *      This results in a dh_softlock structure that is on a per device
1658          *      (or device instance) basis and serializes devmap_access calls.
1659          *      Ideally we would need to do this for underlying
1660          *      memory/device regions that are being faulted on
1661          *      but that is hard to identify and with REMAP, harder
1662          * Second, a desire to serialize F_INVAL (and F_PROT) calls w.r.t.
1663          *      F_SOFTLOCK calls to the driver.
1664          * These serializations are to simplify the driver programmer model.
1665          * To support these two features, the code first goes through the
1666          *      devmap handles and counts the pages (slpage) that are covered
1667          *      by devmap_access callbacks.
1668          * This part ends with a devmap_softlock_enter call
1669          *      which allows only one F_SOFTLOCK active on a device instance,
1670          *      but multiple F_INVAL/F_PROTs can be active except when an
1671          *      F_SOFTLOCK is active.
1672          *
1673          * Next, we don't short-circuit the fault code upfront to call
1674          *      segdev_softunlock for F_SOFTUNLOCK, because we must use
1675          *      the same length when we softlock and softunlock.
1676          *
1677          *      -Hat layers may not support softunlocking lengths less than the
1678          *      original length when there is large page support.
1679          *      -kpmem locking is dependent on keeping the lengths same.
1680          *      -if drivers handled F_SOFTLOCK, they probably also expect to
1681          *              see an F_SOFTUNLOCK of the same length
1682          *      Hence, if extending lengths during softlock,
1683          *      softunlock has to make the same adjustments and goes through
1684          *      the same loop calling segdev_faultpages/segdev_softunlock,
1685          *      but some of the synchronization and error handling is different.
1686          */
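
             /*
              * Illustrative sketch (not code in this file) of the enter/exit
              * pairing the passes below are meant to preserve, assuming slock
              * and slpage are computed as in the counting pass that follows:
              *
              *   F_INVAL/F_PROT:
              *      devmap_softlock_enter(slock, slpage, type);
              *      ... loop over dhps: devmap_access / segdev_faultpages ...
              *      devmap_softlock_exit(slock, slpage, type);
              *
              *   F_SOFTLOCK:
              *      devmap_softlock_enter(slock, slpage, F_SOFTLOCK);
              *      ... translations loaded and locked ...
              *      (the matching devmap_softlock_exit happens later, in the
              *      F_SOFTUNLOCK pass, via segdev_softunlock)
              */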
1687 
1688         if (type != F_SOFTUNLOCK) {
1689                 devmap_handle_t *dhpp = dhp;
1690                 size_t slen = len;
1691 
1692                 /*
1693                  * Calculate the count of pages that are:
1694                  * a) within the (potentially extended) fault region
1695                  * b) AND covered by devmap handle with devmap_access
1696                  */
1697                 off = (ulong_t)(addr - dhpp->dh_uvaddr);
1698                 while (slen != 0) {
1699                         size_t mlen;
1700 
1701                         /*
1702                          * Softlocking on a region that allows remap is
1703                          * unsupported due to unresolved locking issues.
1704                          * XXX: unclear what these are?
1705                          *      One potential issue: if there is a pending
1706                          *      softlock, then a remap should not be allowed
1707                          *      until the unlock is done. This is easily
1708                          *      fixed by returning an error in devmap*remap on
1709                          *      checking the dh->dh_softlock->softlocked value.
1710                          */
1711                         if ((type == F_SOFTLOCK) &&
1712                             (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
1713                                 return (FC_NOSUPPORT);
1714                         }
1715 
1716                         mlen = MIN(slen, (dhpp->dh_len - off));
1717                         if (dhpp->dh_callbackops.devmap_access) {
1718                                 size_t llen;
1719                                 caddr_t laddr;
1720                                 /*
1721                                  * use extended length for large page mappings
1722                                  */
1723                                 HOLD_DHP_LOCK(dhpp);
1724                                 if ((sdp->pageprot == 0) &&
1725                                     (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
1726                                         devmap_get_large_pgsize(dhpp,
1727                                             mlen, maddr, &llen, &laddr);
1728                                 } else {
1729                                         llen = mlen;
1730                                 }
1731                                 RELE_DHP_LOCK(dhpp);
1732 
1733                                 slpage += btopr(llen);
1734                                 slock = dhpp->dh_softlock;
1735                         }
1736                         maddr += mlen;
1737                         ASSERT(slen >= mlen);
1738                         slen -= mlen;
1739                         dhpp = dhpp->dh_next;
1740                         off = 0;
1741                 }
1742                 /*
1743                  * Synchronize with other faulting threads and wait till safe;
1744                  * devmap_softlock_enter may return due to a signal in cv_wait.
1745                  *
1746                  * devmap_softlock_enter has to be called outside of while loop
1747                  * to prevent a deadlock if len spans over multiple dhps.
1748                  * dh_softlock is per device instance, and if multiple dhps
1749                  * use the same device instance, the second dhp's LOCK call
1750                  * will hang waiting on the first to complete.
1751                  * devmap_setup verifies the slocks in a dhp_chain are the same.
1752                  * RFE: this deadlock only holds true for F_SOFTLOCK. For
1753                  *      F_INVAL/F_PROT, since we now allow multiple in parallel,
1754                  *      we could have done the softlock_enter inside the loop
1755                  *      and supported multi-dhp mappings with dissimilar devices.
1756                  */
1757                 if (err = devmap_softlock_enter(slock, slpage, type))
1758                         return (FC_MAKE_ERR(err));
1759         }
1760 
1761         /* reset 'maddr' to the start addr of the range of fault. */
1762         maddr = addr;
1763 
1764         /* calculate the offset corresponding to 'addr' in the first dhp. */
1765         off = (ulong_t)(addr - dhp->dh_uvaddr);
1766 
1767         /*
1768          * The fault length may span over multiple dhps.
1769          * Loop until the total length is satisfied.
1770          */
1771         while (len != 0) {
1772                 size_t llen;
1773                 size_t mlen;
1774                 caddr_t laddr;
1775 
1776                 /*
1777                  * mlen is the smaller of 'len' and the length
1778                  * from addr to the end of mapping defined by dhp.
1779                  */
1780                 mlen = MIN(len, (dhp->dh_len - off));
1781 
1782                 HOLD_DHP_LOCK(dhp);
1783                 /*
1784                  * Pass the extended length and address to devmap_access
1785                  * if large pagesize is used for loading address translations.
1786                  */
1787                 if ((sdp->pageprot == 0) &&
1788                     (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
1789                         devmap_get_large_pgsize(dhp, mlen, maddr,
1790                             &llen, &laddr);
1791                         ASSERT(maddr == addr || laddr == maddr);
1792                 } else {
1793                         llen = mlen;
1794                         laddr = maddr;
1795                 }
1796 
1797                 if (dhp->dh_callbackops.devmap_access != NULL) {
1798                         offset_t aoff;
1799 
1800                         aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
1801 
1802                         /*
1803                          * call driver's devmap_access entry point which will
1804                          * call devmap_load/contextmgmt to load the translations
1805                          *
1806                          * We drop the dhp_lock before calling access so
1807                          * drivers can call devmap_*_remap within access
1808                          */
1809                         RELE_DHP_LOCK(dhp);
1810 
1811                         err = (*dhp->dh_callbackops.devmap_access)(
1812                             dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
1813                 } else {
1814                         /*
1815                          * If there is no devmap_access entry point, load the
1816                          * mappings; hold dhp_lock across faultpages if REMAP.
1817                          */
1818                         err = segdev_faultpages(hat, seg, laddr, llen,
1819                             type, rw, dhp);
1820                         err_is_faultcode = 1;
1821                         RELE_DHP_LOCK(dhp);
1822                 }
1823 
1824                 if (err) {
1825                         if ((type == F_SOFTLOCK) && (maddr > addr)) {
1826                                 /*
1827                                  * If not the first dhp, use
1828                                  * segdev_fault(F_SOFTUNLOCK) for prior dhps.
1829                                  * While this is recursion, it is incorrect to
1830                                  * call just segdev_softunlock
1831                                  * if we are using either large pages
1832                                  * or devmap_access. It is more correct
1833                                  * to go through the same loop as above
1834                                  * rather than call segdev_softunlock directly.
1835                                  * It will use the right lengths as well as
1836                                  * call into the driver devmap_access routines.
1837                                  */
1838                                 size_t done = (size_t)(maddr - addr);
1839                                 (void) segdev_fault(hat, seg, addr, done,
1840                                     F_SOFTUNLOCK, S_OTHER);
1841                                 /*
1842                                  * reduce slpage by number of pages
1843                                  * released by segdev_softunlock
1844                                  */
1845                                 ASSERT(slpage >= btopr(done));
1846                                 devmap_softlock_exit(slock,
1847                                     slpage - btopr(done), type);
1848                         } else {
1849                                 devmap_softlock_exit(slock, slpage, type);
1850                         }
1851 
1853                         /*
1854                          * Segdev_faultpages() already returns a faultcode,
1855                          * hence, the result from segdev_faultpages() should be
1856                          * returned directly.
1857                          */
1858                         if (err_is_faultcode)
1859                                 return (err);
1860                         return (FC_MAKE_ERR(err));
1861                 }
1862 
1863                 maddr += mlen;
1864                 ASSERT(len >= mlen);
1865                 len -= mlen;
1866                 dhp = dhp->dh_next;
1867                 off = 0;
1868 
1869                 ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1870         }
1871         /*
1872          * Release the softlock count at the end of the fault.
1873          * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK.
1874          */
1875         if ((type == F_INVAL) || (type == F_PROT))
1876                 devmap_softlock_exit(slock, slpage, type);
1877         return (0);
1878 }
1879 
1880 /*
1881  * segdev_faultpages
1882  *
1883  * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load.
1884  * This routine assumes that the caller makes sure that the fields
1885  * in dhp used below are not changed due to remap during this call.
1886  * Caller does HOLD_DHP_LOCK if needed.
1887  * This routine returns a faultcode_t as a return value for segdev_fault.
1888  */
1889 static faultcode_t
1890 segdev_faultpages(
1891         struct hat *hat,                /* the hat */
1892         struct seg *seg,                /* the seg_dev of interest */
1893         caddr_t addr,                   /* the address of the fault */
1894         size_t len,                     /* the length of the range */
1895         enum fault_type type,           /* type of fault */
1896         enum seg_rw rw,                 /* type of access at fault */
1897         devmap_handle_t *dhp)           /* devmap handle */
1898 {
1899         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1900         register caddr_t a;
1901         struct vpage *vpage;
1902         struct ddi_umem_cookie *kpmem_cookie = NULL;
1903         int err;
1904 
1905         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGES,
1906             "segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
1907             (void *)dhp, (void *)seg, (void *)addr, len);
1908         DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1909             "dhp %p seg %p addr %p len %lx\n",
1910             (void *)dhp, (void *)seg, (void *)addr, len));
1911 
1912         /*
1913          * The seg_dev driver does not implement copy-on-write,
1914          * and always loads translations with maximal allowed permissions,
1915          * but we got a fault trying to access the device.
1916          * Servicing the fault is not going to produce a better result.
1917          * XXX: If we want to allow devmap_access to handle F_PROT calls,
1918          * this code should be removed to let the normal fault handling
1919          * take care of finding the error.
1920          */
1921         if (type == F_PROT) {
1922                 return (FC_PROT);
1923         }
1924 
1925         if (type == F_SOFTUNLOCK) {
1926                 segdev_softunlock(hat, seg, addr, len, rw);
1927                 return (0);
1928         }
1929 
1930         /*
1931          * For kernel pageable memory, fault in and lock the segkp pages.
1932          * We hold this lock until the completion of this
1933          * fault (INVAL/PROT) or until unlock (SOFTLOCK).
1934          */
1935         if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1936                 kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1937                 if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1938                         return (err);
1939         }
1940 
1941         /*
1942          * If we have the same protections for the entire segment,
1943          * ensure that the access being attempted is legitimate.
1944          */
1945         rw_enter(&sdp->lock, RW_READER);
1946         if (sdp->pageprot == 0) {
1947                 uint_t protchk;
1948 
1949                 switch (rw) {
1950                 case S_READ:
1951                         protchk = PROT_READ;
1952                         break;
1953                 case S_WRITE:
1954                         protchk = PROT_WRITE;
1955                         break;
1956                 case S_EXEC:
1957                         protchk = PROT_EXEC;
1958                         break;
1959                 case S_OTHER:
1960                 default:
1961                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1962                         break;
1963                 }
1964 
1965                 if ((sdp->prot & protchk) == 0) {
1966                         rw_exit(&sdp->lock);
1967                         /* undo kpmem locking */
1968                         if (kpmem_cookie != NULL) {
1969                                 release_kpmem_lock(kpmem_cookie, btopr(len));
1970                         }
1971                         return (FC_PROT);       /* illegal access type */
1972                 }
1973         }
1974 
1975         /*
1976          * We do a single hat_devload for the range if
1977          *   - the devmap framework is used (dhp is not NULL),
1978          *   - pageprot == 0, i.e., no per-page protection is set, and
1979          *   - the pages are device pages, whether or not large pages are used
1980          */
1981         if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
1982                 pfn_t pfnum;
1983                 uint_t hat_flags;
1984 
1985                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1986                         rw_exit(&sdp->lock);
1987                         return (FC_NOMAP);
1988                 }
1989 
1990                 if (type == F_SOFTLOCK) {
1991                         mutex_enter(&freemem_lock);
1992                         sdp->softlockcnt += btopr(len);
1993                         mutex_exit(&freemem_lock);
1994                 }
1995 
1996                 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1997                 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
1998                 ASSERT(!pf_is_memory(pfnum));
1999 
2000                 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
2001                     hat_flags | sdp->hat_flags);
2002                 rw_exit(&sdp->lock);
2003                 return (0);
2004         }
2005 
2006         /* Handle cases where we have to loop through fault handling per-page */
2007 
2008         if (sdp->vpage == NULL)
2009                 vpage = NULL;
2010         else
2011                 vpage = &sdp->vpage[seg_page(seg, addr)];
2012 
2013         /* loop over the address range handling each fault */
2014         for (a = addr; a < addr + len; a += PAGESIZE) {
2015                 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
2016                         break;
2017                 }
2018                 if (vpage != NULL)
2019                         vpage++;
2020         }
2021         rw_exit(&sdp->lock);
2022         if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
2023                 size_t done = (size_t)(a - addr); /* bytes already faulted */
2024                 if (done > 0) {
2025                         /* use softunlock for those pages */
2026                         segdev_softunlock(hat, seg, addr, done, S_OTHER);
2027                 }
2028                 if (kpmem_cookie != NULL) {
2029                         /* release kpmem lock for rest of pages */
2030                         ASSERT(len >= done);
2031                         release_kpmem_lock(kpmem_cookie, btopr(len - done));
2032                 }
2033         } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
2034                 /* for non-SOFTLOCK cases, release kpmem */
2035                 release_kpmem_lock(kpmem_cookie, btopr(len));
2036         }
2037         return (err);
2038 }
2039 
2040 /*
2041  * Asynchronous page fault.  We simply do nothing since this
2042  * entry point is not supposed to load up the translation.
2043  */
2044 /*ARGSUSED*/
2045 static faultcode_t
2046 segdev_faulta(struct seg *seg, caddr_t addr)
2047 {
2048         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FAULTA,
2049             "segdev_faulta: seg=%p addr=%p", (void *)seg, (void *)addr);
2050         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2051 
2052         return (0);
2053 }
2054 
2055 static int
2056 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2057 {
2058         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2059         register devmap_handle_t *dhp;
2060         register struct vpage *vp, *evp;
2061         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
2062         ulong_t off;
2063         size_t mlen, sz;
2064 
2065         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT,
2066             "segdev_setprot:start seg=%p addr=%p len=%lx prot=%x",
2067             (void *)seg, (void *)addr, len, prot);
2068         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2069 
2070         if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
2071                 /*
2072                  * Fail the setprot if pages are SOFTLOCKed through this
2073                  * mapping.
2074                  * Softlockcnt is protected from change by the as read lock.
2075                  */
2076                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT_CK1,
2077                     "segdev_setprot:error softlockcnt=%lx", sz);
2078                 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
2079                 return (EAGAIN);
2080         }
2081 
2082         if (dhp_head != NULL) {
2083                 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
2084                         return (EINVAL);
2085 
2086                 /*
2087                  * Check whether the request violates maxprot.
2088                  */
2089                 off = (ulong_t)(addr - dhp->dh_uvaddr);
2090                 mlen  = len;
2091                 while (dhp) {
2092                         if ((dhp->dh_maxprot & prot) != prot)
2093                                 return (EACCES);        /* violated maxprot */
2094 
2095                         if (mlen > (dhp->dh_len - off)) {
2096                                 mlen -= dhp->dh_len - off;
2097                                 dhp = dhp->dh_next;
2098                                 off = 0;
2099                         } else
2100                                 break;
2101                 }
2102         } else {
2103                 if ((sdp->maxprot & prot) != prot)
2104                         return (EACCES);
2105         }
2106 
2107         rw_enter(&sdp->lock, RW_WRITER);
2108         if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
2109                 if (sdp->prot == prot) {
2110                         rw_exit(&sdp->lock);
2111                         return (0);                     /* all done */
2112                 }
2113                 sdp->prot = (uchar_t)prot;
2114         } else {
2115                 sdp->pageprot = 1;
2116                 if (sdp->vpage == NULL) {
2117                         /*
2118                          * First time through setting per-page permissions;
2119                          * initialize all the vpage structures to prot.
2120                          */
2121                         sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
2122                             KM_SLEEP);
2123                         evp = &sdp->vpage[seg_pages(seg)];
2124                         for (vp = sdp->vpage; vp < evp; vp++)
2125                                 VPP_SETPROT(vp, sdp->prot);
2126                 }
2127                 /*
2128                  * Now go change the needed vpages protections.
2129                  */
2130                 evp = &sdp->vpage[seg_page(seg, addr + len)];
2131                 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
2132                         VPP_SETPROT(vp, prot);
2133         }
2134         rw_exit(&sdp->lock);
2135 
2136         if (dhp_head != NULL) {
2137                 devmap_handle_t *tdhp;
2138                 /*
2139                  * If large page size was used in hat_devload(),
2140                  * the same page size must be used in hat_unload().
2141                  */
2142                 dhp = tdhp = devmap_find_handle(dhp_head, addr);
2143                 while (tdhp != NULL) {
2144                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
2145                                 break;
2146                         }
2147                         tdhp = tdhp->dh_next;
2148                 }
2149                 if (tdhp) {
2150                         size_t slen = len;
2151                         size_t mlen;
2152                         size_t soff;
2153 
2154                         soff = (ulong_t)(addr - dhp->dh_uvaddr);
2155                         while (slen != 0) {
2156                                 mlen = MIN(slen, (dhp->dh_len - soff));
2157                                 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
2158                                     dhp->dh_len, HAT_UNLOAD);
2159                                 dhp = dhp->dh_next;
2160                                 ASSERT(slen >= mlen);
2161                                 slen -= mlen;
2162                                 soff = 0;
2163                         }
2164                         return (0);
2165                 }
2166         }
2167 
2168         if ((prot & ~PROT_USER) == PROT_NONE) {
2169                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
2170         } else {
2171                 /*
2172                  * RFE: the segment should keep track of all attributes
2173                  * allowing us to remove the deprecated hat_chgprot
2174                  * and use hat_chgattr.
2175                  */
2176                 hat_chgprot(seg->s_as->a_hat, addr, len, prot);
2177         }
2178 
2179         return (0);
2180 }
2181 
2182 static int
2183 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2184 {
2185         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2186         struct vpage *vp, *evp;
2187 
2188         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_CHECKPROT,
2189             "segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x",
2190             (void *)seg, (void *)addr, len, prot);
2191         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2192 
2193         /*
2194          * If the segment protections can be used, simply check against them.
2195          */
2196         rw_enter(&sdp->lock, RW_READER);
2197         if (sdp->pageprot == 0) {
2198                 register int err;
2199 
2200                 err = ((sdp->prot & prot) != prot) ? EACCES : 0;
2201                 rw_exit(&sdp->lock);
2202                 return (err);
2203         }
2204 
2205         /*
2206          * Have to check down to the vpage level
2207          */
2208         evp = &sdp->vpage[seg_page(seg, addr + len)];
2209         for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
2210                 if ((VPP_PROT(vp) & prot) != prot) {
2211                         rw_exit(&sdp->lock);
2212                         return (EACCES);
2213                 }
2214         }
2215         rw_exit(&sdp->lock);
2216         return (0);
2217 }
2218 
2219 static int
2220 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2221 {
2222         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2223         size_t pgno;
2224 
2225         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_GETPROT,
2226             "segdev_getprot:start seg=%p addr=%p len=%lx protv=%p",
2227             (void *)seg, (void *)addr, len, (void *)protv);
2228         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2229 
2230         pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
2231         if (pgno != 0) {
2232                 rw_enter(&sdp->lock, RW_READER);
2233                 if (sdp->pageprot == 0) {
2234                         do {
2235                                 protv[--pgno] = sdp->prot;
2236                         } while (pgno != 0);
2237                 } else {
2238                         size_t pgoff = seg_page(seg, addr);
2239 
2240                         do {
2241                                 pgno--;
2242                                 protv[pgno] =
2243                                     VPP_PROT(&sdp->vpage[pgno + pgoff]);
2244                         } while (pgno != 0);
2245                 }
2246                 rw_exit(&sdp->lock);
2247         }
2248         return (0);
2249 }
2250 
2251 static u_offset_t
2252 segdev_getoffset(register struct seg *seg, caddr_t addr)
2253 {
2254         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2255 
2256         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETOFFSET,
2257             "segdev_getoffset:start seg=%p addr=%p", (void *)seg, (void *)addr);
2258 
2259         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2260 
2261         return ((u_offset_t)sdp->offset + (addr - seg->s_base));
2262 }
2263 
2264 /*ARGSUSED*/
2265 static int
2266 segdev_gettype(register struct seg *seg, caddr_t addr)
2267 {
2268         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2269 
2270         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETTYPE,
2271             "segdev_gettype:start seg=%p addr=%p", (void *)seg, (void *)addr);
2272 
2273         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2274 
2275         return (sdp->type);
2276 }
2277 
2278 
2279 /*ARGSUSED*/
2280 static int
2281 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2282 {
2283         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2284 
2285         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETVP,
2286             "segdev_getvp:start seg=%p addr=%p", (void *)seg, (void *)addr);
2287 
2288         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2289 
2290         /*
2291          * Note that this vp is the common_vp of the device, where the
2292          * pages are hung.
2293          */
2294         *vpp = VTOCVP(sdp->vp);
2295 
2296         return (0);
2297 }
2298 
2299 static void
2300 segdev_badop(void)
2301 {
2302         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGDEV_BADOP,
2303             "segdev_badop:start");
2304         panic("segdev_badop");
2305         /*NOTREACHED*/
2306 }
2307 
2308 /*
2309  * segdev pages are not in the cache, and thus can't really be controlled.
2310  * Hence, syncs are simply always successful.
2311  */
2312 /*ARGSUSED*/
2313 static int
2314 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2315 {
2316         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SYNC, "segdev_sync:start");
2317 
2318         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2319 
2320         return (0);
2321 }
2322 
2323 /*
2324  * segdev pages are always "in core".
2325  */
2326 /*ARGSUSED*/
2327 static size_t
2328 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2329 {
2330         size_t v = 0;
2331 
2332         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_INCORE, "segdev_incore:start");
2333 
2334         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2335 
2336         for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2337             v += PAGESIZE)
2338                 *vec++ = 1;
2339         return (v);
2340 }
2341 
2342 /*
2343  * segdev pages are not in the cache, and thus can't really be controlled.
2344  * Hence, locks are simply always successful.
2345  */
2346 /*ARGSUSED*/
2347 static int
2348 segdev_lockop(struct seg *seg, caddr_t addr,
2349     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2350 {
2351         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_LOCKOP, "segdev_lockop:start");
2352 
2353         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2354 
2355         return (0);
2356 }
2357 
2358 /*
2359  * segdev pages are not in the cache, and thus can't really be controlled.
2360  * Hence, advise is simply always successful.
2361  */
2362 /*ARGSUSED*/
2363 static int
2364 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2365 {
2366         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_ADVISE, "segdev_advise:start");
2367 
2368         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2369 
2370         return (0);
2371 }
2372 
2373 /*
2374  * ddi_segmap_setup:    Used by drivers wishing to specify mapping attributes
2375  *                      for a segment.  Called from a driver's segmap(9E)
2376  *                      routine.
2377  */
2378 /*ARGSUSED*/
2379 int
2380 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
2381     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
2382     ddi_device_acc_attr_t *accattrp, uint_t rnumber)
2383 {
2384         struct segdev_crargs dev_a;
2385         int (*mapfunc)(dev_t dev, off_t off, int prot);
2386         uint_t hat_attr;
2387         pfn_t pfn;
2388         int     error, i;
2389 
2390         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP_SETUP,
2391             "ddi_segmap_setup:start");
2392 
2393         if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2394                 return (ENODEV);
2395 
2396         /*
2397          * Character devices that support the d_mmap
2398          * interface can only be mmap'ed shared.
2399          */
2400         if ((flags & MAP_TYPE) != MAP_SHARED)
2401                 return (EINVAL);
2402 
2403         /*
2404          * Check that this region is indeed mappable on this platform.
2405          * Use the mapping function.
2406          */
2407         if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
2408                 return (ENXIO);
2409 
2410         /*
2411          * Check to ensure that the entire range is
2412          * legal and we are not trying to map in
2413          * more than the device will let us.
2414          */
2415         for (i = 0; i < len; i += PAGESIZE) {
2416                 if (i == 0) {
2417                         /*
2418                          * Save the pfn at offset here. This pfn will be
2419                          * used later to get user address.
2420                          */
2421                         if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
2422                             maxprot)) == PFN_INVALID)
2423                                 return (ENXIO);
2424                 } else {
2425                         if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
2426                             PFN_INVALID)
2427                                 return (ENXIO);
2428                 }
2429         }
2430 
2431         as_rangelock(as);
2432         /* Pick an address w/o worrying about any vac alignment constraints. */
2433         error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
2434         if (error != 0) {
2435                 as_rangeunlock(as);
2436                 return (error);
2437         }
2438 
2439         dev_a.mapfunc = mapfunc;
2440         dev_a.dev = dev;
2441         dev_a.offset = (offset_t)offset;
2442         dev_a.type = flags & MAP_TYPE;
2443         dev_a.prot = (uchar_t)prot;
2444         dev_a.maxprot = (uchar_t)maxprot;
2445         dev_a.hat_attr = hat_attr;
2446         dev_a.hat_flags = 0;
2447         dev_a.devmap_data = NULL;
2448 
2449         error = as_map(as, *addrp, len, segdev_create, &dev_a);
2450         as_rangeunlock(as);
2451         return (error);
2452 
2453 }
2454 
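     /*
      * Illustrative sketch only: a minimal driver segmap(9E) entry point
      * that forwards to ddi_segmap_setup().  The xx_ prefix, register
      * number 0 and the access-attribute settings are assumptions made for
      * the example, not something this framework requires.
      *
      *   static int
      *   xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
      *       off_t len, unsigned int prot, unsigned int maxprot,
      *       unsigned int flags, cred_t *credp)
      *   {
      *           ddi_device_acc_attr_t attr;
      *
      *           attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
      *           attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
      *           attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
      *
      *           return (ddi_segmap_setup(dev, off, as, addrp, len, prot,
      *               maxprot, flags, credp, &attr, 0));
      *   }
      */
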
2455 /*ARGSUSED*/
2456 static int
2457 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
2458     struct page ***ppp, enum lock_type type, enum seg_rw rw)
2459 {
2460         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_PAGELOCK,
2461             "segdev_pagelock:start");
2462         return (ENOTSUP);
2463 }
2464 
2465 /*
2466  * devmap_device: Used by the devmap framework to establish a mapping;
2467  *                called by devmap_setup(9F) during map setup time.
2468  */
2469 /*ARGSUSED*/
2470 static int
2471 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
2472     offset_t off, size_t len, uint_t flags)
2473 {
2474         devmap_handle_t *rdhp, *maxdhp;
2475         struct segdev_crargs dev_a;
2476         int     err;
2477         uint_t maxprot = PROT_ALL;
2478         offset_t offset = 0;
2479         pfn_t pfn;
2480         struct devmap_pmem_cookie *pcp;
2481 
2482         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVICE,
2483             "devmap_device:start dhp=%p addr=%p off=%llx, len=%lx",
2484             (void *)dhp, (void *)addr, off, len);
2485 
2486         DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
2487             (void *)dhp, (void *)addr, off, len));
2488 
2489         as_rangelock(as);
2490         if ((flags & MAP_FIXED) == 0) {
2491                 offset_t aligned_off;
2492 
2493                 rdhp = maxdhp = dhp;
2494                 while (rdhp != NULL) {
2495                         maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
2496                             maxdhp : rdhp;
2497                         rdhp = rdhp->dh_next;
2498                         maxprot |= dhp->dh_maxprot;
2499                 }
2500                 offset = maxdhp->dh_uoff - dhp->dh_uoff;
2501 
2502                 /*
2503                  * Use the dhp that has the
2504                  * largest len to get user address.
2505                  */
2506                 /*
2507                  * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr,
2508                  * use 0 which is as good as any other.
2509                  */
2510                 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
2511                         aligned_off = (offset_t)0;
2512                 } else if (dhp_is_devmem(maxdhp)) {
2513                         aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
2514                 } else if (dhp_is_pmem(maxdhp)) {
2515                         pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
2516                         pfn = page_pptonum(
2517                             pcp->dp_pparray[btop(maxdhp->dh_roff)]);
2518                         aligned_off = (offset_t)ptob(pfn) - offset;
2519                 } else {
2520                         aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
2521                             offset;
2522                 }
2523 
2524                 /*
2525                  * Pick an address aligned to dh_cookie.
2526                  * For kernel memory/user memory, the cookie is cvaddr.
2527                  * For device memory, the cookie is the physical address.
2528                  */
2529                 map_addr(addr, len, aligned_off, 1, flags);
2530                 if (*addr == NULL) {
2531                         as_rangeunlock(as);
2532                         return (ENOMEM);
2533                 }
2534         } else {
2535                 /*
2536                  * User-specified address; blow away any previous mappings.
2537                  */
2538                 (void) as_unmap(as, *addr, len);
2539         }
2540 
2541         dev_a.mapfunc = NULL;
2542         dev_a.dev = dhp->dh_dev;
2543         dev_a.type = flags & MAP_TYPE;
2544         dev_a.offset = off;
2545         /*
2546          * sdp->maxprot has the least restrictive protection of all dhps.
2547          */
2548         dev_a.maxprot = maxprot;
2549         dev_a.prot = dhp->dh_prot;
2550         /*
2551          * devmap uses dhp->dh_hat_attr for hat.
2552          */
2553         dev_a.hat_flags = 0;
2554         dev_a.hat_attr = 0;
2555         dev_a.devmap_data = (void *)dhp;
2556 
2557         err = as_map(as, *addr, len, segdev_create, &dev_a);
2558         as_rangeunlock(as);
2559         return (err);
2560 }
2561 
2562 int
2563 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
2564     uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
2565     size_t, uint_t, uint_t))
2566 {
2567         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2568         struct devmap_ctx *devctx;
2569         int do_timeout = 0;
2570         int ret;
2571 
2572 #ifdef lint
2573         pvtp = pvtp;
2574 #endif
2575 
2576         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT,
2577             "devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx",
2578             (void *)dhp, off, len);
2579         DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
2580             (void *)dhp, off, len));
2581 
2582         if (ctxmgt == NULL)
2583                 return (FC_HWERR);
2584 
2585         devctx = dhp->dh_ctx;
2586 
2587         /*
2588          * If we are on an MP system with more than one cpu running
2589          * and if a thread on some CPU already has the context, wait
2590          * for it to finish if there is a hysteresis timeout.
2591          *
2592          * We call cv_wait() instead of cv_wait_sig() because
2593          * it does not matter much if it returned due to a signal
2594          * or due to a cv_signal() or cv_broadcast().  In either event
2595          * we need to complete the mapping, otherwise the process
2596          * will die with a SEGV.
2597          */
2598         if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
2599                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK1,
2600                     "devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p",
2601                     devctx, dhp);
2602                 do_timeout = 1;
2603                 mutex_enter(&devctx->lock);
2604                 while (devctx->oncpu)
2605                         cv_wait(&devctx->cv, &devctx->lock);
2606                 devctx->oncpu = 1;
2607                 mutex_exit(&devctx->lock);
2608         }
2609 
2610         /*
2611          * Call the contextmgt callback so that the driver can handle
2612          * the fault.
2613          */
2614         ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);
2615 
2616         /*
2617          * If devmap_access() returned -1, then there was a hardware
2618          * error so we need to convert the return value to something
2619          * that trap() will understand.  Otherwise, the return value
2620          * is already a fault code generated by devmap_unload()
2621          * or devmap_load().
2622          */
2623         if (ret) {
2624                 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK2,
2625                     "devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p",
2626                     ret, dhp, devctx);
2627                 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
2628                     ret, (void *)dhp));
2629                 if (devctx->oncpu) {
2630                         mutex_enter(&devctx->lock);
2631                         devctx->oncpu = 0;
2632                         cv_signal(&devctx->cv);
2633                         mutex_exit(&devctx->lock);
2634                 }
2635                 return (FC_HWERR);
2636         }
2637 
2638         /*
2639          * Set up the timeout if we need to.
2640          */
2641         if (do_timeout) {
2642                 mutex_enter(&devctx->lock);
2643                 if (dhp->dh_timeout_length > 0) {
2644                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK3,
2645                             "devmap_do_ctxmgt:timeout set");
2646                         devctx->timeout = timeout(devmap_ctxto,
2647                             devctx, dhp->dh_timeout_length);
2648                 } else {
2649                         /*
2650                          * We don't want to wait so set oncpu to
2651                          * 0 and wake up anyone waiting.
2652                          */
2653                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK4,
2654                             "devmap_do_ctxmgt:timeout not set");
2655                         devctx->oncpu = 0;
2656                         cv_signal(&devctx->cv);
2657                 }
2658                 mutex_exit(&devctx->lock);
2659         }
2660 
2661         return (DDI_SUCCESS);
2662 }
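
     /*
      * Illustrative sketch only: the way a driver's devmap_access(9E)
      * callback is expected to hand a fault to devmap_do_ctxmgt(), with a
      * context-management callback that validates the mapping via
      * devmap_load(9F).  The xx_ names are hypothetical.
      *
      *   static int
      *   xx_ctxmgt(devmap_cookie_t dhp, void *pvtp, offset_t off,
      *       size_t len, uint_t type, uint_t rw)
      *   {
      *           ... switch the device context to this mapping ...
      *           return (devmap_load(dhp, off, len, type, rw));
      *   }
      *
      *   static int
      *   xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
      *       size_t len, uint_t type, uint_t rw)
      *   {
      *           return (devmap_do_ctxmgt(dhp, pvtp, off, len, type, rw,
      *               xx_ctxmgt));
      *   }
      */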
2663 
2664 /*
2665  *                                       end of mapping
2666  *                    poff   fault_offset         |
2667  *            base     |        |                 |
2668  *              |      |        |                 |
2669  *              V      V        V                 V
2670  *  +-----------+---------------+-------+---------+-------+
2671  *              ^               ^       ^         ^
2672  *              |<--- offset--->|<-len->|         |
2673  *              |<--- dh_len(size of mapping) --->|
2674  *                     |<--  pg -->|
2675  *                              -->|rlen|<--
2676  */
2677 static ulong_t
2678 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2679     ulong_t *opfn, ulong_t *pagesize)
2680 {
2681         register int level;
2682         ulong_t pg;
2683         ulong_t poff;
2684         ulong_t base;
2685         caddr_t uvaddr;
2686         long rlen;
2687 
2688         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP,
2689             "devmap_roundup:start dhp=%p off=%lx len=%lx",
2690             (void *)dhp, offset, len);
2691         DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2692             (void *)dhp, offset, len));
2693 
2694         /*
2695          * get the max. pagesize that is aligned within the range
2696          * <dh_pfn, dh_pfn+offset>.
2697          *
2698          * The calculations below use the physical address to determine
2699          * the page size to use. The same calculations can use the
2700          * virtual address to determine the page size.
2701          */
2702         base = (ulong_t)ptob(dhp->dh_pfn);
2703         for (level = dhp->dh_mmulevel; level >= 0; level--) {
2704                 pg = page_get_pagesize(level);
2705                 poff = ((base + offset) & ~(pg - 1));
2706                 uvaddr = dhp->dh_uvaddr + (poff - base);
2707                 if ((poff >= base) &&
2708                     ((poff + pg) <= (base + dhp->dh_len)) &&
2709                     VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2710                         break;
2711         }
2712 
2713         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK1,
2714             "devmap_roundup: base=%lx poff=%lx dhp=%p",
2715             base, poff, dhp);
2716         DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2717             base, poff, dhp->dh_pfn));
2718 
2719         ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2720         ASSERT(level >= 0);
2721 
2722         *pagesize = pg;
2723         *opfn = dhp->dh_pfn + btop(poff - base);
2724 
2725         rlen = len + offset - (poff - base + pg);
2726 
2727         ASSERT(rlen < (long)len);
2728 
2729         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK2,
2730             "devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
2731             (void *)dhp, level, rlen, pagesize, opfn);
2732         DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2733             "level %x rlen %lx psize %lx opfn %lx\n",
2734             (void *)dhp, level, rlen, *pagesize, *opfn));
2735 
2736         return ((ulong_t)((rlen > 0) ? rlen : 0));
2737 }
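
/*
 * Worked example for devmap_roundup() (illustrative only, assuming 4K base
 * pages, a 2M large page size supported at dh_mmulevel, and a uvaddr and
 * physical address that are both 2M aligned): with dh_len = 4M and a fault
 * at offset = 0x280000, len = 0x1000, the 2M page containing base + offset
 * starts at poff = base + 0x200000 and lies entirely within the mapping, so
 * *pagesize = 0x200000, *opfn = dh_pfn + btop(0x200000), and the residue
 * rlen = 0x1000 + 0x280000 - (0x200000 + 0x200000) is negative, so 0 is
 * returned: a single large page covers the whole fault.
 */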
2738 
2739 /*
2740  * find the dhp that contains addr.
2741  */
2742 static devmap_handle_t *
2743 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2744 {
2745         devmap_handle_t *dhp;
2746 
2747         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_FIND_HANDLE,
2748             "devmap_find_handle:start");
2749 
2750         dhp = dhp_head;
2751         while (dhp) {
2752                 if (addr >= dhp->dh_uvaddr &&
2753                     addr < (dhp->dh_uvaddr + dhp->dh_len))
2754                         return (dhp);
2755                 dhp = dhp->dh_next;
2756         }
2757 
2758         return ((devmap_handle_t *)NULL);
2759 }
2760 
2761 /*
2762  * devmap_unload:
2763  *                      Marks a segdev segment, or just the pages if offset->offset+len
2764  *                      is not the entire segment, as intercept and unloads the
2765  *                      pages in the range offset -> offset+len.
2766  */
2767 int
2768 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
2769 {
2770         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2771         caddr_t addr;
2772         ulong_t size;
2773         ssize_t soff;
2774 
2775         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_UNLOAD,
2776             "devmap_unload:start dhp=%p offset=%llx len=%lx",
2777             (void *)dhp, offset, len);
2778         DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
2779             (void *)dhp, offset, len));
2780 
2781         soff = (ssize_t)(offset - dhp->dh_uoff);
2782         soff = round_down_p2(soff, PAGESIZE);
2783         if (soff < 0 || soff >= dhp->dh_len)
2784                 return (FC_MAKE_ERR(EINVAL));
2785 
2786         /*
2787          * Address and size must be page aligned.  Len is rounded up to
2788          * cover a whole number of pages, and offset has been rounded
2789          * down to the byte offset of the first byte of the page that
2790          * contains offset.
2791          */
2792         len = round_up_p2(len, PAGESIZE);
2793 
2794         /*
2795          * If len is == 0, then calculate the size by getting
2796          * the number of bytes from offset to the end of the segment.
2797          */
2798         if (len == 0)
2799                 size = dhp->dh_len - soff;
2800         else {
2801                 size = len;
2802                 if ((soff + size) > dhp->dh_len)
2803                         return (FC_MAKE_ERR(EINVAL));
2804         }
2805 
2806         /*
2807          * The address is offset bytes from the base address of
2808          * the dhp.
2809          */
2810         addr = (caddr_t)(soff + dhp->dh_uvaddr);
2811 
2812         /*
2813          * If large page size was used in hat_devload(),
2814          * the same page size must be used in hat_unload().
2815          */
2816         if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
2817                 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
2818                     dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
2819         } else {
2820                 hat_unload(dhp->dh_seg->s_as->a_hat,  addr, size,
2821                     HAT_UNLOAD|HAT_UNLOAD_OTHER);
2822         }
2823 
2824         return (0);
2825 }
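
/*
 * Illustrative sketch (hypothetical driver code): a driver can revoke the
 * user translations from a given offset to the end of a handle before
 * reprogramming the device by calling devmap_unload() with len == 0; the
 * non-zero return value is a fault code suitable for returning directly
 * from a devmap_access(9E) callback:
 *
 *	if ((err = devmap_unload(dhp, off, 0)) != 0)
 *		return (err);
 */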
2826 
2827 /*
2828  * calculates the optimal page size that will be used for hat_devload().
2829  */
2830 static void
2831 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
2832     size_t *llen, caddr_t *laddr)
2833 {
2834         ulong_t off;
2835         ulong_t pfn;
2836         ulong_t pgsize;
2837         uint_t first = 1;
2838 
2839         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GET_LARGE_PGSIZE,
2840             "devmap_get_large_pgsize:start");
2841 
2842         /*
2843          * RFE - This code only supports large page mappings for devmem.
2844          * It could be changed in the future if we want to support
2845          * large page mappings for kernel exported memory.
2846          */
2847         ASSERT(dhp_is_devmem(dhp));
2848         ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
2849 
2850         *llen = 0;
2851         off = (ulong_t)(addr - dhp->dh_uvaddr);
2852         while ((long)len > 0) {
2853                 /*
2854                  * get the optimal pfn to minimize address translations.
2855                  * devmap_roundup() returns residue bytes for next round
2856                  * calculations.
2857                  */
2858                 len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
2859 
2860                 if (first) {
2861                         *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
2862                         first = 0;
2863                 }
2864 
2865                 *llen += pgsize;
2866                 off = ptob(pfn - dhp->dh_pfn) + pgsize;
2867         }
2868         /* Large page mapping len/addr cover more range than original fault */
2869         ASSERT(*llen >= len && *laddr <= addr);
2870         ASSERT((*laddr + *llen) >= (addr + len));
2871 }
2872 
2873 /*
2874  * Initialize the devmap_softlock structure.
2875  */
2876 static struct devmap_softlock *
2877 devmap_softlock_init(dev_t dev, ulong_t id)
2878 {
2879         struct devmap_softlock *slock;
2880         struct devmap_softlock *tmp;
2881 
2882         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_INIT,
2883             "devmap_softlock_init:start");
2884 
2885         tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
2886         mutex_enter(&devmap_slock);
2887 
2888         for (slock = devmap_slist; slock != NULL; slock = slock->next)
2889                 if ((slock->dev == dev) && (slock->id == id))
2890                         break;
2891 
2892         if (slock == NULL) {
2893                 slock = tmp;
2894                 slock->dev = dev;
2895                 slock->id = id;
2896                 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
2897                 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
2898                 slock->next = devmap_slist;
2899                 devmap_slist = slock;
2900         } else
2901                 kmem_free(tmp, sizeof (struct devmap_softlock));
2902 
2903         mutex_enter(&slock->lock);
2904         slock->refcnt++;
2905         mutex_exit(&slock->lock);
2906         mutex_exit(&devmap_slock);
2907 
2908         return (slock);
2909 }
2910 
2911 /*
2912  * Wake up processes that sleep on softlocked.
2913  * Free dh_softlock if refcnt is 0.
2914  */
2915 static void
2916 devmap_softlock_rele(devmap_handle_t *dhp)
2917 {
2918         struct devmap_softlock *slock = dhp->dh_softlock;
2919         struct devmap_softlock *tmp;
2920         struct devmap_softlock *parent;
2921 
2922         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_RELE,
2923             "devmap_softlock_rele:start");
2924 
2925         mutex_enter(&devmap_slock);
2926         mutex_enter(&slock->lock);
2927 
2928         ASSERT(slock->refcnt > 0);
2929 
2930         slock->refcnt--;
2931 
2932         /*
2933          * If no one is using the device, free up the slock data.
2934          */
2935         if (slock->refcnt == 0) {
2936                 slock->softlocked = 0;
2937                 cv_signal(&slock->cv);
2938 
2939                 if (devmap_slist == slock)
2940                         devmap_slist = slock->next;
2941                 else {
2942                         parent = devmap_slist;
2943                         for (tmp = devmap_slist->next; tmp != NULL;
2944                             tmp = tmp->next) {
2945                                 if (tmp == slock) {
2946                                         parent->next = tmp->next;
2947                                         break;
2948                                 }
2949                                 parent = tmp;
2950                         }
2951                 }
2952                 mutex_exit(&slock->lock);
2953                 mutex_destroy(&slock->lock);
2954                 cv_destroy(&slock->cv);
2955                 kmem_free(slock, sizeof (struct devmap_softlock));
2956         } else
2957                 mutex_exit(&slock->lock);
2958 
2959         mutex_exit(&devmap_slock);
2960 }
2961 
2962 /*
2963  * Wake up processes that sleep on dh_ctx->locked.
2964  * Free dh_ctx if refcnt is 0.
2965  */
2966 static void
2967 devmap_ctx_rele(devmap_handle_t *dhp)
2968 {
2969         struct devmap_ctx *devctx = dhp->dh_ctx;
2970         struct devmap_ctx *tmp;
2971         struct devmap_ctx *parent;
2972         timeout_id_t tid;
2973 
2974         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE,
2975             "devmap_ctx_rele:start");
2976 
2977         mutex_enter(&devmapctx_lock);
2978         mutex_enter(&devctx->lock);
2979 
2980         ASSERT(devctx->refcnt > 0);
2981 
2982         devctx->refcnt--;
2983 
2984         /*
2985          * If no one is using the device, free up the devctx data.
2986          */
2987         if (devctx->refcnt == 0) {
2988                 /*
2989                  * Untimeout any threads using this mapping as they are about
2990                  * to go away.
2991                  */
2992                 if (devctx->timeout != 0) {
2993                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE_CK1,
2994                             "devmap_ctx_rele:untimeout ctx->timeout");
2995 
2996                         tid = devctx->timeout;
2997                         mutex_exit(&devctx->lock);
2998                         (void) untimeout(tid);
2999                         mutex_enter(&devctx->lock);
3000                 }
3001 
3002                 devctx->oncpu = 0;
3003                 cv_signal(&devctx->cv);
3004 
3005                 if (devmapctx_list == devctx)
3006                         devmapctx_list = devctx->next;
3007                 else {
3008                         parent = devmapctx_list;
3009                         for (tmp = devmapctx_list->next; tmp != NULL;
3010                             tmp = tmp->next) {
3011                                 if (tmp == devctx) {
3012                                         parent->next = tmp->next;
3013                                         break;
3014                                 }
3015                                 parent = tmp;
3016                         }
3017                 }
3018                 mutex_exit(&devctx->lock);
3019                 mutex_destroy(&devctx->lock);
3020                 cv_destroy(&devctx->cv);
3021                 kmem_free(devctx, sizeof (struct devmap_ctx));
3022         } else
3023                 mutex_exit(&devctx->lock);
3024 
3025         mutex_exit(&devmapctx_lock);
3026 }
3027 
3028 /*
3029  * devmap_load:
3030  *                      Marks a segdev segment, or just the pages if offset->offset+len
3031  *                      is not the entire segment, as nointercept and faults in
3032  *                      the pages in the range offset -> offset+len.
3033  */
3034 int
3035 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
3036     uint_t rw)
3037 {
3038         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3039         struct as *asp = dhp->dh_seg->s_as;
3040         caddr_t addr;
3041         ulong_t size;
3042         ssize_t soff;   /* offset from the beginning of the segment */
3043         int rc;
3044 
3045         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_LOAD,
3046             "devmap_load:start dhp=%p offset=%llx len=%lx",
3047             (void *)dhp, offset, len);
3048 
3049         DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
3050             (void *)dhp, offset, len));
3051 
3052         /*
3053          *      The hat layer only supports devload to a process's context
3054          *      for which the as lock is held. Verify that here and return an
3055          *      error if a driver inadvertently calls devmap_load on the wrong handle.
3056          */
3057         if ((asp != &kas) && !AS_LOCK_HELD(asp, &asp->a_lock))
3058                 return (FC_MAKE_ERR(EINVAL));
3059 
3060         soff = (ssize_t)(offset - dhp->dh_uoff);
3061         soff = round_down_p2(soff, PAGESIZE);
3062         if (soff < 0 || soff >= dhp->dh_len)
3063                 return (FC_MAKE_ERR(EINVAL));
3064 
3065         /*
3066          * Address and size must be page aligned.  Len is rounded up to
3067          * cover a whole number of pages, and offset has been rounded
3068          * down to the byte offset of the first byte of the page that
3069          * contains offset.
3070          */
3071         len = round_up_p2(len, PAGESIZE);
3072 
3073         /*
3074          * If len == 0, then calculate the size by getting
3075          * the number of bytes from offset to the end of the segment.
3076          */
3077         if (len == 0)
3078                 size = dhp->dh_len - soff;
3079         else {
3080                 size = len;
3081                 if ((soff + size) > dhp->dh_len)
3082                         return (FC_MAKE_ERR(EINVAL));
3083         }
3084 
3085         /*
3086          * The address is offset bytes from the base address of
3087          * the segment.
3088          */
3089         addr = (caddr_t)(soff + dhp->dh_uvaddr);
3090 
3091         HOLD_DHP_LOCK(dhp);
3092         rc = segdev_faultpages(asp->a_hat,
3093             dhp->dh_seg, addr, size, type, rw, dhp);
3094         RELE_DHP_LOCK(dhp);
3095         return (rc);
3096 }
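
/*
 * Illustrative sketch of a context-management callback built on
 * devmap_unload()/devmap_load() (hypothetical xx_ names and driver-private
 * data layout; see the devmap_do_ctxmgt() sketch above, which invokes it):
 *
 *	static int
 *	xx_devmap_contextmgt(devmap_cookie_t dhp, void *pvtp, offset_t off,
 *	    size_t len, uint_t type, uint_t rw)
 *	{
 *		struct xx_ctx *ctx = pvtp;
 *		int err;
 *
 *		mutex_enter(&ctx->lock);
 *		-- take the context away from its previous owner, if any
 *		if (ctx->owner_dhp != NULL && ctx->owner_dhp != dhp) {
 *			if ((err = devmap_unload(ctx->owner_dhp, off, len)) != 0) {
 *				mutex_exit(&ctx->lock);
 *				return (err);
 *			}
 *		}
 *		-- (restore/program the device context for this mapping here)
 *		ctx->owner_dhp = dhp;
 *		mutex_exit(&ctx->lock);
 *
 *		-- validate the faulting range for the new owner
 *		return (devmap_load(dhp, off, len, type, rw));
 *	}
 */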
3097 
3098 int
3099 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
3100     size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3101 {
3102         register devmap_handle_t *dhp;
3103         int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
3104             size_t *, uint_t);
3105         int (*mmap)(dev_t, off_t, int);
3106         struct devmap_callback_ctl *callbackops;
3107         devmap_handle_t *dhp_head = NULL;
3108         devmap_handle_t *dhp_prev = NULL;
3109         devmap_handle_t *dhp_curr;
3110         caddr_t addr;
3111         int map_flag;
3112         int ret;
3113         ulong_t total_len;
3114         size_t map_len;
3115         size_t resid_len = len;
3116         offset_t map_off = off;
3117         struct devmap_softlock *slock = NULL;
3118 
3119 #ifdef lint
3120         cred = cred;
3121 #endif
3122 
3123         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SETUP,
3124             "devmap_setup:start off=%llx len=%lx", off, len);
3125         DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
3126             off, len));
3127 
3128         devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
3129         mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
3130 
3131         /*
3132          * driver must provide devmap(9E) entry point in cb_ops to use the
3133          * devmap framework.
3134          */
3135         if (devmap == NULL || devmap == nulldev || devmap == nodev)
3136                 return (EINVAL);
3137 
3138         /*
3139          * To protect from an inadvertent entry because the devmap entry point
3140          * is not NULL, return error if D_DEVMAP bit is not set in cb_flag and
3141          * mmap is NULL.
3142          */
3143         map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
3144         if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
3145                 return (EINVAL);
3146 
3147         /*
3148          * devmap allows mmap(2) to map multiple registers.
3149          * one devmap_handle is created for each register mapped.
3150          */
3151         for (total_len = 0; total_len < len; total_len += map_len) {
3152                 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
3153 
3154                 if (dhp_prev != NULL)
3155                         dhp_prev->dh_next = dhp;
3156                 else
3157                         dhp_head = dhp;
3158                 dhp_prev = dhp;
3159 
3160                 dhp->dh_prot = prot;
3161                 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
3162                 dhp->dh_dev = dev;
3163                 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
3164                 dhp->dh_uoff = map_off;
3165 
3166                 /*
3167                  * Get mapping specific info from
3168                  * the driver, such as rnumber, roff, len, callbackops,
3169                  * accattrp and, if the mapping is for kernel memory,
3170                  * ddi_umem_cookie.
3171                  */
3172                 if ((ret = cdev_devmap(dev, dhp, map_off,
3173                     resid_len, &map_len, get_udatamodel())) != 0) {
3174                         free_devmap_handle(dhp_head);
3175                         return (ENXIO);
3176                 }
3177 
3178                 if (map_len & PAGEOFFSET) {
3179                         free_devmap_handle(dhp_head);
3180                         return (EINVAL);
3181                 }
3182 
3183                 callbackops = &dhp->dh_callbackops;
3184 
3185                 if ((callbackops->devmap_access == NULL) ||
3186                     (callbackops->devmap_access == nulldev) ||
3187                     (callbackops->devmap_access == nodev)) {
3188                         /*
3189                          * Normally devmap does not support MAP_PRIVATE unless
3190                          * the drivers provide a valid devmap_access routine.
3191                          */
3192                         if ((flags & MAP_PRIVATE) != 0) {
3193                                 free_devmap_handle(dhp_head);
3194                                 return (EINVAL);
3195                         }
3196                 } else {
3197                         /*
3198                          * Initialize dhp_softlock and dh_ctx if the drivers
3199                          * provide devmap_access.
3200                          */
3201                         dhp->dh_softlock = devmap_softlock_init(dev,
3202                             (ulong_t)callbackops->devmap_access);
3203                         dhp->dh_ctx = devmap_ctxinit(dev,
3204                             (ulong_t)callbackops->devmap_access);
3205 
3206                         /*
3207                          * segdev_fault can only work when all
3208                          * dh_softlock in a multi-dhp mapping
3209                          * are the same; see the comments in segdev_fault.
3210                          * This code keeps track of the first
3211                          * dh_softlock allocated in slock, compares
3212                          * all later allocations against it, and
3213                          * returns an error if they are not the same.
3214                          */
3215                         if (slock == NULL)
3216                                 slock = dhp->dh_softlock;
3217                         if (slock != dhp->dh_softlock) {
3218                                 free_devmap_handle(dhp_head);
3219                                 return (ENOTSUP);
3220                         }
3221                 }
3222 
3223                 map_off += map_len;
3224                 resid_len -= map_len;
3225         }
3226 
3227         /*
3228          * get the user virtual address and establish the mapping between
3229          * uvaddr and device physical address.
3230          */
3231         if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
3232             != 0) {
3233                 /*
3234                  * free devmap handles if error during the mapping.
3235                  */
3236                 free_devmap_handle(dhp_head);
3237 
3238                 return (ret);
3239         }
3240 
3241         /*
3242          * call the driver's devmap_map callback to do more after the mapping,
3243          * such as to allocate driver private data for context management.
3244          */
3245         dhp = dhp_head;
3246         map_off = off;
3247         addr = *addrp;
3248         while (dhp != NULL) {
3249                 callbackops = &dhp->dh_callbackops;
3250                 dhp->dh_uvaddr = addr;
3251                 dhp_curr = dhp;
3252                 if (callbackops->devmap_map != NULL) {
3253                         ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
3254                             dev, flags, map_off,
3255                             dhp->dh_len, &dhp->dh_pvtp);
3256                         if (ret != 0) {
3257                                 struct segdev_data *sdp;
3258 
3259                                 /*
3260                                  * call driver's devmap_unmap entry point
3261                                  * to free driver resources.
3262                                  */
3263                                 dhp = dhp_head;
3264                                 map_off = off;
3265                                 while (dhp != dhp_curr) {
3266                                         callbackops = &dhp->dh_callbackops;
3267                                         if (callbackops->devmap_unmap != NULL) {
3268                                                 (*callbackops->devmap_unmap)(
3269                                                     dhp, dhp->dh_pvtp,
3270                                                     map_off, dhp->dh_len,
3271                                                     NULL, NULL, NULL, NULL);
3272                                         }
3273                                         map_off += dhp->dh_len;
3274                                         dhp = dhp->dh_next;
3275                                 }
3276                                 sdp = dhp_head->dh_seg->s_data;
3277                                 sdp->devmap_data = NULL;
3278                                 free_devmap_handle(dhp_head);
3279                                 return (ENXIO);
3280                         }
3281                 }
3282                 map_off += dhp->dh_len;
3283                 addr += dhp->dh_len;
3284                 dhp = dhp->dh_next;
3285         }
3286 
3287         return (0);
3288 }
3289 
3290 int
3291 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
3292     off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3293 {
3294         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP,
3295             "devmap_segmap:start");
3296         return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
3297             (size_t)len, prot, maxprot, flags, cred));
3298 }
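
/*
 * Illustrative sketch (hypothetical xx_ names): a driver that relies entirely
 * on the devmap framework typically implements its segmap(9E) entry point as
 * a thin wrapper that forwards every argument to ddi_devmap_segmap():
 *
 *	static int
 *	xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
 *	    off_t len, uint_t prot, uint_t maxprot, uint_t flags,
 *	    cred_t *credp)
 *	{
 *		return (ddi_devmap_segmap(dev, off, (ddi_as_handle_t)as,
 *		    addrp, len, prot, maxprot, flags, credp));
 *	}
 */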
3299 
3300 /*
3301  * Called from devmap_devmem_setup/remap to see if large pages can be
3302  * used for this device mapping.
3303  * Also calculates the max. page size for this mapping.
3304  * This page size will be used in the fault routine for
3305  * optimal page size calculations.
3306  */
3307 static void
3308 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
3309 {
3310         ASSERT(dhp_is_devmem(dhp));
3311         dhp->dh_mmulevel = 0;
3312 
3313         /*
3314          * use large page size only if:
3315          *  1. it is device memory,
3316          *  2. the mmu supports multiple page sizes,
3317          *  3. the driver did not disallow it,
3318          *  4. the dhp length is at least as big as the large pagesize, and
3319          *  5. the uvaddr and pfn are large-pagesize aligned.
3320          */
3321         if (page_num_pagesizes() > 1 &&
3322             !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
3323                 ulong_t base;
3324                 int level;
3325 
3326                 base = (ulong_t)ptob(dhp->dh_pfn);
3327                 for (level = 1; level < page_num_pagesizes(); level++) {
3328                         size_t pgsize = page_get_pagesize(level);
3329                         if ((dhp->dh_len < pgsize) ||
3330                             (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
3331                             base, pgsize))) {
3332                                 break;
3333                         }
3334                 }
3335                 dhp->dh_mmulevel = level - 1;
3336         }
3337         if (dhp->dh_mmulevel > 0) {
3338                 dhp->dh_flags |= DEVMAP_FLAG_LARGE;
3339         } else {
3340                 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3341         }
3342 }
3343 
3344 /*
3345  * Called by a driver's devmap routine to pass device-specific info to
3346  * the framework.  Used for device memory mappings only.
3347  */
3348 int
3349 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3350     struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
3351     size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
3352 {
3353         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3354         ddi_acc_handle_t handle;
3355         ddi_map_req_t mr;
3356         ddi_acc_hdl_t *hp;
3357         int err;
3358 
3359         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_SETUP,
3360             "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
3361             (void *)dhp, roff, rnumber, (uint_t)len);
3362         DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
3363             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3364 
3365         /*
3366          * First, check whether this function has already been called for this dhp.
3367          */
3368         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3369                 return (DDI_FAILURE);
3370 
3371         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3372                 return (DDI_FAILURE);
3373 
3374         if (flags & DEVMAP_MAPPING_INVALID) {
3375                 /*
3376                  * Don't go up the tree to get pfn if the driver specifies
3377                  * DEVMAP_MAPPING_INVALID in flags.
3378                  *
3379                  * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3380                  * remap permission.
3381                  */
3382                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3383                         return (DDI_FAILURE);
3384                 }
3385                 dhp->dh_pfn = PFN_INVALID;
3386         } else {
3387                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3388                 if (handle == NULL)
3389                         return (DDI_FAILURE);
3390 
3391                 hp = impl_acc_hdl_get(handle);
3392                 hp->ah_vers = VERS_ACCHDL;
3393                 hp->ah_dip = dip;
3394                 hp->ah_rnumber = rnumber;
3395                 hp->ah_offset = roff;
3396                 hp->ah_len = len;
3397                 if (accattrp != NULL)
3398                         hp->ah_acc = *accattrp;
3399 
3400                 mr.map_op = DDI_MO_MAP_LOCKED;
3401                 mr.map_type = DDI_MT_RNUMBER;
3402                 mr.map_obj.rnumber = rnumber;
3403                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3404                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3405                 mr.map_handlep = hp;
3406                 mr.map_vers = DDI_MAP_VERSION;
3407 
3408                 /*
3409                  * Go up the device tree to get the pfn.
3410                  * The rootnex_map_regspec() routine in nexus drivers has been
3411                  * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3412                  */
3413                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
3414                 dhp->dh_hat_attr = hp->ah_hat_flags;
3415                 impl_acc_hdl_free(handle);
3416 
3417                 if (err)
3418                         return (DDI_FAILURE);
3419         }
3420         /* Should not be using devmem setup for memory pages */
3421         ASSERT(!pf_is_memory(dhp->dh_pfn));
3422 
3423         /* Only some of the flags bits are settable by the driver */
3424         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3425         dhp->dh_len = ptob(btopr(len));
3426 
3427         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3428         dhp->dh_roff = ptob(btop(roff));
3429 
3430         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3431         devmap_devmem_large_page_setup(dhp);
3432         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3433         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3434 
3435 
3436         if (callbackops != NULL) {
3437                 bcopy(callbackops, &dhp->dh_callbackops,
3438                     sizeof (struct devmap_callback_ctl));
3439         }
3440 
3441         /*
3442          * Initialize dh_lock if we want to do remap.
3443          */
3444         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3445                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3446                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3447         }
3448 
3449         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3450 
3451         return (DDI_SUCCESS);
3452 }
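
/*
 * Illustrative sketch (hypothetical xx_ names): a typical devmap(9E) entry
 * point that exports one register set with devmap_devmem_setup().  The
 * length is rounded up to whole pages and reported back through *maplen so
 * that devmap_setup() above can advance to the next handle:
 *
 *	static int
 *	xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 *	    size_t *maplen, uint_t model)
 *	{
 *		size_t length = ptob(btopr(len));
 *		int err;
 *
 *		err = devmap_devmem_setup(dhp, xx_dip, &xx_callbackops,
 *		    XX_REGSET_NUM, off, length, PROT_ALL, 0, &xx_acc_attr);
 *		if (err != DDI_SUCCESS)
 *			return (err);
 *		*maplen = length;
 *		return (0);
 *	}
 */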
3453 
3454 int
3455 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3456     uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
3457     uint_t flags, ddi_device_acc_attr_t *accattrp)
3458 {
3459         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3460         ddi_acc_handle_t handle;
3461         ddi_map_req_t mr;
3462         ddi_acc_hdl_t *hp;
3463         pfn_t   pfn;
3464         uint_t  hat_flags;
3465         int     err;
3466 
3467         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_REMAP,
3468             "devmap_devmem_remap:start dhp=%p offset=%llx rnum=%d len=%lx",
3469             (void *)dhp, roff, rnumber, (uint_t)len);
3470         DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
3471             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3472 
3473         /*
3474          * Return failure if setup has not been done or no remap permission
3475          * has been granted during the setup.
3476          */
3477         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3478             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3479                 return (DDI_FAILURE);
3480 
3481         /* Only DEVMAP_MAPPING_INVALID flag supported for remap */
3482         if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
3483                 return (DDI_FAILURE);
3484 
3485         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3486                 return (DDI_FAILURE);
3487 
3488         if (!(flags & DEVMAP_MAPPING_INVALID)) {
3489                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3490                 if (handle == NULL)
3491                         return (DDI_FAILURE);
3492         }
3493 
3494         HOLD_DHP_LOCK(dhp);
3495 
3496         /*
3497          * Unload the old mapping, so the next fault will set up the new mappings.
3498          * Do this while holding the dhp lock so other faults don't reestablish
3499          * the mappings.
3500          */
3501         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3502             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3503 
3504         if (flags & DEVMAP_MAPPING_INVALID) {
3505                 dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
3506                 dhp->dh_pfn = PFN_INVALID;
3507         } else {
3508                 /* clear any prior DEVMAP_MAPPING_INVALID flag */
3509                 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
3510                 hp = impl_acc_hdl_get(handle);
3511                 hp->ah_vers = VERS_ACCHDL;
3512                 hp->ah_dip = dip;
3513                 hp->ah_rnumber = rnumber;
3514                 hp->ah_offset = roff;
3515                 hp->ah_len = len;
3516                 if (accattrp != NULL)
3517                         hp->ah_acc = *accattrp;
3518 
3519                 mr.map_op = DDI_MO_MAP_LOCKED;
3520                 mr.map_type = DDI_MT_RNUMBER;
3521                 mr.map_obj.rnumber = rnumber;
3522                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3523                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3524                 mr.map_handlep = hp;
3525                 mr.map_vers = DDI_MAP_VERSION;
3526 
3527                 /*
3528                  * Go up the device tree to get the pfn.
3529                  * The rootnex_map_regspec() routine in nexus drivers has been
3530                  * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3531                  */
3532                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
3533                 hat_flags = hp->ah_hat_flags;
3534                 impl_acc_hdl_free(handle);
3535                 if (err) {
3536                         RELE_DHP_LOCK(dhp);
3537                         return (DDI_FAILURE);
3538                 }
3539                 /*
3540                  * Store result of ddi_map first in local variables, as we do
3541                  * not want to overwrite the existing dhp with wrong data.
3542                  */
3543                 dhp->dh_pfn = pfn;
3544                 dhp->dh_hat_attr = hat_flags;
3545         }
3546 
3547         /* clear the large page size flag */
3548         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3549 
3550         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3551         dhp->dh_roff = ptob(btop(roff));
3552 
3553         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3554         devmap_devmem_large_page_setup(dhp);
3555         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3556         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3557 
3558         RELE_DHP_LOCK(dhp);
3559         return (DDI_SUCCESS);
3560 }
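
/*
 * Illustrative sketch (hypothetical xx_ names, error handling omitted): a
 * driver that set up its handle with DEVMAP_ALLOW_REMAP can first invalidate
 * the user mapping and later point it at a different register set, e.g. when
 * the hardware context backing the mapping is switched.  While the mapping
 * is marked invalid, faults must be resolved by the driver's devmap_access
 * callback, typically by remapping:
 *
 *	-- tear the mapping down
 *	(void) devmap_devmem_remap(dhp, xx_dip, 0, 0, xx_len,
 *	    PROT_ALL, DEVMAP_MAPPING_INVALID, NULL);
 *	...
 *	-- re-establish it against the new register set
 *	(void) devmap_devmem_remap(dhp, xx_dip, XX_NEW_REGSET, 0, xx_len,
 *	    PROT_ALL, 0, &xx_acc_attr);
 */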
3561 
3562 /*
3563  * Called by a driver's devmap routine to pass kernel virtual address mapping
3564  * info to the framework.  Used only for kernel memory
3565  * allocated from ddi_umem_alloc().
3566  */
3567 int
3568 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3569     struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3570     offset_t off, size_t len, uint_t maxprot, uint_t flags,
3571     ddi_device_acc_attr_t *accattrp)
3572 {
3573         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3574         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3575 
3576 #ifdef lint
3577         dip = dip;
3578 #endif
3579 
3580         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_SETUP,
3581             "devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
3582             (void *)dhp, off, cookie, len);
3583         DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3584             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3585 
3586         if (cookie == NULL)
3587                 return (DDI_FAILURE);
3588 
3589         /* For UMEM_TRASH, this restriction is not needed */
3590         if ((off + len) > cp->size)
3591                 return (DDI_FAILURE);
3592 
3593         /* check if the cache attributes are supported */
3594         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3595                 return (DDI_FAILURE);
3596 
3597         /*
3598          * First, check whether this function has already been called for this dhp.
3599          */
3600         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3601                 return (DDI_FAILURE);
3602 
3603         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3604                 return (DDI_FAILURE);
3605 
3606         if (flags & DEVMAP_MAPPING_INVALID) {
3607                 /*
3608                  * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3609                  * remap permission.
3610                  */
3611                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3612                         return (DDI_FAILURE);
3613                 }
3614         } else {
3615                 dhp->dh_cookie = cookie;
3616                 dhp->dh_roff = ptob(btop(off));
3617                 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3618                 /* set HAT cache attributes */
3619                 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3620                 /* set HAT endianness attributes */
3621                 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3622         }
3623 
3624         /*
3625          * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3626          * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
3627          * create consistent mappings but our intention was to create
3628          * non-consistent mappings.
3629          *
3630          * DEVMEM: hat figures out that it's DEVMEM and creates non-consistent
3631          * mappings.
3632          *
3633          * kernel exported memory: hat figures out that it's memory and always
3634          * creates consistent mappings.
3635          *
3636          * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3637          *
3638          * /dev/kmem: consistent mappings are created unless they are
3639          * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
3640          * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3641          * mappings of /dev/kmem. See common/io/mem.c
3642          */
3643 
3644         /* Only some of the flags bits are settable by the driver */
3645         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3646 
3647         dhp->dh_len = ptob(btopr(len));
3648         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3649         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3650 
3651         if (callbackops != NULL) {
3652                 bcopy(callbackops, &dhp->dh_callbackops,
3653                     sizeof (struct devmap_callback_ctl));
3654         }
3655         /*
3656          * Initialize dh_lock if we want to do remap.
3657          */
3658         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3659                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3660                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3661         }
3662 
3663         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3664 
3665         return (DDI_SUCCESS);
3666 }
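
/*
 * Illustrative sketch (hypothetical xx_ names): exporting kernel memory that
 * was allocated with ddi_umem_alloc() below.  The cookie obtained at
 * allocation time is handed to devmap_umem_setup() from the devmap(9E)
 * entry point:
 *
 *	-- at attach(9E)/open(9E) time
 *	xx_softc->buf = ddi_umem_alloc(ptob(XX_NPAGES), DDI_UMEM_SLEEP,
 *	    &xx_softc->umem_cookie);
 *
 *	-- in the devmap(9E) entry point
 *	err = devmap_umem_setup(dhp, xx_dip, NULL, xx_softc->umem_cookie,
 *	    off, ptob(btopr(len)), PROT_ALL, 0, &xx_acc_attr);
 *	if (err == DDI_SUCCESS)
 *		*maplen = ptob(btopr(len));
 */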
3667 
3668 int
3669 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3670     ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3671     uint_t flags, ddi_device_acc_attr_t *accattrp)
3672 {
3673         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3674         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3675 
3676         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_REMAP,
3677             "devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
3678             (void *)dhp, off, cookie, len);
3679         DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3680             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3681 
3682 #ifdef lint
3683         dip = dip;
3684         accattrp = accattrp;
3685 #endif
3686         /*
3687          * Return failure if setup has not been done or no remap permission
3688          * has been granted during the setup.
3689          */
3690         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3691             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3692                 return (DDI_FAILURE);
3693 
3694         /* No flags supported for remap yet */
3695         if (flags != 0)
3696                 return (DDI_FAILURE);
3697 
3698         /* check if the cache attributes are supported */
3699         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3700                 return (DDI_FAILURE);
3701 
3702         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3703                 return (DDI_FAILURE);
3704 
3705         /* For UMEM_TRASH, this restriction is not needed */
3706         if ((off + len) > cp->size)
3707                 return (DDI_FAILURE);
3708 
3709         HOLD_DHP_LOCK(dhp);
3710         /*
3711          * Unload the old mapping, so the next fault will set up the new mappings.
3712          * Do this while holding the dhp lock so other faults don't reestablish
3713          * the mappings.
3714          */
3715         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3716             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3717 
3718         dhp->dh_cookie = cookie;
3719         dhp->dh_roff = ptob(btop(off));
3720         dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3721         /* set HAT cache attributes */
3722         i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3723         /* set HAT endianness attributes */
3724         i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3725 
3726         /* clear the large page size flag */
3727         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3728 
3729         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3730         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3731         RELE_DHP_LOCK(dhp);
3732         return (DDI_SUCCESS);
3733 }
3734 
3735 /*
3736  * Set the timeout value for the driver's context management callback, e.g.
3737  * devmap_access().
3738  */
3739 void
3740 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3741 {
3742         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3743 
3744         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SET_CTX_TIMEOUT,
3745             "devmap_set_ctx_timeout:start dhp=%p ticks=%x",
3746             (void *)dhp, ticks);
3747         dhp->dh_timeout_length = ticks;
3748 }
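
/*
 * Illustrative sketch (hypothetical value): a driver doing context
 * management usually sets a non-zero hysteresis from its devmap_map(9E)
 * callback so that competing mappings do not thrash the device context on
 * every fault, e.g.:
 *
 *	devmap_set_ctx_timeout(dhp, drv_usectohz(XX_CTX_HOLD_USEC));
 */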
3749 
3750 int
3751 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3752     size_t len, uint_t type, uint_t rw)
3753 {
3754 #ifdef lint
3755         pvtp = pvtp;
3756 #endif
3757 
3758         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DEFAULT_ACCESS,
3759             "devmap_default_access:start");
3760         return (devmap_load(dhp, off, len, type, rw));
3761 }
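
/*
 * Illustrative sketch (hypothetical xx_ names; the exact structure layout is
 * declared in <sys/ddidevmap.h>): drivers that need the devmap callbacks but
 * no special context management can simply point devmap_access at
 * devmap_default_access() in their devmap_callback_ctl, e.g.:
 *
 *	xx_callbackops.devmap_rev = DEVMAP_OPS_REV;
 *	xx_callbackops.devmap_map = xx_devmap_map;
 *	xx_callbackops.devmap_access = devmap_default_access;
 *	xx_callbackops.devmap_unmap = xx_devmap_unmap;
 */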
3762 
3763 /*
3764  * segkmem_alloc() wrapper to allocate memory which is both
3765  * non-relocatable (for DR) and sharelocked, since the rest
3766  * of this segment driver requires it.
3767  */
3768 static void *
3769 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
3770 {
3771         ASSERT(vmp != NULL);
3772         ASSERT(kvseg.s_base != NULL);
3773         vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
3774         return (segkmem_alloc(vmp, size, vmflag));
3775 }
3776 
3777 /*
3778  * This is where things are a bit incestuous with seg_kmem: unlike
3779  * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
3780  * we need to do a bit of a dance around that to prevent duplication of
3781  * code until we decide to bite the bullet and implement a new kernel
3782  * segment for driver-allocated memory that is exported to user space.
3783  */
3784 static void
3785 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
3786 {
3787         page_t *pp;
3788         caddr_t addr = inaddr;
3789         caddr_t eaddr;
3790         pgcnt_t npages = btopr(size);
3791 
3792         ASSERT(vmp != NULL);
3793         ASSERT(kvseg.s_base != NULL);
3794         ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
3795 
3796         hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
3797 
3798         for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
3799                 /*
3800                  * Use page_find() instead of page_lookup() to find the page
3801                  * since we know that it is hashed and has a shared lock.
3802                  */
3803                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
3804 
3805                 if (pp == NULL)
3806                         panic("devmap_free_pages: page not found");
3807                 if (!page_tryupgrade(pp)) {
3808                         page_unlock(pp);
3809                         pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
3810                             SE_EXCL);
3811                         if (pp == NULL)
3812                                 panic("devmap_free_pages: page already freed");
3813                 }
3814                 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
3815                 pp->p_lckcnt = 0;
3816                 page_destroy(pp, 0);
3817         }
3818         page_unresv(npages);
3819 
3820         if (vmp != NULL)
3821                 vmem_free(vmp, inaddr, size);
3822 }
3823 
3824 /*
3825  * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
3826  * allocating non-pageable kmem in response to a ddi_umem_alloc()
3827  * default request. For now we allocate our own pages and we keep
3828  * them long-term sharelocked, since: A) the fault routines expect the
3829  * memory to already be locked; B) pageable umem is already long-term
3830  * locked; C) it's a lot of work to make it otherwise, particularly
3831  * since the nexus layer expects the pages to never fault. An RFE is to
3832  * not keep the pages long-term locked, but instead to be able to
3833  * take faults on them and simply look them up in kvp in case we
3834  * fault on them. Even then, we must take care not to let pageout
3835  * steal them from us since the data must remain resident; if we
3836  * do this we must come up with some way to pin the pages to prevent
3837  * faults while a driver is doing DMA to/from them.
3838  */
3839 static void *
3840 devmap_umem_alloc_np(size_t size, size_t flags)
3841 {
3842         void *buf;
3843         int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
3844 
3845         buf = vmem_alloc(umem_np_arena, size, vmflags);
3846         if (buf != NULL)
3847                 bzero(buf, size);
3848         return (buf);
3849 }
3850 
3851 static void
3852 devmap_umem_free_np(void *addr, size_t size)
3853 {
3854         vmem_free(umem_np_arena, addr, size);
3855 }
3856 
3857 /*
3858  * allocate page aligned kernel memory for exporting to user land.
3859  * The devmap framework will use the cookie allocated by ddi_umem_alloc()
3860  * to find a user virtual address that is in the same color as the address
3861  * allocated here.
3862  */
3863 void *
3864 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
3865 {
3866         register size_t len = ptob(btopr(size));
3867         void *buf = NULL;
3868         struct ddi_umem_cookie *cp;
3869         int iflags = 0;
3870 
3871         *cookie = NULL;
3872 
3873         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_ALLOC,
3874             "devmap_umem_alloc:start");
3875         if (len == 0)
3876                 return ((void *)NULL);
3877 
3878         /*
3879          * allocate cookie
3880          */
3881         if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
3882             flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
3883                 ASSERT(flags & DDI_UMEM_NOSLEEP);
3884                 return ((void *)NULL);
3885         }
3886 
3887         if (flags & DDI_UMEM_PAGEABLE) {
3888                 /* Only one of the flags is allowed */
3889                 ASSERT(!(flags & DDI_UMEM_TRASH));
3890                 /* initialize resource with 0 */
3891                 iflags = KPD_ZERO;
3892 
3893                 /*
3894                  * To allocate unlocked pageable memory, use segkp_get() to
3895                  * create a segkp segment.  Since segkp can only service kas,
3896                  * other segment drivers such as segdev have to do
3897                  * as_fault(segkp, SOFTLOCK) in their own fault routines.
3898                  */
3899                 if (flags & DDI_UMEM_NOSLEEP)
3900                         iflags |= KPD_NOWAIT;
3901 
3902                 if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
3903                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3904                         return ((void *)NULL);
3905                 }
3906                 cp->type = KMEM_PAGEABLE;
3907                 mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
3908                 cp->locked = 0;
3909         } else if (flags & DDI_UMEM_TRASH) {
3910                 /* Only one of the flags is allowed */
3911                 ASSERT(!(flags & DDI_UMEM_PAGEABLE));
3912                 cp->type = UMEM_TRASH;
3913                 buf = NULL;
3914         } else {
3915                 if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
3916                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3917                         return ((void *)NULL);
3918                 }
3919 
3920                 cp->type = KMEM_NON_PAGEABLE;
3921         }
3922 
3923         /*
3924          * need to save size here.  size will be used when
3925          * we do kmem_free.
3926          */
3927         cp->size = len;
3928         cp->cvaddr = (caddr_t)buf;
3929 
3930         *cookie =  (void *)cp;
3931         return (buf);
3932 }
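
/*
 * Illustrative sketch (hypothetical xx_ names): typical pairing of
 * ddi_umem_alloc() with ddi_umem_free() below.  The allocation is rounded
 * up to whole pages, and the cookie must be kept both for the free and for
 * devmap_umem_setup():
 *
 *	if ((xx_softc->buf = ddi_umem_alloc(ptob(XX_NPAGES),
 *	    DDI_UMEM_NOSLEEP, &xx_softc->umem_cookie)) == NULL)
 *		return (DDI_FAILURE);
 *	...
 *	ddi_umem_free(xx_softc->umem_cookie);
 *	xx_softc->umem_cookie = NULL;
 */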
3933 
3934 void
3935 ddi_umem_free(ddi_umem_cookie_t cookie)
3936 {
3937         struct ddi_umem_cookie *cp;
3938 
3939         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_FREE,
3940             "devmap_umem_free:start");
3941 
3942         /*
3943          * if cookie is NULL, no effects on the system
3944          */
3945         if (cookie == NULL)
3946                 return;
3947 
3948         cp = (struct ddi_umem_cookie *)cookie;
3949 
3950         switch (cp->type) {
3951         case KMEM_PAGEABLE :
3952                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3953                 /*
3954                  * Check if there are still any pending faults on the cookie
3955                  * while the driver is deleting it.
3956                  * XXX - could change to an ASSERT but won't catch errant drivers
3957                  */
3958                 mutex_enter(&cp->lock);
3959                 if (cp->locked) {
3960                         mutex_exit(&cp->lock);
3961                         panic("ddi_umem_free for cookie with pending faults %p",
3962                             (void *)cp);
3963                         return;
3964                 }
3965 
3966                 segkp_release(segkp, cp->cvaddr);
3967 
3968                 /*
3969                  * release mutex associated with this cookie.
3970                  */
3971                 mutex_destroy(&cp->lock);
3972                 break;
3973         case KMEM_NON_PAGEABLE :
3974                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3975                 devmap_umem_free_np(cp->cvaddr, cp->size);
3976                 break;
3977         case UMEM_TRASH :
3978                 break;
3979         case UMEM_LOCKED :
3980                 /* Callers should use ddi_umem_unlock for this type */
3981                 ddi_umem_unlock(cookie);
3982                 /* Frees the cookie too */
3983                 return;
3984         default:
3985                 /* panic so we can diagnose the underlying cause */
3986                 panic("ddi_umem_free: illegal cookie type 0x%x\n",
3987                     cp->type);
3988         }
3989 
3990         kmem_free(cookie, sizeof (struct ddi_umem_cookie));
3991 }
3992 
3993 
3994 static int
3995 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3996 {
3997         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3998 
3999         /*
4000          * It looks as if it is always mapped shared
4001          */
4002         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GETMEMID,
4003             "segdev_getmemid:start");
4004         memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
4005         memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
4006         return (0);
4007 }
4008 
4009 /*
4010  * ddi_umem_alloc() non-pageable quantum cache max size.
4011  * This is just a SWAG.
4012  */
4013 #define DEVMAP_UMEM_QUANTUM     (8*PAGESIZE)
4014 
4015 /*
4016  * Initialize seg_dev from boot. This routine sets up the trash page
4017  * and creates the umem_np_arena used to back non-pageable memory
4018  * requests.
4019  */
4020 void
4021 segdev_init(void)
4022 {
4023         struct seg kseg;
4024 
4025         umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
4026             devmap_alloc_pages, devmap_free_pages, heap_arena,
4027             DEVMAP_UMEM_QUANTUM, VM_SLEEP);
4028 
4029         kseg.s_as = &kas;
4030         trashpp = page_create_va(&trashvp, 0, PAGESIZE,
4031             PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
4032         if (trashpp == NULL)
4033                 panic("segdev_init: failed to create trash page");
4034         pagezero(trashpp, 0, PAGESIZE);
4035         page_downgrade(trashpp);
4036 }
4037 
4038 /*
4039  * Invoke platform-dependent support routines so that /proc can have
4040  * the platform code deal with curious hardware.
4041  */
4042 int
4043 segdev_copyfrom(struct seg *seg,
4044     caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
4045 {
4046         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4047         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4048 
4049         return (e_ddi_copyfromdev(sp->s_dip,
4050             (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
4051 }
4052 
4053 int
4054 segdev_copyto(struct seg *seg,
4055     caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
4056 {
4057         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4058         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4059 
4060         return (e_ddi_copytodev(sp->s_dip,
4061             (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));
4062 }