6147 segop_getpolicy already checks for a NULL op
Reviewed by: Garrett D'Amore <garrett@damore.org>
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	.dup		= SEGMAP_BADOP(int),
	.unmap		= SEGMAP_BADOP(int),
	.free		= segmap_free,
	.fault		= segmap_fault,
	.faulta		= segmap_faulta,
	.setprot	= SEGMAP_BADOP(int),
	.checkprot	= segmap_checkprot,
	.kluster	= segmap_kluster,
	.swapout	= SEGMAP_BADOP(size_t),
	.sync		= SEGMAP_BADOP(int),
	.incore		= SEGMAP_BADOP(size_t),
	.lockop		= SEGMAP_BADOP(int),
	.getprot	= segmap_getprot,
	.getoffset	= segmap_getoffset,
	.gettype	= segmap_gettype,
	.getvp		= segmap_getvp,
	.advise		= SEGMAP_BADOP(int),
	.dump		= segmap_dump,
	.pagelock	= segmap_pagelock,
	.setpagesize	= SEGMAP_BADOP(int),
	.getmemid	= segmap_getmemid,
	.capable	= segmap_capable,
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hashing the (vp, off) in or out of the
 * list) or the freelist lock to put the slot back on the free list.
 *
 * The hash search is done by holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so there
 * is no overlapping of hashchain and smap locks.  After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * in reverse lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
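 *
 * For example, the lookup path (see segmap_getmapflt below) follows
 * roughly this order; an illustrative sketch only:
 *
 *	mutex_enter(hashmtx);		search the hash chain for (vp, off)
 *	mutex_exit(hashmtx);		drop it before locking the slot
 *	mutex_enter(SMAPMTX(smp));	lock the slot itself
 *	...				re-verify sm_vp/sm_off, retry if changed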
245 */ 246 247 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx) 248 249 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk]) 250 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk) 251 252 #define SMAPMTX(smp) (&smp->sm_mtx) 253 254 #define SMAP_HASHFUNC(vp, off, hashid) \ 255 { \ 256 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 257 ((off) >> MAXBSHIFT)) & smd_hashmsk); \ 258 } 259 260 /* 261 * The most frequently updated kstat counters are kept in the 262 * per cpu array to avoid hot cache blocks. The update function 263 * sums the cpu local counters to update the global counters. 264 */ 265 266 /* ARGSUSED */ 267 int 268 segmap_kstat_update(kstat_t *ksp, int rw) 269 { 270 int i; 271 ulong_t getmap, release, get_reclaim; 272 ulong_t fault, pagecreate, get_reuse; 273 274 if (rw == KSTAT_WRITE) 275 return (EACCES); 276 getmap = release = get_reclaim = (ulong_t)0; 277 fault = pagecreate = get_reuse = (ulong_t)0; 278 for (i = 0; i < max_ncpus; i++) { 279 getmap += smd_cpu[i].scpu.scpu_getmap; 280 release += smd_cpu[i].scpu.scpu_release; 281 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim; 282 fault += smd_cpu[i].scpu.scpu_fault; 283 pagecreate += smd_cpu[i].scpu.scpu_pagecreate; 284 get_reuse += smd_cpu[i].scpu.scpu_get_reuse; 285 } 286 segmapcnt.smp_getmap.value.ul = getmap; 287 segmapcnt.smp_release.value.ul = release; 288 segmapcnt.smp_get_reclaim.value.ul = get_reclaim; 289 segmapcnt.smp_fault.value.ul = fault; 290 segmapcnt.smp_pagecreate.value.ul = pagecreate; 291 segmapcnt.smp_get_reuse.value.ul = get_reuse; 292 return (0); 293 } 294 295 int 296 segmap_create(struct seg *seg, void *argsp) 297 { 298 struct segmap_data *smd; 299 struct smap *smp; 300 struct smfree *sm; 301 struct segmap_crargs *a = (struct segmap_crargs *)argsp; 302 struct smaphash *shashp; 303 union segmap_cpu *scpu; 304 long i, npages; 305 size_t hashsz; 306 uint_t nfreelist; 307 extern void prefetch_smap_w(void *); 308 extern int max_ncpus; 309 310 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 311 312 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) { 313 panic("segkmap not MAXBSIZE aligned"); 314 /*NOTREACHED*/ 315 } 316 317 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP); 318 319 seg->s_data = (void *)smd; 320 seg->s_ops = &segmap_ops; 321 smd->smd_prot = a->prot; 322 323 /* 324 * Scale the number of smap freelists to be 325 * proportional to max_ncpus * number of virtual colors. 326 * The caller can over-ride this scaling by providing 327 * a non-zero a->nfreelist argument. 328 */ 329 nfreelist = a->nfreelist; 330 if (nfreelist == 0) 331 nfreelist = max_ncpus; 332 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) { 333 cmn_err(CE_WARN, "segmap_create: nfreelist out of range " 334 "%d, using %d", nfreelist, max_ncpus); 335 nfreelist = max_ncpus; 336 } 337 if (!ISP2(nfreelist)) { 338 /* round up nfreelist to the next power of two. */ 339 nfreelist = 1 << (highbit(nfreelist)); 340 } 341 342 /* 343 * Get the number of virtual colors - must be a power of 2. 344 */ 345 if (a->shmsize) 346 smd_ncolor = a->shmsize >> MAXBSHIFT; 347 else 348 smd_ncolor = 1; 349 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0); 350 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR); 351 smd_colormsk = smd_ncolor - 1; 352 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist; 353 smd_freemsk = smd_nfree - 1; 354 355 /* 356 * Allocate and initialize the freelist headers. 357 * Note that sm_freeq[1] starts out as the release queue. 
This 358 * is known when the smap structures are initialized below. 359 */ 360 smd_free = smd->smd_free = 361 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP); 362 for (i = 0; i < smd_nfree; i++) { 363 sm = &smd->smd_free[i]; 364 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 365 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 366 sm->sm_allocq = &sm->sm_freeq[0]; 367 sm->sm_releq = &sm->sm_freeq[1]; 368 } 369 370 /* 371 * Allocate and initialize the smap hash chain headers. 372 * Compute hash size rounding down to the next power of two. 373 */ 374 npages = MAP_PAGES(seg); 375 smd->smd_npages = npages; 376 hashsz = npages / SMAP_HASHAVELEN; 377 hashsz = 1 << (highbit(hashsz)-1); 378 smd_hashmsk = hashsz - 1; 379 smd_hash = smd->smd_hash = 380 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP); 381 #ifdef SEGMAP_HASHSTATS 382 smd_hash_len = 383 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP); 384 #endif 385 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) { 386 shashp->sh_hash_list = NULL; 387 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL); 388 } 389 390 /* 391 * Allocate and initialize the smap structures. 392 * Link all slots onto the appropriate freelist. 393 * The smap array is large enough to affect boot time 394 * on large systems, so use memory prefetching and only 395 * go through the array 1 time. Inline a optimized version 396 * of segmap_smapadd to add structures to freelists with 397 * knowledge that no locks are needed here. 398 */ 399 smd_smap = smd->smd_sm = 400 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP); 401 402 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1]; 403 smp >= smd->smd_sm; smp--) { 404 struct smap *smpfreelist; 405 struct sm_freeq *releq; 406 407 prefetch_smap_w((char *)smp); 408 409 smp->sm_vp = NULL; 410 smp->sm_hash = NULL; 411 smp->sm_off = 0; 412 smp->sm_bitmap = 0; 413 smp->sm_refcnt = 0; 414 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL); 415 smp->sm_free_ndx = SMP2SMF_NDX(smp); 416 417 sm = SMP2SMF(smp); 418 releq = sm->sm_releq; 419 420 smpfreelist = releq->smq_free; 421 if (smpfreelist == 0) { 422 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 423 } else { 424 smp->sm_next = smpfreelist; 425 smp->sm_prev = smpfreelist->sm_prev; 426 smpfreelist->sm_prev = smp; 427 smp->sm_prev->sm_next = smp; 428 releq->smq_free = smp->sm_next; 429 } 430 431 /* 432 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1] 433 */ 434 smp->sm_flags = 0; 435 436 #ifdef SEGKPM_SUPPORT 437 /* 438 * Due to the fragile prefetch loop no 439 * separate function is used here. 440 */ 441 smp->sm_kpme_next = NULL; 442 smp->sm_kpme_prev = NULL; 443 smp->sm_kpme_page = NULL; 444 #endif 445 } 446 447 /* 448 * Allocate the per color indices that distribute allocation 449 * requests over the free lists. Each cpu will have a private 450 * rotor index to spread the allocations even across the available 451 * smap freelists. Init the scpu_last_smap field to the first 452 * smap element so there is no need to check for NULL. 453 */ 454 smd_cpu = 455 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP); 456 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) { 457 int j; 458 for (j = 0; j < smd_ncolor; j++) 459 scpu->scpu.scpu_free_ndx[j] = j; 460 scpu->scpu.scpu_last_smap = smd_smap; 461 } 462 463 vpm_init(); 464 465 #ifdef DEBUG 466 /* 467 * Keep track of which colors are used more often. 
468 */ 469 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP); 470 #endif /* DEBUG */ 471 472 return (0); 473 } 474 475 static void 476 segmap_free(seg) 477 struct seg *seg; 478 { 479 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 480 } 481 482 /* 483 * Do a F_SOFTUNLOCK call over the range requested. 484 * The range must have already been F_SOFTLOCK'ed. 485 */ 486 static void 487 segmap_unlock( 488 struct hat *hat, 489 struct seg *seg, 490 caddr_t addr, 491 size_t len, 492 enum seg_rw rw, 493 struct smap *smp) 494 { 495 page_t *pp; 496 caddr_t adr; 497 u_offset_t off; 498 struct vnode *vp; 499 kmutex_t *smtx; 500 501 ASSERT(smp->sm_refcnt > 0); 502 503 #ifdef lint 504 seg = seg; 505 #endif 506 507 if (segmap_kpm && IS_KPM_ADDR(addr)) { 508 509 /* 510 * We're called only from segmap_fault and this was a 511 * NOP in case of a kpm based smap, so dangerous things 512 * must have happened in the meantime. Pages are prefaulted 513 * and locked in segmap_getmapflt and they will not be 514 * unlocked until segmap_release. 515 */ 516 panic("segmap_unlock: called with kpm addr %p", (void *)addr); 517 /*NOTREACHED*/ 518 } 519 520 vp = smp->sm_vp; 521 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 522 523 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE)); 524 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) { 525 ushort_t bitmask; 526 527 /* 528 * Use page_find() instead of page_lookup() to 529 * find the page since we know that it has 530 * "shared" lock. 531 */ 532 pp = page_find(vp, off); 533 if (pp == NULL) { 534 panic("segmap_unlock: page not found"); 535 /*NOTREACHED*/ 536 } 537 538 if (rw == S_WRITE) { 539 hat_setrefmod(pp); 540 } else if (rw != S_OTHER) { 541 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 542 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off); 543 hat_setref(pp); 544 } 545 546 /* 547 * Clear bitmap, if the bit corresponding to "off" is set, 548 * since the page and translation are being unlocked. 549 */ 550 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT); 551 552 /* 553 * Large Files: Following assertion is to verify 554 * the correctness of the cast to (int) above. 555 */ 556 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 557 smtx = SMAPMTX(smp); 558 mutex_enter(smtx); 559 if (smp->sm_bitmap & bitmask) { 560 smp->sm_bitmap &= ~bitmask; 561 } 562 mutex_exit(smtx); 563 564 page_unlock(pp); 565 } 566 } 567 568 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */ 569 570 /* 571 * This routine is called via a machine specific fault handling 572 * routine. It is also called by software routines wishing to 573 * lock or unlock a range of addresses. 574 * 575 * Note that this routine expects a page-aligned "addr". 576 */ 577 faultcode_t 578 segmap_fault( 579 struct hat *hat, 580 struct seg *seg, 581 caddr_t addr, 582 size_t len, 583 enum fault_type type, 584 enum seg_rw rw) 585 { 586 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 587 struct smap *smp; 588 page_t *pp, **ppp; 589 struct vnode *vp; 590 u_offset_t off; 591 page_t *pl[MAXPPB + 1]; 592 uint_t prot; 593 u_offset_t addroff; 594 caddr_t adr; 595 int err; 596 u_offset_t sm_off; 597 int hat_flag; 598 599 if (segmap_kpm && IS_KPM_ADDR(addr)) { 600 int newpage; 601 kmutex_t *smtx; 602 603 /* 604 * Pages are successfully prefaulted and locked in 605 * segmap_getmapflt and can't be unlocked until 606 * segmap_release. No hat mappings have to be locked 607 * and they also can't be unlocked as long as the 608 * caller owns an active kpm addr. 
609 */ 610 #ifndef DEBUG 611 if (type != F_SOFTUNLOCK) 612 return (0); 613 #endif 614 615 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 616 panic("segmap_fault: smap not found " 617 "for addr %p", (void *)addr); 618 /*NOTREACHED*/ 619 } 620 621 smtx = SMAPMTX(smp); 622 #ifdef DEBUG 623 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 624 if (newpage) { 625 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p", 626 (void *)smp); 627 } 628 629 if (type != F_SOFTUNLOCK) { 630 mutex_exit(smtx); 631 return (0); 632 } 633 #endif 634 mutex_exit(smtx); 635 vp = smp->sm_vp; 636 sm_off = smp->sm_off; 637 638 if (vp == NULL) 639 return (FC_MAKE_ERR(EIO)); 640 641 ASSERT(smp->sm_refcnt > 0); 642 643 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 644 if (addroff + len > MAXBSIZE) 645 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk", 646 (void *)(addr + len)); 647 648 off = sm_off + addroff; 649 650 pp = page_find(vp, off); 651 652 if (pp == NULL) 653 panic("segmap_fault: softunlock page not found"); 654 655 /* 656 * Set ref bit also here in case of S_OTHER to avoid the 657 * overhead of supporting other cases than F_SOFTUNLOCK 658 * with segkpm. We can do this because the underlying 659 * pages are locked anyway. 660 */ 661 if (rw == S_WRITE) { 662 hat_setrefmod(pp); 663 } else { 664 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 665 "segmap_fault:pp %p vp %p offset %llx", 666 pp, vp, off); 667 hat_setref(pp); 668 } 669 670 return (0); 671 } 672 673 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 674 smp = GET_SMAP(seg, addr); 675 vp = smp->sm_vp; 676 sm_off = smp->sm_off; 677 678 if (vp == NULL) 679 return (FC_MAKE_ERR(EIO)); 680 681 ASSERT(smp->sm_refcnt > 0); 682 683 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 684 if (addroff + len > MAXBSIZE) { 685 panic("segmap_fault: endaddr %p " 686 "exceeds MAXBSIZE chunk", (void *)(addr + len)); 687 /*NOTREACHED*/ 688 } 689 off = sm_off + addroff; 690 691 /* 692 * First handle the easy stuff 693 */ 694 if (type == F_SOFTUNLOCK) { 695 segmap_unlock(hat, seg, addr, len, rw, smp); 696 return (0); 697 } 698 699 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 700 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 701 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE, 702 seg, addr, rw, CRED(), NULL); 703 704 if (err) 705 return (FC_MAKE_ERR(err)); 706 707 prot &= smd->smd_prot; 708 709 /* 710 * Handle all pages returned in the pl[] array. 711 * This loop is coded on the assumption that if 712 * there was no error from the VOP_GETPAGE routine, 713 * that the page list returned will contain all the 714 * needed pages for the vp from [off..off + len]. 715 */ 716 ppp = pl; 717 while ((pp = *ppp++) != NULL) { 718 u_offset_t poff; 719 ASSERT(pp->p_vnode == vp); 720 hat_flag = HAT_LOAD; 721 722 /* 723 * Verify that the pages returned are within the range 724 * of this segmap region. Note that it is theoretically 725 * possible for pages outside this range to be returned, 726 * but it is not very likely. If we cannot use the 727 * page here, just release it and go on to the next one. 
728 */ 729 if (pp->p_offset < sm_off || 730 pp->p_offset >= sm_off + MAXBSIZE) { 731 (void) page_release(pp, 1); 732 continue; 733 } 734 735 ASSERT(hat == kas.a_hat); 736 poff = pp->p_offset; 737 adr = addr + (poff - off); 738 if (adr >= addr && adr < addr + len) { 739 hat_setref(pp); 740 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 741 "segmap_fault:pp %p vp %p offset %llx", 742 pp, vp, poff); 743 if (type == F_SOFTLOCK) 744 hat_flag = HAT_LOAD_LOCK; 745 } 746 747 /* 748 * Deal with VMODSORT pages here. If we know this is a write 749 * do the setmod now and allow write protection. 750 * As long as it's modified or not S_OTHER, remove write 751 * protection. With S_OTHER it's up to the FS to deal with this. 752 */ 753 if (IS_VMODSORT(vp)) { 754 if (rw == S_WRITE) 755 hat_setmod(pp); 756 else if (rw != S_OTHER && !hat_ismod(pp)) 757 prot &= ~PROT_WRITE; 758 } 759 760 hat_memload(hat, adr, pp, prot, hat_flag); 761 if (hat_flag != HAT_LOAD_LOCK) 762 page_unlock(pp); 763 } 764 return (0); 765 } 766 767 /* 768 * This routine is used to start I/O on pages asynchronously. 769 */ 770 static faultcode_t 771 segmap_faulta(struct seg *seg, caddr_t addr) 772 { 773 struct smap *smp; 774 struct vnode *vp; 775 u_offset_t off; 776 int err; 777 778 if (segmap_kpm && IS_KPM_ADDR(addr)) { 779 int newpage; 780 kmutex_t *smtx; 781 782 /* 783 * Pages are successfully prefaulted and locked in 784 * segmap_getmapflt and can't be unlocked until 785 * segmap_release. No hat mappings have to be locked 786 * and they also can't be unlocked as long as the 787 * caller owns an active kpm addr. 788 */ 789 #ifdef DEBUG 790 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 791 panic("segmap_faulta: smap not found " 792 "for addr %p", (void *)addr); 793 /*NOTREACHED*/ 794 } 795 796 smtx = SMAPMTX(smp); 797 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 798 mutex_exit(smtx); 799 if (newpage) 800 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p", 801 (void *)smp); 802 #endif 803 return (0); 804 } 805 806 segmapcnt.smp_faulta.value.ul++; 807 smp = GET_SMAP(seg, addr); 808 809 ASSERT(smp->sm_refcnt > 0); 810 811 vp = smp->sm_vp; 812 off = smp->sm_off; 813 814 if (vp == NULL) { 815 cmn_err(CE_WARN, "segmap_faulta - no vp"); 816 return (FC_MAKE_ERR(EIO)); 817 } 818 819 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 820 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 821 822 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr 823 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0, 824 seg, addr, S_READ, CRED(), NULL); 825 826 if (err) 827 return (FC_MAKE_ERR(err)); 828 return (0); 829 } 830 831 /*ARGSUSED*/ 832 static int 833 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 834 { 835 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 836 837 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); 838 839 /* 840 * Need not acquire the segment lock since 841 * "smd_prot" is a read-only field. 842 */ 843 return (((smd->smd_prot & prot) != prot) ? 
EACCES : 0); 844 } 845 846 static int 847 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 848 { 849 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 850 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 851 852 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 853 854 if (pgno != 0) { 855 do { 856 protv[--pgno] = smd->smd_prot; 857 } while (pgno != 0); 858 } 859 return (0); 860 } 861 862 static u_offset_t 863 segmap_getoffset(struct seg *seg, caddr_t addr) 864 { 865 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 866 867 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 868 869 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base)); 870 } 871 872 /*ARGSUSED*/ 873 static int 874 segmap_gettype(struct seg *seg, caddr_t addr) 875 { 876 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 877 878 return (MAP_SHARED); 879 } 880 881 /*ARGSUSED*/ 882 static int 883 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 884 { 885 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 886 887 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 888 889 /* XXX - This doesn't make any sense */ 890 *vpp = smd->smd_sm->sm_vp; 891 return (0); 892 } 893 894 /* 895 * Check to see if it makes sense to do kluster/read ahead to 896 * addr + delta relative to the mapping at addr. We assume here 897 * that delta is a signed PAGESIZE'd multiple (which can be negative). 898 * 899 * For segmap we always "approve" of this action from our standpoint. 900 */ 901 /*ARGSUSED*/ 902 static int 903 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 904 { 905 return (0); 906 } 907 908 static void 909 segmap_badop() 910 { 911 panic("segmap_badop"); 912 /*NOTREACHED*/ 913 } 914 915 /* 916 * Special private segmap operations 917 */ 918 919 /* 920 * Add smap to the appropriate free list. 921 */ 922 static void 923 segmap_smapadd(struct smap *smp) 924 { 925 struct smfree *sm; 926 struct smap *smpfreelist; 927 struct sm_freeq *releq; 928 929 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 930 931 if (smp->sm_refcnt != 0) { 932 panic("segmap_smapadd"); 933 /*NOTREACHED*/ 934 } 935 936 sm = &smd_free[smp->sm_free_ndx]; 937 /* 938 * Add to the tail of the release queue 939 * Note that sm_releq and sm_allocq could toggle 940 * before we get the lock. This does not affect 941 * correctness as the 2 queues are only maintained 942 * to reduce lock pressure. 943 */ 944 releq = sm->sm_releq; 945 if (releq == &sm->sm_freeq[0]) 946 smp->sm_flags |= SM_QNDX_ZERO; 947 else 948 smp->sm_flags &= ~SM_QNDX_ZERO; 949 mutex_enter(&releq->smq_mtx); 950 smpfreelist = releq->smq_free; 951 if (smpfreelist == 0) { 952 int want; 953 954 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 955 /* 956 * Both queue mutexes held to set sm_want; 957 * snapshot the value before dropping releq mutex. 958 * If sm_want appears after the releq mutex is dropped, 959 * then the smap just freed is already gone. 960 */ 961 want = sm->sm_want; 962 mutex_exit(&releq->smq_mtx); 963 /* 964 * See if there was a waiter before dropping the releq mutex 965 * then recheck after obtaining sm_freeq[0] mutex as 966 * the another thread may have already signaled. 
967 */ 968 if (want) { 969 mutex_enter(&sm->sm_freeq[0].smq_mtx); 970 if (sm->sm_want) 971 cv_signal(&sm->sm_free_cv); 972 mutex_exit(&sm->sm_freeq[0].smq_mtx); 973 } 974 } else { 975 smp->sm_next = smpfreelist; 976 smp->sm_prev = smpfreelist->sm_prev; 977 smpfreelist->sm_prev = smp; 978 smp->sm_prev->sm_next = smp; 979 mutex_exit(&releq->smq_mtx); 980 } 981 } 982 983 984 static struct smap * 985 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid) 986 { 987 struct smap **hpp; 988 struct smap *tmp; 989 kmutex_t *hmtx; 990 991 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 992 ASSERT(smp->sm_vp == NULL); 993 ASSERT(smp->sm_hash == NULL); 994 ASSERT(smp->sm_prev == NULL); 995 ASSERT(smp->sm_next == NULL); 996 ASSERT(hashid >= 0 && hashid <= smd_hashmsk); 997 998 hmtx = SHASHMTX(hashid); 999 1000 mutex_enter(hmtx); 1001 /* 1002 * First we need to verify that no one has created a smp 1003 * with (vp,off) as its tag before we us. 1004 */ 1005 for (tmp = smd_hash[hashid].sh_hash_list; 1006 tmp != NULL; tmp = tmp->sm_hash) 1007 if (tmp->sm_vp == vp && tmp->sm_off == off) 1008 break; 1009 1010 if (tmp == NULL) { 1011 /* 1012 * No one created one yet. 1013 * 1014 * Funniness here - we don't increment the ref count on the 1015 * vnode * even though we have another pointer to it here. 1016 * The reason for this is that we don't want the fact that 1017 * a seg_map entry somewhere refers to a vnode to prevent the 1018 * vnode * itself from going away. This is because this 1019 * reference to the vnode is a "soft one". In the case where 1020 * a mapping is being used by a rdwr [or directory routine?] 1021 * there already has to be a non-zero ref count on the vnode. 1022 * In the case where the vp has been freed and the the smap 1023 * structure is on the free list, there are no pages in memory 1024 * that can refer to the vnode. Thus even if we reuse the same 1025 * vnode/smap structure for a vnode which has the same 1026 * address but represents a different object, we are ok. 1027 */ 1028 smp->sm_vp = vp; 1029 smp->sm_off = off; 1030 1031 hpp = &smd_hash[hashid].sh_hash_list; 1032 smp->sm_hash = *hpp; 1033 *hpp = smp; 1034 #ifdef SEGMAP_HASHSTATS 1035 smd_hash_len[hashid]++; 1036 #endif 1037 } 1038 mutex_exit(hmtx); 1039 1040 return (tmp); 1041 } 1042 1043 static void 1044 segmap_hashout(struct smap *smp) 1045 { 1046 struct smap **hpp, *hp; 1047 struct vnode *vp; 1048 kmutex_t *mtx; 1049 int hashid; 1050 u_offset_t off; 1051 1052 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1053 1054 vp = smp->sm_vp; 1055 off = smp->sm_off; 1056 1057 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1058 mtx = SHASHMTX(hashid); 1059 mutex_enter(mtx); 1060 1061 hpp = &smd_hash[hashid].sh_hash_list; 1062 for (;;) { 1063 hp = *hpp; 1064 if (hp == NULL) { 1065 panic("segmap_hashout"); 1066 /*NOTREACHED*/ 1067 } 1068 if (hp == smp) 1069 break; 1070 hpp = &hp->sm_hash; 1071 } 1072 1073 *hpp = smp->sm_hash; 1074 smp->sm_hash = NULL; 1075 #ifdef SEGMAP_HASHSTATS 1076 smd_hash_len[hashid]--; 1077 #endif 1078 mutex_exit(mtx); 1079 1080 smp->sm_vp = NULL; 1081 smp->sm_off = (u_offset_t)0; 1082 1083 } 1084 1085 /* 1086 * Attempt to free unmodified, unmapped, and non locked segmap 1087 * pages. 
1088 */ 1089 void 1090 segmap_pagefree(struct vnode *vp, u_offset_t off) 1091 { 1092 u_offset_t pgoff; 1093 page_t *pp; 1094 1095 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) { 1096 1097 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL) 1098 continue; 1099 1100 switch (page_release(pp, 1)) { 1101 case PGREL_NOTREL: 1102 segmapcnt.smp_free_notfree.value.ul++; 1103 break; 1104 case PGREL_MOD: 1105 segmapcnt.smp_free_dirty.value.ul++; 1106 break; 1107 case PGREL_CLEAN: 1108 segmapcnt.smp_free.value.ul++; 1109 break; 1110 } 1111 } 1112 } 1113 1114 /* 1115 * Locks held on entry: smap lock 1116 * Locks held on exit : smap lock. 1117 */ 1118 1119 static void 1120 grab_smp(struct smap *smp, page_t *pp) 1121 { 1122 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1123 ASSERT(smp->sm_refcnt == 0); 1124 1125 if (smp->sm_vp != (struct vnode *)NULL) { 1126 struct vnode *vp = smp->sm_vp; 1127 u_offset_t off = smp->sm_off; 1128 /* 1129 * Destroy old vnode association and 1130 * unload any hardware translations to 1131 * the old object. 1132 */ 1133 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++; 1134 segmap_hashout(smp); 1135 1136 /* 1137 * This node is off freelist and hashlist, 1138 * so there is no reason to drop/reacquire sm_mtx 1139 * across calls to hat_unload. 1140 */ 1141 if (segmap_kpm) { 1142 caddr_t vaddr; 1143 int hat_unload_needed = 0; 1144 1145 /* 1146 * unload kpm mapping 1147 */ 1148 if (pp != NULL) { 1149 vaddr = hat_kpm_page2va(pp, 1); 1150 hat_kpm_mapout(pp, GET_KPME(smp), vaddr); 1151 page_unlock(pp); 1152 } 1153 1154 /* 1155 * Check if we have (also) the rare case of a 1156 * non kpm mapping. 1157 */ 1158 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 1159 hat_unload_needed = 1; 1160 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1161 } 1162 1163 if (hat_unload_needed) { 1164 hat_unload(kas.a_hat, segkmap->s_base + 1165 ((smp - smd_smap) * MAXBSIZE), 1166 MAXBSIZE, HAT_UNLOAD); 1167 } 1168 1169 } else { 1170 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED); 1171 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1172 hat_unload(kas.a_hat, segkmap->s_base + 1173 ((smp - smd_smap) * MAXBSIZE), 1174 MAXBSIZE, HAT_UNLOAD); 1175 } 1176 segmap_pagefree(vp, off); 1177 } 1178 } 1179 1180 static struct smap * 1181 get_free_smp(int free_ndx) 1182 { 1183 struct smfree *sm; 1184 kmutex_t *smtx; 1185 struct smap *smp, *first; 1186 struct sm_freeq *allocq, *releq; 1187 struct kpme *kpme; 1188 page_t *pp = NULL; 1189 int end_ndx, page_locked = 0; 1190 1191 end_ndx = free_ndx; 1192 sm = &smd_free[free_ndx]; 1193 1194 retry_queue: 1195 allocq = sm->sm_allocq; 1196 mutex_enter(&allocq->smq_mtx); 1197 1198 if ((smp = allocq->smq_free) == NULL) { 1199 1200 skip_queue: 1201 /* 1202 * The alloc list is empty or this queue is being skipped; 1203 * first see if the allocq toggled. 1204 */ 1205 if (sm->sm_allocq != allocq) { 1206 /* queue changed */ 1207 mutex_exit(&allocq->smq_mtx); 1208 goto retry_queue; 1209 } 1210 releq = sm->sm_releq; 1211 if (!mutex_tryenter(&releq->smq_mtx)) { 1212 /* cannot get releq; a free smp may be there now */ 1213 mutex_exit(&allocq->smq_mtx); 1214 1215 /* 1216 * This loop could spin forever if this thread has 1217 * higher priority than the thread that is holding 1218 * releq->smq_mtx. In order to force the other thread 1219 * to run, we'll lock/unlock the mutex which is safe 1220 * since we just unlocked the allocq mutex. 
1221 */ 1222 mutex_enter(&releq->smq_mtx); 1223 mutex_exit(&releq->smq_mtx); 1224 goto retry_queue; 1225 } 1226 if (releq->smq_free == NULL) { 1227 /* 1228 * This freelist is empty. 1229 * This should not happen unless clients 1230 * are failing to release the segmap 1231 * window after accessing the data. 1232 * Before resorting to sleeping, try 1233 * the next list of the same color. 1234 */ 1235 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk; 1236 if (free_ndx != end_ndx) { 1237 mutex_exit(&releq->smq_mtx); 1238 mutex_exit(&allocq->smq_mtx); 1239 sm = &smd_free[free_ndx]; 1240 goto retry_queue; 1241 } 1242 /* 1243 * Tried all freelists of the same color once, 1244 * wait on this list and hope something gets freed. 1245 */ 1246 segmapcnt.smp_get_nofree.value.ul++; 1247 sm->sm_want++; 1248 mutex_exit(&sm->sm_freeq[1].smq_mtx); 1249 cv_wait(&sm->sm_free_cv, 1250 &sm->sm_freeq[0].smq_mtx); 1251 sm->sm_want--; 1252 mutex_exit(&sm->sm_freeq[0].smq_mtx); 1253 sm = &smd_free[free_ndx]; 1254 goto retry_queue; 1255 } else { 1256 /* 1257 * Something on the rele queue; flip the alloc 1258 * and rele queues and retry. 1259 */ 1260 sm->sm_allocq = releq; 1261 sm->sm_releq = allocq; 1262 mutex_exit(&allocq->smq_mtx); 1263 mutex_exit(&releq->smq_mtx); 1264 if (page_locked) { 1265 delay(hz >> 2); 1266 page_locked = 0; 1267 } 1268 goto retry_queue; 1269 } 1270 } else { 1271 /* 1272 * Fastpath the case we get the smap mutex 1273 * on the first try. 1274 */ 1275 first = smp; 1276 next_smap: 1277 smtx = SMAPMTX(smp); 1278 if (!mutex_tryenter(smtx)) { 1279 /* 1280 * Another thread is trying to reclaim this slot. 1281 * Skip to the next queue or smap. 1282 */ 1283 if ((smp = smp->sm_next) == first) { 1284 goto skip_queue; 1285 } else { 1286 goto next_smap; 1287 } 1288 } else { 1289 /* 1290 * if kpme exists, get shared lock on the page 1291 */ 1292 if (segmap_kpm && smp->sm_vp != NULL) { 1293 1294 kpme = GET_KPME(smp); 1295 pp = kpme->kpe_page; 1296 1297 if (pp != NULL) { 1298 if (!page_trylock(pp, SE_SHARED)) { 1299 smp = smp->sm_next; 1300 mutex_exit(smtx); 1301 page_locked = 1; 1302 1303 pp = NULL; 1304 1305 if (smp == first) { 1306 goto skip_queue; 1307 } else { 1308 goto next_smap; 1309 } 1310 } else { 1311 if (kpme->kpe_page == NULL) { 1312 page_unlock(pp); 1313 pp = NULL; 1314 } 1315 } 1316 } 1317 } 1318 1319 /* 1320 * At this point, we've selected smp. Remove smp 1321 * from its freelist. If smp is the first one in 1322 * the freelist, update the head of the freelist. 1323 */ 1324 if (first == smp) { 1325 ASSERT(first == allocq->smq_free); 1326 allocq->smq_free = smp->sm_next; 1327 } 1328 1329 /* 1330 * if the head of the freelist still points to smp, 1331 * then there are no more free smaps in that list. 1332 */ 1333 if (allocq->smq_free == smp) 1334 /* 1335 * Took the last one 1336 */ 1337 allocq->smq_free = NULL; 1338 else { 1339 smp->sm_prev->sm_next = smp->sm_next; 1340 smp->sm_next->sm_prev = smp->sm_prev; 1341 } 1342 mutex_exit(&allocq->smq_mtx); 1343 smp->sm_prev = smp->sm_next = NULL; 1344 1345 /* 1346 * if pp != NULL, pp must have been locked; 1347 * grab_smp() unlocks pp. 1348 */ 1349 ASSERT((pp == NULL) || PAGE_LOCKED(pp)); 1350 grab_smp(smp, pp); 1351 /* return smp locked. */ 1352 ASSERT(SMAPMTX(smp) == smtx); 1353 ASSERT(MUTEX_HELD(smtx)); 1354 return (smp); 1355 } 1356 } 1357 } 1358 1359 /* 1360 * Special public segmap operations 1361 */ 1362 1363 /* 1364 * Create pages (without using VOP_GETPAGE) and load up translations to them. 
1365 * If softlock is TRUE, then set things up so that it looks like a call 1366 * to segmap_fault with F_SOFTLOCK. 1367 * 1368 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise. 1369 * 1370 * All fields in the generic segment (struct seg) are considered to be 1371 * read-only for "segmap" even though the kernel address space (kas) may 1372 * not be locked, hence no lock is needed to access them. 1373 */ 1374 int 1375 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock) 1376 { 1377 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 1378 page_t *pp; 1379 u_offset_t off; 1380 struct smap *smp; 1381 struct vnode *vp; 1382 caddr_t eaddr; 1383 int newpage = 0; 1384 uint_t prot; 1385 kmutex_t *smtx; 1386 int hat_flag; 1387 1388 ASSERT(seg->s_as == &kas); 1389 1390 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1391 /* 1392 * Pages are successfully prefaulted and locked in 1393 * segmap_getmapflt and can't be unlocked until 1394 * segmap_release. The SM_KPM_NEWPAGE flag is set 1395 * in segmap_pagecreate_kpm when new pages are created. 1396 * and it is returned as "newpage" indication here. 1397 */ 1398 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1399 panic("segmap_pagecreate: smap not found " 1400 "for addr %p", (void *)addr); 1401 /*NOTREACHED*/ 1402 } 1403 1404 smtx = SMAPMTX(smp); 1405 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 1406 smp->sm_flags &= ~SM_KPM_NEWPAGE; 1407 mutex_exit(smtx); 1408 1409 return (newpage); 1410 } 1411 1412 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 1413 1414 eaddr = addr + len; 1415 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1416 1417 smp = GET_SMAP(seg, addr); 1418 1419 /* 1420 * We don't grab smp mutex here since we assume the smp 1421 * has a refcnt set already which prevents the slot from 1422 * changing its id. 1423 */ 1424 ASSERT(smp->sm_refcnt > 0); 1425 1426 vp = smp->sm_vp; 1427 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1428 prot = smd->smd_prot; 1429 1430 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1431 hat_flag = HAT_LOAD; 1432 pp = page_lookup(vp, off, SE_SHARED); 1433 if (pp == NULL) { 1434 ushort_t bitindex; 1435 1436 if ((pp = page_create_va(vp, off, 1437 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 1438 panic("segmap_pagecreate: page_create failed"); 1439 /*NOTREACHED*/ 1440 } 1441 newpage = 1; 1442 page_io_unlock(pp); 1443 1444 /* 1445 * Since pages created here do not contain valid 1446 * data until the caller writes into them, the 1447 * "exclusive" lock will not be dropped to prevent 1448 * other users from accessing the page. We also 1449 * have to lock the translation to prevent a fault 1450 * from occurring when the virtual address mapped by 1451 * this page is written into. This is necessary to 1452 * avoid a deadlock since we haven't dropped the 1453 * "exclusive" lock. 1454 */ 1455 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT); 1456 1457 /* 1458 * Large Files: The following assertion is to 1459 * verify the cast above. 
1460 */ 1461 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1462 smtx = SMAPMTX(smp); 1463 mutex_enter(smtx); 1464 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex); 1465 mutex_exit(smtx); 1466 1467 hat_flag = HAT_LOAD_LOCK; 1468 } else if (softlock) { 1469 hat_flag = HAT_LOAD_LOCK; 1470 } 1471 1472 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE)) 1473 hat_setmod(pp); 1474 1475 hat_memload(kas.a_hat, addr, pp, prot, hat_flag); 1476 1477 if (hat_flag != HAT_LOAD_LOCK) 1478 page_unlock(pp); 1479 1480 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE, 1481 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx", 1482 seg, addr, pp, vp, off); 1483 } 1484 1485 return (newpage); 1486 } 1487 1488 void 1489 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) 1490 { 1491 struct smap *smp; 1492 ushort_t bitmask; 1493 page_t *pp; 1494 struct vnode *vp; 1495 u_offset_t off; 1496 caddr_t eaddr; 1497 kmutex_t *smtx; 1498 1499 ASSERT(seg->s_as == &kas); 1500 1501 eaddr = addr + len; 1502 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1503 1504 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1505 /* 1506 * Pages are successfully prefaulted and locked in 1507 * segmap_getmapflt and can't be unlocked until 1508 * segmap_release, so no pages or hat mappings have 1509 * to be unlocked at this point. 1510 */ 1511 #ifdef DEBUG 1512 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1513 panic("segmap_pageunlock: smap not found " 1514 "for addr %p", (void *)addr); 1515 /*NOTREACHED*/ 1516 } 1517 1518 ASSERT(smp->sm_refcnt > 0); 1519 mutex_exit(SMAPMTX(smp)); 1520 #endif 1521 return; 1522 } 1523 1524 smp = GET_SMAP(seg, addr); 1525 smtx = SMAPMTX(smp); 1526 1527 ASSERT(smp->sm_refcnt > 0); 1528 1529 vp = smp->sm_vp; 1530 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1531 1532 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1533 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT); 1534 1535 /* 1536 * Large Files: Following assertion is to verify 1537 * the correctness of the cast to (int) above. 1538 */ 1539 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1540 1541 /* 1542 * If the bit corresponding to "off" is set, 1543 * clear this bit in the bitmap, unlock translations, 1544 * and release the "exclusive" lock on the page. 1545 */ 1546 if (smp->sm_bitmap & bitmask) { 1547 mutex_enter(smtx); 1548 smp->sm_bitmap &= ~bitmask; 1549 mutex_exit(smtx); 1550 1551 hat_unlock(kas.a_hat, addr, PAGESIZE); 1552 1553 /* 1554 * Use page_find() instead of page_lookup() to 1555 * find the page since we know that it has 1556 * "exclusive" lock. 1557 */ 1558 pp = page_find(vp, off); 1559 if (pp == NULL) { 1560 panic("segmap_pageunlock: page not found"); 1561 /*NOTREACHED*/ 1562 } 1563 if (rw == S_WRITE) { 1564 hat_setrefmod(pp); 1565 } else if (rw != S_OTHER) { 1566 hat_setref(pp); 1567 } 1568 1569 page_unlock(pp); 1570 } 1571 } 1572 } 1573 1574 caddr_t 1575 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off) 1576 { 1577 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER)); 1578 } 1579 1580 /* 1581 * This is the magic virtual address that offset 0 of an ELF 1582 * file gets mapped to in user space. This is used to pick 1583 * the vac color on the freelist. 1584 */ 1585 #define ELF_OFFZERO_VA (0x10000) 1586 /* 1587 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp 1588 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned. 1589 * The return address is always MAXBSIZE aligned. 
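 *
 * A typical consumer maps, copies and releases a window roughly as
 * follows (an illustrative sketch only; "uio" and "n" are hypothetical
 * locals and <off, off + n) must not cross a MAXBSIZE boundary):
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, 0);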
1590 * 1591 * If forcefault is nonzero and the MMU translations haven't yet been created, 1592 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them. 1593 */ 1594 caddr_t 1595 segmap_getmapflt( 1596 struct seg *seg, 1597 struct vnode *vp, 1598 u_offset_t off, 1599 size_t len, 1600 int forcefault, 1601 enum seg_rw rw) 1602 { 1603 struct smap *smp, *nsmp; 1604 extern struct vnode *common_specvp(); 1605 caddr_t baseaddr; /* MAXBSIZE aligned */ 1606 u_offset_t baseoff; 1607 int newslot; 1608 caddr_t vaddr; 1609 int color, hashid; 1610 kmutex_t *hashmtx, *smapmtx; 1611 struct smfree *sm; 1612 page_t *pp; 1613 struct kpme *kpme; 1614 uint_t prot; 1615 caddr_t base; 1616 page_t *pl[MAXPPB + 1]; 1617 int error; 1618 int is_kpm = 1; 1619 1620 ASSERT(seg->s_as == &kas); 1621 ASSERT(seg == segkmap); 1622 1623 baseoff = off & (offset_t)MAXBMASK; 1624 if (off + len > baseoff + MAXBSIZE) { 1625 panic("segmap_getmap bad len"); 1626 /*NOTREACHED*/ 1627 } 1628 1629 /* 1630 * If this is a block device we have to be sure to use the 1631 * "common" block device vnode for the mapping. 1632 */ 1633 if (vp->v_type == VBLK) 1634 vp = common_specvp(vp); 1635 1636 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++; 1637 1638 if (segmap_kpm == 0 || 1639 (forcefault == SM_PAGECREATE && rw != S_WRITE)) { 1640 is_kpm = 0; 1641 } 1642 1643 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1644 hashmtx = SHASHMTX(hashid); 1645 1646 retry_hash: 1647 mutex_enter(hashmtx); 1648 for (smp = smd_hash[hashid].sh_hash_list; 1649 smp != NULL; smp = smp->sm_hash) 1650 if (smp->sm_vp == vp && smp->sm_off == baseoff) 1651 break; 1652 mutex_exit(hashmtx); 1653 1654 vrfy_smp: 1655 if (smp != NULL) { 1656 1657 ASSERT(vp->v_count != 0); 1658 1659 /* 1660 * Get smap lock and recheck its tag. The hash lock 1661 * is dropped since the hash is based on (vp, off) 1662 * and (vp, off) won't change when we have smap mtx. 1663 */ 1664 smapmtx = SMAPMTX(smp); 1665 mutex_enter(smapmtx); 1666 if (smp->sm_vp != vp || smp->sm_off != baseoff) { 1667 mutex_exit(smapmtx); 1668 goto retry_hash; 1669 } 1670 1671 if (smp->sm_refcnt == 0) { 1672 1673 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++; 1674 1675 /* 1676 * Could still be on the free list. However, this 1677 * could also be an smp that is transitioning from 1678 * the free list when we have too much contention 1679 * for the smapmtx's. In this case, we have an 1680 * unlocked smp that is not on the free list any 1681 * longer, but still has a 0 refcnt. The only way 1682 * to be sure is to check the freelist pointers. 1683 * Since we now have the smapmtx, we are guaranteed 1684 * that the (vp, off) won't change, so we are safe 1685 * to reclaim it. get_free_smp() knows that this 1686 * can happen, and it will check the refcnt. 
1687 */ 1688 1689 if ((smp->sm_next != NULL)) { 1690 struct sm_freeq *freeq; 1691 1692 ASSERT(smp->sm_prev != NULL); 1693 sm = &smd_free[smp->sm_free_ndx]; 1694 1695 if (smp->sm_flags & SM_QNDX_ZERO) 1696 freeq = &sm->sm_freeq[0]; 1697 else 1698 freeq = &sm->sm_freeq[1]; 1699 1700 mutex_enter(&freeq->smq_mtx); 1701 if (freeq->smq_free != smp) { 1702 /* 1703 * fastpath normal case 1704 */ 1705 smp->sm_prev->sm_next = smp->sm_next; 1706 smp->sm_next->sm_prev = smp->sm_prev; 1707 } else if (smp == smp->sm_next) { 1708 /* 1709 * Taking the last smap on freelist 1710 */ 1711 freeq->smq_free = NULL; 1712 } else { 1713 /* 1714 * Reclaiming 1st smap on list 1715 */ 1716 freeq->smq_free = smp->sm_next; 1717 smp->sm_prev->sm_next = smp->sm_next; 1718 smp->sm_next->sm_prev = smp->sm_prev; 1719 } 1720 mutex_exit(&freeq->smq_mtx); 1721 smp->sm_prev = smp->sm_next = NULL; 1722 } else { 1723 ASSERT(smp->sm_prev == NULL); 1724 segmapcnt.smp_stolen.value.ul++; 1725 } 1726 1727 } else { 1728 segmapcnt.smp_get_use.value.ul++; 1729 } 1730 smp->sm_refcnt++; /* another user */ 1731 1732 /* 1733 * We don't invoke segmap_fault via TLB miss, so we set ref 1734 * and mod bits in advance. For S_OTHER we set them in 1735 * segmap_fault F_SOFTUNLOCK. 1736 */ 1737 if (is_kpm) { 1738 if (rw == S_WRITE) { 1739 smp->sm_flags |= SM_WRITE_DATA; 1740 } else if (rw == S_READ) { 1741 smp->sm_flags |= SM_READ_DATA; 1742 } 1743 } 1744 mutex_exit(smapmtx); 1745 1746 newslot = 0; 1747 } else { 1748 1749 uint32_t free_ndx, *free_ndxp; 1750 union segmap_cpu *scpu; 1751 1752 /* 1753 * On a PAC machine or a machine with anti-alias 1754 * hardware, smd_colormsk will be zero. 1755 * 1756 * On a VAC machine- pick color by offset in the file 1757 * so we won't get VAC conflicts on elf files. 1758 * On data files, color does not matter but we 1759 * don't know what kind of file it is so we always 1760 * pick color by offset. This causes color 1761 * corresponding to file offset zero to be used more 1762 * heavily. 1763 */ 1764 color = (baseoff >> MAXBSHIFT) & smd_colormsk; 1765 scpu = smd_cpu+CPU->cpu_seqid; 1766 free_ndxp = &scpu->scpu.scpu_free_ndx[color]; 1767 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk; 1768 #ifdef DEBUG 1769 colors_used[free_ndx]++; 1770 #endif /* DEBUG */ 1771 1772 /* 1773 * Get a locked smp slot from the free list. 1774 */ 1775 smp = get_free_smp(free_ndx); 1776 smapmtx = SMAPMTX(smp); 1777 1778 ASSERT(smp->sm_vp == NULL); 1779 1780 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) { 1781 /* 1782 * Failed to hashin, there exists one now. 1783 * Return the smp we just allocated. 1784 */ 1785 segmap_smapadd(smp); 1786 mutex_exit(smapmtx); 1787 1788 smp = nsmp; 1789 goto vrfy_smp; 1790 } 1791 smp->sm_refcnt++; /* another user */ 1792 1793 /* 1794 * We don't invoke segmap_fault via TLB miss, so we set ref 1795 * and mod bits in advance. For S_OTHER we set them in 1796 * segmap_fault F_SOFTUNLOCK. 1797 */ 1798 if (is_kpm) { 1799 if (rw == S_WRITE) { 1800 smp->sm_flags |= SM_WRITE_DATA; 1801 } else if (rw == S_READ) { 1802 smp->sm_flags |= SM_READ_DATA; 1803 } 1804 } 1805 mutex_exit(smapmtx); 1806 1807 newslot = 1; 1808 } 1809 1810 if (!is_kpm) 1811 goto use_segmap_range; 1812 1813 /* 1814 * Use segkpm 1815 */ 1816 /* Lint directive required until 6746211 is fixed */ 1817 /*CONSTCOND*/ 1818 ASSERT(PAGESIZE == MAXBSIZE); 1819 1820 /* 1821 * remember the last smp faulted on this cpu. 
1822 */ 1823 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp; 1824 1825 if (forcefault == SM_PAGECREATE) { 1826 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw); 1827 return (baseaddr); 1828 } 1829 1830 if (newslot == 0 && 1831 (pp = GET_KPME(smp)->kpe_page) != NULL) { 1832 1833 /* fastpath */ 1834 switch (rw) { 1835 case S_READ: 1836 case S_WRITE: 1837 if (page_trylock(pp, SE_SHARED)) { 1838 if (PP_ISFREE(pp) || 1839 !(pp->p_vnode == vp && 1840 pp->p_offset == baseoff)) { 1841 page_unlock(pp); 1842 pp = page_lookup(vp, baseoff, 1843 SE_SHARED); 1844 } 1845 } else { 1846 pp = page_lookup(vp, baseoff, SE_SHARED); 1847 } 1848 1849 if (pp == NULL) { 1850 ASSERT(GET_KPME(smp)->kpe_page == NULL); 1851 break; 1852 } 1853 1854 if (rw == S_WRITE && 1855 hat_page_getattr(pp, P_MOD | P_REF) != 1856 (P_MOD | P_REF)) { 1857 page_unlock(pp); 1858 break; 1859 } 1860 1861 /* 1862 * We have the p_selock as reader, grab_smp 1863 * can't hit us, we have bumped the smap 1864 * refcnt and hat_pageunload needs the 1865 * p_selock exclusive. 1866 */ 1867 kpme = GET_KPME(smp); 1868 if (kpme->kpe_page == pp) { 1869 baseaddr = hat_kpm_page2va(pp, 0); 1870 } else if (kpme->kpe_page == NULL) { 1871 baseaddr = hat_kpm_mapin(pp, kpme); 1872 } else { 1873 panic("segmap_getmapflt: stale " 1874 "kpme page, kpme %p", (void *)kpme); 1875 /*NOTREACHED*/ 1876 } 1877 1878 /* 1879 * We don't invoke segmap_fault via TLB miss, 1880 * so we set ref and mod bits in advance. 1881 * For S_OTHER and we set them in segmap_fault 1882 * F_SOFTUNLOCK. 1883 */ 1884 if (rw == S_READ && !hat_isref(pp)) 1885 hat_setref(pp); 1886 1887 return (baseaddr); 1888 default: 1889 break; 1890 } 1891 } 1892 1893 base = segkpm_create_va(baseoff); 1894 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE, 1895 seg, base, rw, CRED(), NULL); 1896 1897 pp = pl[0]; 1898 if (error || pp == NULL) { 1899 /* 1900 * Use segmap address slot and let segmap_fault deal 1901 * with the error cases. There is no error return 1902 * possible here. 1903 */ 1904 goto use_segmap_range; 1905 } 1906 1907 ASSERT(pl[1] == NULL); 1908 1909 /* 1910 * When prot is not returned w/ PROT_ALL the returned pages 1911 * are not backed by fs blocks. For most of the segmap users 1912 * this is no problem, they don't write to the pages in the 1913 * same request and therefore don't rely on a following 1914 * trap driven segmap_fault. With SM_LOCKPROTO users it 1915 * is more secure to use segkmap adresses to allow 1916 * protection segmap_fault's. 1917 */ 1918 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) { 1919 /* 1920 * Use segmap address slot and let segmap_fault 1921 * do the error return. 1922 */ 1923 ASSERT(rw != S_WRITE); 1924 ASSERT(PAGE_LOCKED(pp)); 1925 page_unlock(pp); 1926 forcefault = 0; 1927 goto use_segmap_range; 1928 } 1929 1930 /* 1931 * We have the p_selock as reader, grab_smp can't hit us, we 1932 * have bumped the smap refcnt and hat_pageunload needs the 1933 * p_selock exclusive. 
1934 */ 1935 kpme = GET_KPME(smp); 1936 if (kpme->kpe_page == pp) { 1937 baseaddr = hat_kpm_page2va(pp, 0); 1938 } else if (kpme->kpe_page == NULL) { 1939 baseaddr = hat_kpm_mapin(pp, kpme); 1940 } else { 1941 panic("segmap_getmapflt: stale kpme page after " 1942 "VOP_GETPAGE, kpme %p", (void *)kpme); 1943 /*NOTREACHED*/ 1944 } 1945 1946 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 1947 1948 return (baseaddr); 1949 1950 1951 use_segmap_range: 1952 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE); 1953 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP, 1954 "segmap_getmap:seg %p addr %p vp %p offset %llx", 1955 seg, baseaddr, vp, baseoff); 1956 1957 /* 1958 * Prefault the translations 1959 */ 1960 vaddr = baseaddr + (off - baseoff); 1961 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) { 1962 1963 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr & 1964 (uintptr_t)PAGEMASK); 1965 1966 (void) segmap_fault(kas.a_hat, seg, pgaddr, 1967 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK, 1968 F_INVAL, rw); 1969 } 1970 1971 return (baseaddr); 1972 } 1973 1974 int 1975 segmap_release(struct seg *seg, caddr_t addr, uint_t flags) 1976 { 1977 struct smap *smp; 1978 int error; 1979 int bflags = 0; 1980 struct vnode *vp; 1981 u_offset_t offset; 1982 kmutex_t *smtx; 1983 int is_kpm = 0; 1984 page_t *pp; 1985 1986 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1987 1988 if (((uintptr_t)addr & MAXBOFFSET) != 0) { 1989 panic("segmap_release: addr %p not " 1990 "MAXBSIZE aligned", (void *)addr); 1991 /*NOTREACHED*/ 1992 } 1993 1994 if ((smp = get_smap_kpm(addr, &pp)) == NULL) { 1995 panic("segmap_release: smap not found " 1996 "for addr %p", (void *)addr); 1997 /*NOTREACHED*/ 1998 } 1999 2000 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2001 "segmap_relmap:seg %p addr %p smp %p", 2002 seg, addr, smp); 2003 2004 smtx = SMAPMTX(smp); 2005 2006 /* 2007 * For compatibility reasons segmap_pagecreate_kpm sets this 2008 * flag to allow a following segmap_pagecreate to return 2009 * this as "newpage" flag. When segmap_pagecreate is not 2010 * called at all we clear it now. 2011 */ 2012 smp->sm_flags &= ~SM_KPM_NEWPAGE; 2013 is_kpm = 1; 2014 if (smp->sm_flags & SM_WRITE_DATA) { 2015 hat_setrefmod(pp); 2016 } else if (smp->sm_flags & SM_READ_DATA) { 2017 hat_setref(pp); 2018 } 2019 } else { 2020 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size || 2021 ((uintptr_t)addr & MAXBOFFSET) != 0) { 2022 panic("segmap_release: bad addr %p", (void *)addr); 2023 /*NOTREACHED*/ 2024 } 2025 smp = GET_SMAP(seg, addr); 2026 2027 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2028 "segmap_relmap:seg %p addr %p smp %p", 2029 seg, addr, smp); 2030 2031 smtx = SMAPMTX(smp); 2032 mutex_enter(smtx); 2033 smp->sm_flags |= SM_NOTKPM_RELEASED; 2034 } 2035 2036 ASSERT(smp->sm_refcnt > 0); 2037 2038 /* 2039 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2040 * are set. 2041 */ 2042 if ((flags & ~SM_DONTNEED) != 0) { 2043 if (flags & SM_WRITE) 2044 segmapcnt.smp_rel_write.value.ul++; 2045 if (flags & SM_ASYNC) { 2046 bflags |= B_ASYNC; 2047 segmapcnt.smp_rel_async.value.ul++; 2048 } 2049 if (flags & SM_INVAL) { 2050 bflags |= B_INVAL; 2051 segmapcnt.smp_rel_abort.value.ul++; 2052 } 2053 if (flags & SM_DESTROY) { 2054 bflags |= (B_INVAL|B_TRUNC); 2055 segmapcnt.smp_rel_abort.value.ul++; 2056 } 2057 if (smp->sm_refcnt == 1) { 2058 /* 2059 * We only bother doing the FREE and DONTNEED flags 2060 * if no one else is still referencing this mapping. 
			 */
			if (flags & SM_FREE) {
				bflags |= B_FREE;
				segmapcnt.smp_rel_free.value.ul++;
			}
			if (flags & SM_DONTNEED) {
				bflags |= B_DONTNEED;
				segmapcnt.smp_rel_dontneed.value.ul++;
			}
		}
	} else {
		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
	}

	vp = smp->sm_vp;
	offset = smp->sm_off;

	if (--smp->sm_refcnt == 0) {

		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

		if (flags & (SM_INVAL|SM_DESTROY)) {
			segmap_hashout(smp);	/* remove map info */
			if (is_kpm) {
				hat_kpm_mapout(pp, GET_KPME(smp), addr);
				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
					hat_unload(kas.a_hat, segkmap->s_base +
					    ((smp - smd_smap) * MAXBSIZE),
					    MAXBSIZE, HAT_UNLOAD);
				}

			} else {
				if (segmap_kpm)
					segkpm_mapout_validkpme(GET_KPME(smp));

				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
				hat_unload(kas.a_hat, addr, MAXBSIZE,
				    HAT_UNLOAD);
			}
		}
		segmap_smapadd(smp);	/* add to free list */
	}

	mutex_exit(smtx);

	if (is_kpm)
		page_unlock(pp);
	/*
	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
		    bflags, CRED(), NULL);
	} else {
		error = 0;
	}

	return (error);
}

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t base;
	page_t *pp;
	int newpage = 0;
	struct kpme *kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t offset;
	caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int hashid;
	kmutex_t *hashmtx;
	page_t *pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */
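
/*
 * Illustrative usage sketch (a comment only, not compiled, and not part of
 * the original file): a typical file system read path is expected to drive
 * the interfaces above roughly as follows. It maps a MAXBSIZE-aligned
 * window of the vnode with segmap_getmapflt() (a nonzero forcefault
 * prefaults the translations), copies the data with uiomove(), and then
 * drops the slot with segmap_release(). The names mapoff, mapon, n and
 * uiop below are assumptions made for the sketch, not code taken from any
 * particular file system, and the error handling is simplified.
 *
 *	u_offset_t mapoff = uiop->uio_loffset & (offset_t)MAXBMASK;
 *	size_t mapon = (size_t)(uiop->uio_loffset & (offset_t)MAXBOFFSET);
 *	size_t n = MIN(MAXBSIZE - mapon, uiop->uio_resid);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, mapoff + mapon, n, 1, S_READ);
 *	error = uiomove(base + mapon, (long)n, UIO_READ, uiop);
 *	if (error == 0)
 *		error = segmap_release(segkmap, base, 0);
 *	else
 *		(void) segmap_release(segkmap, base, 0);
 *
 * A writer would instead pass SM_WRITE (optionally with SM_ASYNC or
 * SM_DONTNEED) to segmap_release(), so that the block is pushed out via
 * VOP_PUTPAGE() as implemented in segmap_release() above.
 */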