patch as-lock-macro-simplification


 534                         }
 535                 }
 536                 mutex_exit(&vp->v_lock);
 537         }
 538 }
 539 
 540 int
 541 segvn_create(struct seg *seg, void *argsp)
 542 {
 543         struct segvn_crargs *a = (struct segvn_crargs *)argsp;
 544         struct segvn_data *svd;
 545         size_t swresv = 0;
 546         struct cred *cred;
 547         struct anon_map *amp;
 548         int error = 0;
 549         size_t pgsz;
 550         lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT;
 551         int use_rgn = 0;
 552         int trok = 0;
 553 
 554         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 555 
 556         if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) {
 557                 panic("segvn_create type");
 558                 /*NOTREACHED*/
 559         }
 560 
 561         /*
 562          * Check arguments.  If a shared anon structure is given then
 563          * it is illegal to also specify a vp.
 564          */
 565         if (a->amp != NULL && a->vp != NULL) {
 566                 panic("segvn_create anon_map");
 567                 /*NOTREACHED*/
 568         }
 569 
 570         if (a->type == MAP_PRIVATE && (a->flags & MAP_TEXT) &&
 571             a->vp != NULL && a->prot == (PROT_USER | PROT_READ | PROT_EXEC) &&
 572             segvn_use_regions) {
 573                 use_rgn = 1;
 574         }
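
Editor's note: the use_rgn test just above is the gate for shared HAT text regions: only a private, vnode-backed MAP_TEXT mapping with exactly PROT_USER|PROT_READ|PROT_EXEC qualifies, and only while segvn_use_regions is enabled. A minimal user-level sketch of the same predicate follows; the X_* constants are stand-ins, not the kernel's definitions.

/* Illustrative stand-in values only; the real constants live in kernel headers. */
#include <stdio.h>

#define X_MAP_PRIVATE   0x02
#define X_MAP_TEXT      0x10
#define X_PROT_READ     0x1
#define X_PROT_EXEC     0x4
#define X_PROT_USER     0x8

static int
x_use_text_region(int type, int flags, int have_vp, int prot, int use_regions)
{
        /* Private, file-backed, text, and exactly user r-x, as in the check above. */
        return (type == X_MAP_PRIVATE && (flags & X_MAP_TEXT) &&
            have_vp && prot == (X_PROT_USER | X_PROT_READ | X_PROT_EXEC) &&
            use_regions);
}

int
main(void)
{
        int rx = X_PROT_USER | X_PROT_READ | X_PROT_EXEC;

        printf("%d\n", x_use_text_region(X_MAP_PRIVATE, X_MAP_TEXT, 1, rx, 1)); /* 1 */
        printf("%d\n", x_use_text_region(X_MAP_PRIVATE, 0, 1, rx, 1));          /* 0 */
        return (0);
}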


1005 
1006 /*
1007  * Concatenate two existing segments, if possible.
1008  * Return 0 on success, -1 if two segments are not compatible
1009  * or -2 on memory allocation failure.
 1010  * If amp_cat == 1 then try to concatenate segments with anon maps.
1011  */
1012 static int
1013 segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat)
1014 {
1015         struct segvn_data *svd1 = seg1->s_data;
1016         struct segvn_data *svd2 = seg2->s_data;
1017         struct anon_map *amp1 = svd1->amp;
1018         struct anon_map *amp2 = svd2->amp;
1019         struct vpage *vpage1 = svd1->vpage;
1020         struct vpage *vpage2 = svd2->vpage, *nvpage = NULL;
1021         size_t size, nvpsize;
1022         pgcnt_t npages1, npages2;
1023 
1024         ASSERT(seg1->s_as && seg2->s_as && seg1->s_as == seg2->s_as);
1025         ASSERT(AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));
1026         ASSERT(seg1->s_ops == seg2->s_ops);
1027 
1028         if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie) ||
1029             HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
1030                 return (-1);
1031         }
1032 
1033         /* both segments exist, try to merge them */
1034 #define incompat(x)     (svd1->x != svd2->x)
1035         if (incompat(vp) || incompat(maxprot) ||
1036             (!svd1->pageadvice && !svd2->pageadvice && incompat(advice)) ||
1037             (!svd1->pageprot && !svd2->pageprot && incompat(prot)) ||
1038             incompat(type) || incompat(cred) || incompat(flags) ||
1039             seg1->s_szc != seg2->s_szc || incompat(policy_info.mem_policy) ||
1040             (svd2->softlockcnt > 0) || svd1->softlockcnt_send > 0)
1041                 return (-1);
1042 #undef incompat
1043 
1044         /*
1045          * vp == NULL implies zfod, offset doesn't matter

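Editor's note: the incompat() macro used in segvn_concat() is a deliberately short-lived helper: it is defined, used for the long field-by-field compatibility test, and immediately #undef'ed. A self-contained sketch of the same idiom on an invented structure:

#include <stdio.h>
#include <string.h>

struct x_mapping {
        int     type;
        int     prot;
        int     maxprot;
        int     flags;
        void    *backing;
};

/* Return 0 if the two mappings could be merged, -1 otherwise. */
static int
x_mergeable(const struct x_mapping *m1, const struct x_mapping *m2)
{
#define incompat(f)     (m1->f != m2->f)
        if (incompat(type) || incompat(prot) || incompat(maxprot) ||
            incompat(flags) || incompat(backing))
                return (-1);
#undef incompat
        return (0);
}

int
main(void)
{
        struct x_mapping a, b;

        memset(&a, 0, sizeof (a));
        b = a;
        printf("%d\n", x_mergeable(&a, &b));    /* 0: identical */
        b.prot = 1;
        printf("%d\n", x_mergeable(&a, &b));    /* -1: protections differ */
        return (0);
}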

1250 /*
1251  * Extend the previous segment (seg1) to include the
1252  * new segment (seg2 + a), if possible.
1253  * Return 0 on success.
1254  */
1255 static int
1256 segvn_extend_prev(seg1, seg2, a, swresv)
1257         struct seg *seg1, *seg2;
1258         struct segvn_crargs *a;
1259         size_t swresv;
1260 {
1261         struct segvn_data *svd1 = (struct segvn_data *)seg1->s_data;
1262         size_t size;
1263         struct anon_map *amp1;
1264         struct vpage *new_vpage;
1265 
1266         /*
1267          * We don't need any segment level locks for "segvn" data
1268          * since the address space is "write" locked.
1269          */
1270         ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as, &seg1->s_as->a_lock));
1271 
1272         if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie)) {
1273                 return (-1);
1274         }
1275 
1276         /* second segment is new, try to extend first */
1277         /* XXX - should also check cred */
1278         if (svd1->vp != a->vp || svd1->maxprot != a->maxprot ||
1279             (!svd1->pageprot && (svd1->prot != a->prot)) ||
1280             svd1->type != a->type || svd1->flags != a->flags ||
1281             seg1->s_szc != a->szc || svd1->softlockcnt_send > 0)
1282                 return (-1);
1283 
1284         /* vp == NULL implies zfod, offset doesn't matter */
1285         if (svd1->vp != NULL &&
1286             svd1->offset + seg1->s_size != (a->offset & PAGEMASK))
1287                 return (-1);
1288 
1289         if (svd1->tr_state != SEGVN_TR_OFF) {
1290                 return (-1);


1371  * Extend the next segment (seg2) to include the
1372  * new segment (seg1 + a), if possible.
1373  * Return 0 on success.
1374  */
1375 static int
1376 segvn_extend_next(
1377         struct seg *seg1,
1378         struct seg *seg2,
1379         struct segvn_crargs *a,
1380         size_t swresv)
1381 {
1382         struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data;
1383         size_t size;
1384         struct anon_map *amp2;
1385         struct vpage *new_vpage;
1386 
1387         /*
1388          * We don't need any segment level locks for "segvn" data
1389          * since the address space is "write" locked.
1390          */
1391         ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as, &seg2->s_as->a_lock));
1392 
1393         if (HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
1394                 return (-1);
1395         }
1396 
1397         /* first segment is new, try to extend second */
1398         /* XXX - should also check cred */
1399         if (svd2->vp != a->vp || svd2->maxprot != a->maxprot ||
1400             (!svd2->pageprot && (svd2->prot != a->prot)) ||
1401             svd2->type != a->type || svd2->flags != a->flags ||
1402             seg2->s_szc != a->szc || svd2->softlockcnt_sbase > 0)
1403                 return (-1);
1404         /* vp == NULL implies zfod, offset doesn't matter */
1405         if (svd2->vp != NULL &&
1406             (a->offset & PAGEMASK) + seg1->s_size != svd2->offset)
1407                 return (-1);
1408 
1409         if (svd2->tr_state != SEGVN_TR_OFF) {
1410                 return (-1);
1411         }


1558                         page_unlock(pp);
1559                 }
1560                 addr += PAGESIZE;
1561                 old_idx++;
1562                 new_idx++;
1563         }
1564 
1565         return (0);
1566 }
1567 
1568 static int
1569 segvn_dup(struct seg *seg, struct seg *newseg)
1570 {
1571         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
1572         struct segvn_data *newsvd;
1573         pgcnt_t npages = seg_pages(seg);
1574         int error = 0;
1575         size_t len;
1576         struct anon_map *amp;
1577 
1578         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1579         ASSERT(newseg->s_as->a_proc->p_parent == curproc);
1580 
1581         /*
1582          * If segment has anon reserved, reserve more for the new seg.
1583          * For a MAP_NORESERVE segment swresv will be a count of all the
1584          * allocated anon slots; thus we reserve for the child as many slots
1585          * as the parent has allocated. This semantic prevents the child or
 1586          * parent from dying during a copy-on-write fault caused by trying
1587          * to write a shared pre-existing anon page.
1588          */
1589         if ((len = svd->swresv) != 0) {
1590                 if (anon_resv(svd->swresv) == 0)
1591                         return (ENOMEM);
1592 
1593                 TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
1594                     seg, len, 0);
1595         }
1596 
1597         newsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP);
1598 
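
Editor's note: the reservation logic at the top of segvn_dup() reserves as much swap for the new (child) segment as the parent already holds, and fails the whole duplication with ENOMEM if the reservation is refused; that is what keeps a later copy-on-write fault from killing parent or child. A toy user-level sketch of the same reserve-before-copy pattern, with an invented reservation pool in place of anon_resv():

/* Invented reservation pool; the kernel's anon_resv() is more involved. */
#include <errno.h>
#include <stdio.h>

static size_t x_swap_avail = 1024;      /* pages available for reservation */

static int
x_swap_resv(size_t pages)
{
        if (pages > x_swap_avail)
                return (0);             /* reservation refused */
        x_swap_avail -= pages;
        return (1);
}

struct x_seg {
        size_t swresv;                  /* pages this segment has reserved */
};

/* Duplicate reservations parent -> child before copying anything else. */
static int
x_seg_dup(const struct x_seg *parent, struct x_seg *child)
{
        if (parent->swresv != 0 && !x_swap_resv(parent->swresv))
                return (ENOMEM);        /* fail early, nothing to unwind */
        child->swresv = parent->swresv;
        return (0);
}

int
main(void)
{
        struct x_seg parent = { 600 }, child1, child2;

        printf("first dup:  %d\n", x_seg_dup(&parent, &child1));  /* 0 */
        printf("second dup: %d\n", x_seg_dup(&parent, &child2));  /* ENOMEM */
        return (0);
}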


1855 segvn_unmap(struct seg *seg, caddr_t addr, size_t len)
1856 {
1857         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
1858         struct segvn_data *nsvd;
1859         struct seg *nseg;
1860         struct anon_map *amp;
1861         pgcnt_t opages;         /* old segment size in pages */
1862         pgcnt_t npages;         /* new segment size in pages */
1863         pgcnt_t dpages;         /* pages being deleted (unmapped) */
1864         hat_callback_t callback;        /* used for free_vp_pages() */
1865         hat_callback_t *cbp = NULL;
1866         caddr_t nbase;
1867         size_t nsize;
1868         size_t oswresv;
1869         int reclaim = 1;
1870 
1871         /*
1872          * We don't need any segment level locks for "segvn" data
1873          * since the address space is "write" locked.
1874          */
1875         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1876 
1877         /*
1878          * Fail the unmap if pages are SOFTLOCKed through this mapping.
1879          * softlockcnt is protected from change by the as write lock.
1880          */
1881 retry:
1882         if (svd->softlockcnt > 0) {
1883                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
1884 
1885                 /*
 1886                  * If this is a shared segment, a non-zero softlockcnt
 1887                  * means that locked pages are still in use.
1888                  */
1889                 if (svd->type == MAP_SHARED) {
1890                         return (EAGAIN);
1891                 }
1892 
1893                 /*
1894                  * since we do have the writers lock nobody can fill
1895                  * the cache during the purge. The flush either succeeds


2405                 }
2406                 TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
2407                     seg, len, 0);
2408         }
2409 
2410         return (0);                     /* I'm glad that's all over with! */
2411 }
2412 
2413 static void
2414 segvn_free(struct seg *seg)
2415 {
2416         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
2417         pgcnt_t npages = seg_pages(seg);
2418         struct anon_map *amp;
2419         size_t len;
2420 
2421         /*
2422          * We don't need any segment level locks for "segvn" data
2423          * since the address space is "write" locked.
2424          */
2425         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
2426         ASSERT(svd->tr_state == SEGVN_TR_OFF);
2427 
2428         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
2429 
2430         /*
 2431          * Be sure to unlock pages. XXX Why do things get freed instead
2432          * of unmapped? XXX
2433          */
2434         (void) segvn_lockop(seg, seg->s_base, seg->s_size,
2435             0, MC_UNLOCK, NULL, 0);
2436 
2437         /*
2438          * Deallocate the vpage and anon pointers if necessary and possible.
2439          */
2440         if (svd->vpage != NULL) {
2441                 kmem_free(svd->vpage, vpgtob(npages));
2442                 svd->vpage = NULL;
2443         }
2444         if ((amp = svd->amp) != NULL) {
2445                 /*


2555 }
2556 
2557 /*
2558  * Do a F_SOFTUNLOCK call over the range requested.  The range must have
2559  * already been F_SOFTLOCK'ed.
2560  * Caller must always match addr and len of a softunlock with a previous
2561  * softlock with exactly the same addr and len.
2562  */
2563 static void
2564 segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
2565 {
2566         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
2567         page_t *pp;
2568         caddr_t adr;
2569         struct vnode *vp;
2570         u_offset_t offset;
2571         ulong_t anon_index;
2572         struct anon_map *amp;
2573         struct anon *ap = NULL;
2574 
2575         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2576         ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
2577 
2578         if ((amp = svd->amp) != NULL)
2579                 anon_index = svd->anon_index + seg_page(seg, addr);
2580 
2581         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
2582                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
2583                 hat_unlock_region(seg->s_as->a_hat, addr, len, svd->rcookie);
2584         } else {
2585                 hat_unlock(seg->s_as->a_hat, addr, len);
2586         }
2587         for (adr = addr; adr < addr + len; adr += PAGESIZE) {
2588                 if (amp != NULL) {
2589                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2590                         if ((ap = anon_get_ptr(amp->ahp, anon_index++))
2591                             != NULL) {
2592                                 swap_xlate(ap, &vp, &offset);
2593                         } else {
2594                                 vp = svd->vp;
2595                                 offset = svd->offset +


4944     enum fault_type type, enum seg_rw rw)
4945 {
4946         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4947         page_t **plp, **ppp, *pp;
4948         u_offset_t off;
4949         caddr_t a;
4950         struct vpage *vpage;
4951         uint_t vpprot, prot;
4952         int err;
4953         page_t *pl[PVN_GETPAGE_NUM + 1];
4954         size_t plsz, pl_alloc_sz;
4955         size_t page;
4956         ulong_t anon_index;
4957         struct anon_map *amp;
4958         int dogetpage = 0;
4959         caddr_t lpgaddr, lpgeaddr;
4960         size_t pgsz;
4961         anon_sync_obj_t cookie;
4962         int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4963 
4964         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
4965         ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4966 
4967         /*
4968          * First handle the easy stuff
4969          */
4970         if (type == F_SOFTUNLOCK) {
4971                 if (rw == S_READ_NOCOW) {
4972                         rw = S_READ;
4973                         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
4974                 }
4975                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
4976                 pgsz = (seg->s_szc == 0) ? PAGESIZE :
4977                     page_get_pagesize(seg->s_szc);
4978                 VM_STAT_COND_ADD(pgsz > PAGESIZE, segvnvmstats.fltanpages[16]);
4979                 CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
4980                 segvn_softunlock(seg, lpgaddr, lpgeaddr - lpgaddr, rw);
4981                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
4982                 return (0);
4983         }
4984 
4985         ASSERT(svd->tr_state == SEGVN_TR_OFF ||
4986             !HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
4987         if (brkcow == 0) {
4988                 if (svd->tr_state == SEGVN_TR_INIT) {
4989                         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
4990                         if (svd->tr_state == SEGVN_TR_INIT) {
4991                                 ASSERT(svd->vp != NULL && svd->amp == NULL);
4992                                 ASSERT(svd->flags & MAP_TEXT);
4993                                 ASSERT(svd->type == MAP_PRIVATE);


5098          *
5099          * For S_READ_NOCOW, it's safe not to do a copy on write because the
5100          * caller makes sure no COW will be caused by another thread for a
5101          * softlocked page.
5102          */
5103         if (type == F_SOFTLOCK && svd->vp != NULL && seg->s_szc != 0) {
5104                 int demote = 0;
5105 
5106                 if (rw != S_READ_NOCOW) {
5107                         demote = 1;
5108                 }
5109                 if (!demote && len > PAGESIZE) {
5110                         pgsz = page_get_pagesize(seg->s_szc);
5111                         CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr,
5112                             lpgeaddr);
5113                         if (lpgeaddr - lpgaddr > pgsz) {
5114                                 demote = 1;
5115                         }
5116                 }
5117 
5118                 ASSERT(demote || AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
5119 
5120                 if (demote) {
5121                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5122                         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
5123                         if (seg->s_szc != 0) {
5124                                 segvn_vmpss_clrszc_cnt++;
5125                                 ASSERT(svd->softlockcnt == 0);
5126                                 err = segvn_clrszc(seg);
5127                                 if (err) {
5128                                         segvn_vmpss_clrszc_err++;
5129                                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5130                                         return (FC_MAKE_ERR(err));
5131                                 }
5132                         }
5133                         ASSERT(seg->s_szc == 0);
5134                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5135                         goto top;
5136                 }
5137         }
5138 


5152                 if (svd->amp == NULL) {
5153                         svd->amp = anonmap_alloc(seg->s_size, 0, ANON_SLEEP);
5154                         svd->amp->a_szc = seg->s_szc;
5155                 }
5156                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5157 
5158                 /*
5159                  * Start all over again since segment protections
5160                  * may have changed after we dropped the "read" lock.
5161                  */
5162                 goto top;
5163         }
5164 
5165         /*
5166          * S_READ_NOCOW vs S_READ distinction was
5167          * only needed for the code above. After
5168          * that we treat it as S_READ.
5169          */
5170         if (rw == S_READ_NOCOW) {
5171                 ASSERT(type == F_SOFTLOCK);
5172                 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
5173                 rw = S_READ;
5174         }
5175 
5176         amp = svd->amp;
5177 
5178         /*
5179          * MADV_SEQUENTIAL work is ignored for large page segments.
5180          */
5181         if (seg->s_szc != 0) {
5182                 pgsz = page_get_pagesize(seg->s_szc);
5183                 ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
5184                 CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
5185                 if (svd->vp == NULL) {
5186                         err = segvn_fault_anonpages(hat, seg, lpgaddr,
5187                             lpgeaddr, type, rw, addr, addr + len, brkcow);
5188                 } else {
5189                         err = segvn_fault_vnodepages(hat, seg, lpgaddr,
5190                             lpgeaddr, type, rw, addr, addr + len, brkcow);
5191                         if (err == IE_RETRY) {
5192                                 ASSERT(seg->s_szc == 0);


5623                 ANON_LOCK_EXIT(&amp->a_rwlock);
5624         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5625         if (pl_alloc_sz)
5626                 kmem_free(plp, pl_alloc_sz);
5627         return (0);
5628 }
5629 
5630 /*
5631  * This routine is used to start I/O on pages asynchronously.  XXX it will
5632  * only create PAGESIZE pages. At fault time they will be relocated into
5633  * larger pages.
5634  */
5635 static faultcode_t
5636 segvn_faulta(struct seg *seg, caddr_t addr)
5637 {
5638         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
5639         int err;
5640         struct anon_map *amp;
5641         vnode_t *vp;
5642 
5643         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
5644 
5645         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
5646         if ((amp = svd->amp) != NULL) {
5647                 struct anon *ap;
5648 
5649                 /*
5650                  * Reader lock to prevent amp->ahp from being changed.
 5651                  * This is advisory; it's OK to miss a page, so
 5652                  * we don't take the anon_array_enter() lock.
5653                  */
5654                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5655                 if ((ap = anon_get_ptr(amp->ahp,
5656                     svd->anon_index + seg_page(seg, addr))) != NULL) {
5657 
5658                         err = anon_getpage(&ap, NULL, NULL,
5659                             0, seg, addr, S_READ, svd->cred);
5660 
5661                         ANON_LOCK_EXIT(&amp->a_rwlock);
5662                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5663                         if (err)


5680             PAGESIZE, NULL, NULL, 0, seg, addr,
5681             S_OTHER, svd->cred, NULL);
5682 
5683         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5684         if (err)
5685                 return (FC_MAKE_ERR(err));
5686         return (0);
5687 }
5688 
5689 static int
5690 segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
5691 {
5692         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
5693         struct vpage *cvp, *svp, *evp;
5694         struct vnode *vp;
5695         size_t pgsz;
5696         pgcnt_t pgcnt;
5697         anon_sync_obj_t cookie;
5698         int unload_done = 0;
5699 
5700         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
5701 
5702         if ((svd->maxprot & prot) != prot)
5703                 return (EACCES);                        /* violated maxprot */
5704 
5705         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
5706 
5707         /* return if prot is the same */
5708         if (!svd->pageprot && svd->prot == prot) {
5709                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5710                 return (0);
5711         }
5712 
5713         /*
5714          * Since we change protections we first have to flush the cache.
5715          * This makes sure all the pagelock calls have to recheck
5716          * protections.
5717          */
5718         if (svd->softlockcnt > 0) {
5719                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
5720 


5757         }
5758 
5759         if ((prot & PROT_WRITE) && svd->type == MAP_SHARED &&
5760             svd->vp != NULL && (svd->vp->v_flag & VVMEXEC)) {
5761                 ASSERT(vn_is_mapped(svd->vp, V_WRITE));
5762                 segvn_inval_trcache(svd->vp);
5763         }
5764         if (seg->s_szc != 0) {
5765                 int err;
5766                 pgsz = page_get_pagesize(seg->s_szc);
5767                 pgcnt = pgsz >> PAGESHIFT;
5768                 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
5769                 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) {
5770                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5771                         ASSERT(seg->s_base != addr || seg->s_size != len);
5772                         /*
5773                          * If we are holding the as lock as a reader then
5774                          * we need to return IE_RETRY and let the as
5775                          * layer drop and re-acquire the lock as a writer.
5776                          */
5777                         if (AS_READ_HELD(seg->s_as, &seg->s_as->a_lock))
5778                                 return (IE_RETRY);
5779                         VM_STAT_ADD(segvnvmstats.demoterange[1]);
5780                         if (svd->type == MAP_PRIVATE || svd->vp != NULL) {
5781                                 err = segvn_demote_range(seg, addr, len,
5782                                     SDR_END, 0);
5783                         } else {
5784                                 uint_t szcvec = map_pgszcvec(seg->s_base,
5785                                     pgsz, (uintptr_t)seg->s_base,
5786                                     (svd->flags & MAP_TEXT), MAPPGSZC_SHM, 0);
5787                                 err = segvn_demote_range(seg, addr, len,
5788                                     SDR_END, szcvec);
5789                         }
5790                         if (err == 0)
5791                                 return (IE_RETRY);
5792                         if (err == ENOMEM)
5793                                 return (IE_NOMEM);
5794                         return (err);
5795                 }
5796         }
5797 
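
Editor's note: the large-page path in segvn_setprot() insists that both addr and len sit on large-page boundaries before it will operate on the range as-is; IS_P2ALIGNED() is just a power-of-two mask test. A minimal sketch with the macro written out locally:

#include <stdio.h>
#include <stdint.h>

/* Local spelling of the power-of-two alignment test. */
#define X_P2ALIGNED(x, align)   (((uintptr_t)(x) & ((uintptr_t)(align) - 1)) == 0)

int
main(void)
{
        uintptr_t addr = 0x40000;       /* 256K-aligned */
        size_t len = 0x60000;           /* 384K */
        size_t pgsz = 0x40000;          /* pretend 256K large pages */

        /*
         * Mirrors the check above: both ends must sit on large-page
         * boundaries before the range can be handled without demotion.
         */
        printf("addr aligned: %d\n", X_P2ALIGNED(addr, pgsz));  /* 1 */
        printf("len aligned:  %d\n", X_P2ALIGNED(len, pgsz));   /* 0 */
        return (0);
}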


6083         return (0);
6084 }
6085 
6086 /*
6087  * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
6088  * to determine if the seg is capable of mapping the requested szc.
6089  */
6090 static int
6091 segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
6092 {
6093         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6094         struct segvn_data *nsvd;
6095         struct anon_map *amp = svd->amp;
6096         struct seg *nseg;
6097         caddr_t eaddr = addr + len, a;
6098         size_t pgsz = page_get_pagesize(szc);
6099         pgcnt_t pgcnt = page_get_pagecnt(szc);
6100         int err;
6101         u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base);
6102 
6103         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
6104         ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6105 
6106         if (seg->s_szc == szc || segvn_lpg_disable != 0) {
6107                 return (0);
6108         }
6109 
6110         /*
6111          * addr should always be pgsz aligned but eaddr may be misaligned if
6112          * it's at the end of the segment.
6113          *
6114          * XXX we should assert this condition since as_setpagesize() logic
6115          * guarantees it.
6116          */
6117         if (!IS_P2ALIGNED(addr, pgsz) ||
6118             (!IS_P2ALIGNED(eaddr, pgsz) &&
6119             eaddr != seg->s_base + seg->s_size)) {
6120 
6121                 segvn_setpgsz_align_err++;
6122                 return (EINVAL);
6123         }


6371 }
6372 
6373 static int
6374 segvn_clrszc(struct seg *seg)
6375 {
6376         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6377         struct anon_map *amp = svd->amp;
6378         size_t pgsz;
6379         pgcnt_t pages;
6380         int err = 0;
6381         caddr_t a = seg->s_base;
6382         caddr_t ea = a + seg->s_size;
6383         ulong_t an_idx = svd->anon_index;
6384         vnode_t *vp = svd->vp;
6385         struct vpage *vpage = svd->vpage;
6386         page_t *anon_pl[1 + 1], *pp;
6387         struct anon *ap, *oldap;
6388         uint_t prot = svd->prot, vpprot;
6389         int pageflag = 0;
6390 
6391         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
6392             SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
6393         ASSERT(svd->softlockcnt == 0);
6394 
6395         if (vp == NULL && amp == NULL) {
6396                 ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6397                 seg->s_szc = 0;
6398                 return (0);
6399         }
6400 
6401         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
6402                 ASSERT(svd->amp == NULL);
6403                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
6404                 hat_leave_region(seg->s_as->a_hat, svd->rcookie,
6405                     HAT_REGION_TEXT);
6406                 svd->rcookie = HAT_INVALID_REGION_COOKIE;
6407         } else if (svd->tr_state == SEGVN_TR_ON) {
6408                 ASSERT(svd->amp != NULL);
6409                 segvn_textunrepl(seg, 1);
6410                 ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF);
6411                 amp = NULL;


6583                 page_unlock(ppa[i]);
6584         }
6585 
6586         kmem_free(ppa, ppasize);
6587         return (err);
6588 }
6589 
6590 /*
6591  * Returns right (upper address) segment if split occurred.
6592  * If the address is equal to the beginning or end of its segment it returns
6593  * the current segment.
6594  */
6595 static struct seg *
6596 segvn_split_seg(struct seg *seg, caddr_t addr)
6597 {
6598         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6599         struct seg *nseg;
6600         size_t nsize;
6601         struct segvn_data *nsvd;
6602 
6603         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
6604         ASSERT(svd->tr_state == SEGVN_TR_OFF);
6605 
6606         ASSERT(addr >= seg->s_base);
6607         ASSERT(addr <= seg->s_base + seg->s_size);
6608         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6609 
6610         if (addr == seg->s_base || addr == seg->s_base + seg->s_size)
6611                 return (seg);
6612 
6613         nsize = seg->s_base + seg->s_size - addr;
6614         seg->s_size = addr - seg->s_base;
6615         nseg = seg_alloc(seg->s_as, addr, nsize);
6616         ASSERT(nseg != NULL);
6617         nseg->s_ops = seg->s_ops;
6618         nsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP);
6619         nseg->s_data = (void *)nsvd;
6620         nseg->s_szc = seg->s_szc;
6621         *nsvd = *svd;
6622         ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
6623         nsvd->seg = nseg;


6738  */
6739 static int
6740 segvn_demote_range(
6741         struct seg *seg,
6742         caddr_t addr,
6743         size_t len,
6744         int flag,
6745         uint_t szcvec)
6746 {
6747         caddr_t eaddr = addr + len;
6748         caddr_t lpgaddr, lpgeaddr;
6749         struct seg *nseg;
6750         struct seg *badseg1 = NULL;
6751         struct seg *badseg2 = NULL;
6752         size_t pgsz;
6753         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6754         int err;
6755         uint_t szc = seg->s_szc;
6756         uint_t tszcvec;
6757 
6758         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
6759         ASSERT(svd->tr_state == SEGVN_TR_OFF);
6760         ASSERT(szc != 0);
6761         pgsz = page_get_pagesize(szc);
6762         ASSERT(seg->s_base != addr || seg->s_size != len);
6763         ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6764         ASSERT(svd->softlockcnt == 0);
6765         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6766         ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED));
6767 
6768         CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
6769         ASSERT(flag == SDR_RANGE || eaddr < lpgeaddr || addr > lpgaddr);
6770         if (flag == SDR_RANGE) {
6771                 /* demote entire range */
6772                 badseg1 = nseg = segvn_split_seg(seg, lpgaddr);
6773                 (void) segvn_split_seg(nseg, lpgeaddr);
6774                 ASSERT(badseg1->s_base == lpgaddr);
6775                 ASSERT(badseg1->s_size == lpgeaddr - lpgaddr);
6776         } else if (addr != lpgaddr) {
6777                 ASSERT(flag == SDR_END);
6778                 badseg1 = nseg = segvn_split_seg(seg, lpgaddr);

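Editor's note: CALC_LPG_REGION() (its body is not shown in this excerpt) expands the request outward to the enclosing large-page boundaries, which is what lpgaddr and lpgeaddr hold throughout these routines. A small sketch of that rounding, assuming pgsz is a power of two; the real macro also clamps the result to the segment:

#include <stdio.h>
#include <stdint.h>

/*
 * Round [addr, addr + len) outward to pgsz boundaries.  This mirrors the
 * lpgaddr/lpgeaddr computation in spirit only.
 */
static void
x_lpg_region(uintptr_t addr, size_t len, size_t pgsz,
    uintptr_t *lpgaddr, uintptr_t *lpgeaddr)
{
        *lpgaddr = addr & ~(pgsz - 1);                          /* round down */
        *lpgeaddr = (addr + len + pgsz - 1) & ~(pgsz - 1);      /* round up */
}

int
main(void)
{
        uintptr_t lo, hi;

        x_lpg_region(0x41000, 0x3000, 0x40000, &lo, &hi);
        printf("0x%lx .. 0x%lx\n", (unsigned long)lo, (unsigned long)hi);
        /* prints 0x40000 .. 0x80000 */
        return (0);
}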

6867                 ASSERT(badseg2->s_base > addr);
6868                 ASSERT(eaddr > badseg2->s_base);
6869                 ASSERT(eaddr < badseg2->s_base + badseg2->s_size);
6870 
6871                 badseg2->s_szc = tszc;
6872                 if (!IS_P2ALIGNED(eaddr, tpgsz)) {
6873                         return (segvn_demote_range(badseg2, badseg2->s_base,
6874                             eaddr - badseg2->s_base, SDR_END, tszcvec));
6875                 }
6876         }
6877 
6878         return (0);
6879 }
6880 
6881 static int
6882 segvn_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
6883 {
6884         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6885         struct vpage *vp, *evp;
6886 
6887         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6888 
6889         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
6890         /*
 6891          * If the segment-level protections can be used, simply check against them.
6892          */
6893         if (svd->pageprot == 0) {
6894                 int err;
6895 
6896                 err = ((svd->prot & prot) != prot) ? EACCES : 0;
6897                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6898                 return (err);
6899         }
6900 
6901         /*
6902          * Have to check down to the vpage level.
6903          */
6904         evp = &svd->vpage[seg_page(seg, addr + len)];
6905         for (vp = &svd->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
6906                 if ((VPP_PROT(vp) & prot) != prot) {
6907                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6908                         return (EACCES);
6909                 }
6910         }
6911         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6912         return (0);
6913 }
6914 
6915 static int
6916 segvn_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
6917 {
6918         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6919         size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
6920 
6921         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6922 
6923         if (pgno != 0) {
6924                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
6925                 if (svd->pageprot == 0) {
6926                         do {
6927                                 protv[--pgno] = svd->prot;
6928                         } while (pgno != 0);
6929                 } else {
6930                         size_t pgoff = seg_page(seg, addr);
6931 
6932                         do {
6933                                 pgno--;
6934                                 protv[pgno] = VPP_PROT(&svd->vpage[pgno+pgoff]);
6935                         } while (pgno != 0);
6936                 }
6937                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6938         }
6939         return (0);
6940 }
6941 
6942 static u_offset_t
6943 segvn_getoffset(struct seg *seg, caddr_t addr)
6944 {
6945         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6946 
6947         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6948 
6949         return (svd->offset + (uintptr_t)(addr - seg->s_base));
6950 }
6951 
6952 /*ARGSUSED*/
6953 static int
6954 segvn_gettype(struct seg *seg, caddr_t addr)
6955 {
6956         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6957 
6958         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6959 
6960         return (svd->type | (svd->flags & (MAP_NORESERVE | MAP_TEXT |
6961             MAP_INITDATA)));
6962 }
6963 
6964 /*ARGSUSED*/
6965 static int
6966 segvn_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
6967 {
6968         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6969 
6970         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6971 
6972         *vpp = svd->vp;
6973         return (0);
6974 }
6975 
6976 /*
6977  * Check to see if it makes sense to do kluster/read ahead to
6978  * addr + delta relative to the mapping at addr.  We assume here
6979  * that delta is a signed PAGESIZE'd multiple (which can be negative).
6980  *
6981  * For segvn, we currently "approve" of the action if we are
6982  * still in the segment and it maps from the same vp/off,
6983  * or if the advice stored in segvn_data or vpages allows it.
 6984  * Klustering is currently disallowed for MADV_RANDOM, or for MADV_SEQUENTIAL with delta < 0.
6985  */
6986 static int
6987 segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
6988 {
6989         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6990         struct anon *oap, *ap;
6991         ssize_t pd;
6992         size_t page;
6993         struct vnode *vp1, *vp2;
6994         u_offset_t off1, off2;
6995         struct anon_map *amp;
6996 
6997         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
6998         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
6999             SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
7000 
7001         if (addr + delta < seg->s_base ||
7002             addr + delta >= (seg->s_base + seg->s_size))
7003                 return (-1);            /* exceeded segment bounds */
7004 
7005         pd = delta / (ssize_t)PAGESIZE; /* divide to preserve sign bit */
7006         page = seg_page(seg, addr);
7007 
7008         /*
7009          * Check to see if either of the pages addr or addr + delta
7010          * have advice set that prevents klustering (if MADV_RANDOM advice
7011          * is set for entire segment, or MADV_SEQUENTIAL is set and delta
7012          * is negative).
7013          */
7014         if (svd->advice == MADV_RANDOM ||
7015             svd->advice == MADV_SEQUENTIAL && delta < 0)
7016                 return (-1);
7017         else if (svd->pageadvice && svd->vpage) {
7018                 struct vpage *bvpp, *evpp;

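Editor's note: segvn_kluster() approves read-ahead only while addr + delta stays inside the segment and the advice permits it; MADV_RANDOM always refuses, and MADV_SEQUENTIAL refuses backward klustering. A user-level sketch of the same decision with stand-in advice constants:

/* Illustrative stand-ins; not the kernel's definitions. */
#include <stdio.h>
#include <sys/types.h>

#define X_MADV_NORMAL   0
#define X_MADV_RANDOM   1
#define X_MADV_SEQ      2

/*
 * Return 0 if read-ahead to (off + delta) looks worthwhile, -1 otherwise.
 * seg_size is the mapping size; off is the faulting offset within it.
 */
static int
x_kluster_ok(size_t seg_size, size_t off, ssize_t delta, int advice)
{
        if ((ssize_t)off + delta < 0 ||
            (size_t)((ssize_t)off + delta) >= seg_size)
                return (-1);            /* exceeded segment bounds */

        if (advice == X_MADV_RANDOM ||
            (advice == X_MADV_SEQ && delta < 0))
                return (-1);            /* advice forbids klustering */
        return (0);
}

int
main(void)
{
        printf("%d\n", x_kluster_ok(1 << 20, 8192, 4096, X_MADV_NORMAL));  /* 0 */
        printf("%d\n", x_kluster_ok(1 << 20, 8192, -4096, X_MADV_SEQ));    /* -1 */
        return (0);
}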

7083  *
7084  * The value returned is intended to correlate well with the process's
7085  * memory requirements.  However, there are some caveats:
7086  * 1)   When given a shared segment as argument, this routine will
7087  *      only succeed in swapping out pages for the last sharer of the
7088  *      segment.  (Previous callers will only have decremented mapping
7089  *      reference counts.)
7090  * 2)   We assume that the hat layer maintains a large enough translation
7091  *      cache to capture process reference patterns.
7092  */
7093 static size_t
7094 segvn_swapout(struct seg *seg)
7095 {
7096         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7097         struct anon_map *amp;
7098         pgcnt_t pgcnt = 0;
7099         pgcnt_t npages;
7100         pgcnt_t page;
7101         ulong_t anon_index;
7102 
7103         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7104 
7105         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7106         /*
7107          * Find pages unmapped by our caller and force them
7108          * out to the virtual swap device.
7109          */
7110         if ((amp = svd->amp) != NULL)
7111                 anon_index = svd->anon_index;
7112         npages = seg->s_size >> PAGESHIFT;
7113         for (page = 0; page < npages; page++) {
7114                 page_t *pp;
7115                 struct anon *ap;
7116                 struct vnode *vp;
7117                 u_offset_t off;
7118                 anon_sync_obj_t cookie;
7119 
7120                 /*
7121                  * Obtain <vp, off> pair for the page, then look it up.
7122                  *
7123                  * Note that this code is willing to consider regular


7262 static int
7263 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7264 {
7265         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7266         struct vpage *vpp;
7267         page_t *pp;
7268         u_offset_t offset;
7269         struct vnode *vp;
7270         u_offset_t off;
7271         caddr_t eaddr;
7272         int bflags;
7273         int err = 0;
7274         int segtype;
7275         int pageprot;
7276         int prot;
7277         ulong_t anon_index;
7278         struct anon_map *amp;
7279         struct anon *ap;
7280         anon_sync_obj_t cookie;
7281 
7282         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7283 
7284         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7285 
7286         if (svd->softlockcnt > 0) {
7287                 /*
 7288                  * If this is a shared segment, a non-zero softlockcnt
 7289                  * means that locked pages are still in use.
7290                  */
7291                 if (svd->type == MAP_SHARED) {
7292                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7293                         return (EAGAIN);
7294                 }
7295 
7296                 /*
7297                  * flush all pages from seg cache
7298                  * otherwise we may deadlock in swap_putpage
7299                  * for B_INVAL page (4175402).
7300                  *
7301                  * Even if we grab segvn WRITER's lock
7302                  * here, there might be another thread which could've


7484 /*
7485  * Determine if we have data corresponding to pages in the
7486  * primary storage virtual memory cache (i.e., "in core").
7487  */
7488 static size_t
7489 segvn_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
7490 {
7491         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7492         struct vnode *vp, *avp;
7493         u_offset_t offset, aoffset;
7494         size_t p, ep;
7495         int ret;
7496         struct vpage *vpp;
7497         page_t *pp;
7498         uint_t start;
7499         struct anon_map *amp;           /* XXX - for locknest */
7500         struct anon *ap;
7501         uint_t attr;
7502         anon_sync_obj_t cookie;
7503 
7504         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7505 
7506         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7507         if (svd->amp == NULL && svd->vp == NULL) {
7508                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7509                 bzero(vec, btopr(len));
7510                 return (len);   /* no anonymous pages created yet */
7511         }
7512 
7513         p = seg_page(seg, addr);
7514         ep = seg_page(seg, addr + len);
7515         start = svd->vp ? SEG_PAGE_VNODEBACKED : 0;
7516 
7517         amp = svd->amp;
7518         for (; p < ep; p++, addr += PAGESIZE) {
7519                 vpp = (svd->vpage) ? &svd->vpage[p]: NULL;
7520                 ret = start;
7521                 ap = NULL;
7522                 avp = NULL;
7523                 /* Grab the vnode/offset for the anon slot */
7524                 if (amp != NULL) {

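Editor's note: segvn_incore() fills a per-page vector describing what is resident for the requested range. From user level the closest analogue is mincore(); a minimal sketch follows (Linux prototype shown; on illumos the vector argument is declared as char *):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int
main(void)
{
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
        size_t len = 16 * pgsz;
        unsigned char *vec;
        char *p;
        size_t i, resident = 0;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                return (1);
        memset(p, 0xab, 4 * pgsz);      /* touch only the first 4 pages */

        vec = malloc(len / pgsz);
        if (vec == NULL || mincore(p, len, vec) != 0)
                return (1);
        for (i = 0; i < len / pgsz; i++)
                resident += vec[i] & 1;
        printf("%zu of %zu pages resident\n", resident, len / pgsz);
        return (0);
}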

7660         int pageprot;
7661         int claim;
7662         struct vnode *vp;
7663         ulong_t anon_index;
7664         struct anon_map *amp;
7665         struct anon *ap;
7666         struct vattr va;
7667         anon_sync_obj_t cookie;
7668         struct kshmid *sp = NULL;
7669         struct proc     *p = curproc;
7670         kproject_t      *proj = NULL;
7671         int chargeproc = 1;
7672         size_t locked_bytes = 0;
7673         size_t unlocked_bytes = 0;
7674         int err = 0;
7675 
7676         /*
 7677          * Hold write lock on address space because we may split or
 7678          * concatenate segments.
7679          */
7680         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7681 
7682         /*
7683          * If this is a shm, use shm's project and zone, else use
7684          * project and zone of calling process
7685          */
7686 
7687         /* Determine if this segment backs a sysV shm */
7688         if (svd->amp != NULL && svd->amp->a_sp != NULL) {
7689                 ASSERT(svd->type == MAP_SHARED);
7690                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
7691                 sp = svd->amp->a_sp;
7692                 proj = sp->shm_perm.ipc_proj;
7693                 chargeproc = 0;
7694         }
7695 
7696         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
7697         if (attr) {
7698                 pageprot = attr & ~(SHARED|PRIVATE);
7699                 segtype = attr & SHARED ? MAP_SHARED : MAP_PRIVATE;
7700 


8076  *      MADV_DONTNEED   - Pages are not needed (synced out in mctl)
8077  *      MADV_FREE       - Contents can be discarded
8078  *      MADV_ACCESS_DEFAULT- Default access
8079  *      MADV_ACCESS_LWP - Next LWP will access heavily
8080  *      MADV_ACCESS_MANY- Many LWPs or processes will access heavily
8081  */
8082 static int
8083 segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
8084 {
8085         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
8086         size_t page;
8087         int err = 0;
8088         int already_set;
8089         struct anon_map *amp;
8090         ulong_t anon_index;
8091         struct seg *next;
8092         lgrp_mem_policy_t policy;
8093         struct seg *prev;
8094         struct vnode *vp;
8095 
8096         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
8097 
8098         /*
8099          * In case of MADV_FREE, we won't be modifying any segment private
8100          * data structures; so, we only need to grab READER's lock
8101          */
8102         if (behav != MADV_FREE) {
8103                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
8104                 if (svd->tr_state != SEGVN_TR_OFF) {
8105                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8106                         return (0);
8107                 }
8108         } else {
8109                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
8110         }
8111 
8112         /*
8113          * Large pages are assumed to be only turned on when accesses to the
8114          * segment's address range have spatial and temporal locality. That
8115          * justifies ignoring MADV_SEQUENTIAL for large page segments.
8116          * Also, ignore advice affecting lgroup memory allocation

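Editor's note: the MADV_* values listed above are the ones applications hand to madvise(); segvn_advise() is where they land for segvn-backed mappings. A minimal user-level sketch of issuing such advice on an anonymous mapping (error handling kept to a minimum):

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int
main(void)
{
        size_t len = 64 * (size_t)sysconf(_SC_PAGESIZE);
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return (1);
        /* Hint sequential access over the whole mapping. */
        if (madvise(p, len, MADV_SEQUENTIAL) != 0)
                perror("madvise");
        return (0);
}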

8204          * otherwise use appropriate vpage entry.
8205          */
8206         if ((addr == seg->s_base) && (len == seg->s_size)) {
8207                 switch (behav) {
8208                 case MADV_ACCESS_LWP:
8209                 case MADV_ACCESS_MANY:
8210                 case MADV_ACCESS_DEFAULT:
8211                         /*
8212                          * Set memory allocation policy for this segment
8213                          */
8214                         policy = lgrp_madv_to_policy(behav, len, svd->type);
8215                         if (svd->type == MAP_SHARED)
8216                                 already_set = lgrp_shm_policy_set(policy, amp,
8217                                     svd->anon_index, vp, svd->offset, len);
8218                         else {
8219                                 /*
8220                                  * For private memory, need writers lock on
8221                                  * address space because the segment may be
8222                                  * split or concatenated when changing policy
8223                                  */
8224                                 if (AS_READ_HELD(seg->s_as,
8225                                     &seg->s_as->a_lock)) {
8226                                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8227                                         return (IE_RETRY);
8228                                 }
8229 
8230                                 already_set = lgrp_privm_policy_set(policy,
8231                                     &svd->policy_info, len);
8232                         }
8233 
8234                         /*
8235                          * If policy set already and it shouldn't be reapplied,
8236                          * don't do anything.
8237                          */
8238                         if (already_set &&
8239                             !LGRP_MEM_POLICY_REAPPLICABLE(policy))
8240                                 break;
8241 
8242                         /*
8243                          * Mark any existing pages in given range for
8244                          * migration
8245                          */


8355                                 already_set = lgrp_shm_policy_set(policy, amp,
8356                                     anon_index, vp, off, len);
8357                         else
8358                                 already_set =
8359                                     (policy == svd->policy_info.mem_policy);
8360 
8361                         /*
8362                          * If policy set already and it shouldn't be reapplied,
8363                          * don't do anything.
8364                          */
8365                         if (already_set &&
8366                             !LGRP_MEM_POLICY_REAPPLICABLE(policy))
8367                                 break;
8368 
8369                         /*
8370                          * For private memory, need writers lock on
8371                          * address space because the segment may be
8372                          * split or concatenated when changing policy
8373                          */
8374                         if (svd->type == MAP_PRIVATE &&
8375                             AS_READ_HELD(seg->s_as, &seg->s_as->a_lock)) {
8376                                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8377                                 return (IE_RETRY);
8378                         }
8379 
8380                         /*
8381                          * Mark any existing pages in given range for
8382                          * migration
8383                          */
8384                         page_mark_migrate(seg, addr, len, amp, svd->anon_index,
8385                             vp, svd->offset, 1);
8386 
8387                         /*
8388                          * Don't need to try to split or concatenate
8389                          * segments, since policy is same or this is a shared
8390                          * memory segment
8391                          */
8392                         if (already_set || svd->type == MAP_SHARED)
8393                                 break;
8394 
8395                         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {


8541                         err = EINVAL;
8542                 }
8543         }
8544         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8545         return (err);
8546 }
8547 
8548 /*
8549  * There is one kind of inheritance that can be specified for pages:
8550  *
8551  *     SEGP_INH_ZERO - Pages should be zeroed in the child
8552  */
8553 static int
8554 segvn_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
8555 {
8556         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
8557         struct vpage *bvpp, *evpp;
8558         size_t page;
8559         int ret = 0;
8560 
8561         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
8562 
8563         /* Can't support something we don't know about */
8564         if (behav != SEGP_INH_ZERO)
8565                 return (ENOTSUP);
8566 
8567         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
8568 
8569         /*
8570          * This must be a straightforward anonymous segment that is mapped
8571          * privately and is not backed by a vnode.
8572          */
8573         if (svd->tr_state != SEGVN_TR_OFF ||
8574             svd->type != MAP_PRIVATE ||
8575             svd->vp != NULL) {
8576                 ret = EINVAL;
8577                 goto out;
8578         }
8579 
8580         /*
8581          * If the entire segment has been marked as inherit zero, then no reason


8810         size_t wlen;
8811         uint_t pflags = 0;
8812         int sftlck_sbase = 0;
8813         int sftlck_send = 0;
8814 
8815 #ifdef DEBUG
8816         if (type == L_PAGELOCK && segvn_pglock_mtbf) {
8817                 hrtime_t ts = gethrtime();
8818                 if ((ts % segvn_pglock_mtbf) == 0) {
8819                         return (ENOTSUP);
8820                 }
8821                 if ((ts % segvn_pglock_mtbf) == 1) {
8822                         return (EFAULT);
8823                 }
8824         }
8825 #endif
8826 
8827         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_START,
8828             "segvn_pagelock: start seg %p addr %p", seg, addr);
8829 
8830         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
8831         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
8832 
8833         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
8834 
8835         /*
8836          * for now we only support pagelock to anon memory. We would have to
8837          * check protections for vnode objects and call into the vnode driver.
8838          * That's too much for a fast path. Let the fault entry point handle
8839          * it.
8840          */
8841         if (svd->vp != NULL) {
8842                 if (type == L_PAGELOCK) {
8843                         error = ENOTSUP;
8844                         goto out;
8845                 }
8846                 panic("segvn_pagelock(L_PAGEUNLOCK): vp != NULL");
8847         }
8848         if ((amp = svd->amp) == NULL) {
8849                 if (type == L_PAGELOCK) {
8850                         error = EFAULT;

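Editor's note: the #ifdef DEBUG block at the top of segvn_pagelock() is a simple MTBF-style fault injector: with segvn_pglock_mtbf set, roughly one call in that many fails with ENOTSUP or EFAULT so the fallback path gets exercised. A stand-alone sketch of the same pattern with invented names:

#include <stdio.h>
#include <time.h>
#include <errno.h>

static unsigned long x_pglock_mtbf = 1000;      /* 0 disables injection */

static int
x_maybe_inject_error(void)
{
        struct timespec ts;

        if (x_pglock_mtbf == 0)
                return (0);
        clock_gettime(CLOCK_MONOTONIC, &ts);
        /*
         * Use the timestamp as a cheap pseudo-random source, as the
         * DEBUG code above does with gethrtime().
         */
        switch ((unsigned long)ts.tv_nsec % x_pglock_mtbf) {
        case 0:
                return (ENOTSUP);
        case 1:
                return (EFAULT);
        default:
                return (0);
        }
}

int
main(void)
{
        int injected = 0, i;

        for (i = 0; i < 100000; i++)
                if (x_maybe_inject_error() != 0)
                        injected++;
        printf("injected %d errors\n", injected);
        return (0);
}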

9432 /*
 9433  * If the async argument is not 0 we are called from the pcache async thread
 9434  * and don't hold the AS lock.
9435  */
9436 
9437 /*ARGSUSED*/
9438 static int
9439 segvn_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
9440         enum seg_rw rw, int async)
9441 {
9442         struct seg *seg = (struct seg *)ptag;
9443         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
9444         pgcnt_t np, npages;
9445         struct page **pl;
9446 
9447         npages = np = btop(len);
9448         ASSERT(npages);
9449 
9450         ASSERT(svd->vp == NULL && svd->amp != NULL);
9451         ASSERT(svd->softlockcnt >= npages);
9452         ASSERT(async || AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
9453 
9454         pl = pplist;
9455 
9456         ASSERT(pl[np] == NOPCACHE_SHWLIST || pl[np] == PCACHE_SHWLIST);
9457         ASSERT(!async || pl[np] == PCACHE_SHWLIST);
9458 
9459         while (np > (uint_t)0) {
9460                 if (rw == S_WRITE) {
9461                         hat_setrefmod(*pplist);
9462                 } else {
9463                         hat_setref(*pplist);
9464                 }
9465                 page_unlock(*pplist);
9466                 np--;
9467                 pplist++;
9468         }
9469 
9470         kmem_free(pl, sizeof (page_t *) * (npages + 1));
9471 
9472         /*


9710  * svd->amp remains as NULL.
9711  */
9712 static void
9713 segvn_textrepl(struct seg *seg)
9714 {
9715         struct segvn_data       *svd = (struct segvn_data *)seg->s_data;
9716         vnode_t                 *vp = svd->vp;
9717         u_offset_t              off = svd->offset;
9718         size_t                  size = seg->s_size;
9719         u_offset_t              eoff = off + size;
9720         uint_t                  szc = seg->s_szc;
9721         ulong_t                 hash = SVNTR_HASH_FUNC(vp);
9722         svntr_t                 *svntrp;
9723         struct vattr            va;
9724         proc_t                  *p = seg->s_as->a_proc;
9725         lgrp_id_t               lgrp_id;
9726         lgrp_id_t               olid;
9727         int                     first;
9728         struct anon_map         *amp;
9729 
9730         ASSERT(AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
9731         ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
9732         ASSERT(p != NULL);
9733         ASSERT(svd->tr_state == SEGVN_TR_INIT);
9734         ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
9735         ASSERT(svd->flags & MAP_TEXT);
9736         ASSERT(svd->type == MAP_PRIVATE);
9737         ASSERT(vp != NULL && svd->amp == NULL);
9738         ASSERT(!svd->pageprot && !(svd->prot & PROT_WRITE));
9739         ASSERT(!(svd->flags & MAP_NORESERVE) && svd->swresv == 0);
9740         ASSERT(seg->s_as != &kas);
9741         ASSERT(off < eoff);
9742         ASSERT(svntr_hashtab != NULL);
9743 
9744         /*
 9745          * If NUMA optimizations are no longer desired, bail out.
9746          */
9747         if (!lgrp_optimizations()) {
9748                 svd->tr_state = SEGVN_TR_OFF;
9749                 return;
9750         }


9991  * unmapped but can also be called when segment no longer qualifies for text
9992  * replication (e.g. due to protection changes). If unload_unmap is set use
9993  * HAT_UNLOAD_UNMAP flag in hat_unload_callback().  If we are the last user of
9994  * svntr free all its anon maps and remove it from the hash table.
9995  */
9996 static void
9997 segvn_textunrepl(struct seg *seg, int unload_unmap)
9998 {
9999         struct segvn_data       *svd = (struct segvn_data *)seg->s_data;
10000         vnode_t                 *vp = svd->vp;
10001         u_offset_t              off = svd->offset;
10002         size_t                  size = seg->s_size;
10003         u_offset_t              eoff = off + size;
10004         uint_t                  szc = seg->s_szc;
10005         ulong_t                 hash = SVNTR_HASH_FUNC(vp);
10006         svntr_t                 *svntrp;
10007         svntr_t                 **prv_svntrp;
10008         lgrp_id_t               lgrp_id = svd->tr_policy_info.mem_lgrpid;
10009         lgrp_id_t               i;
10010 
10011         ASSERT(AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
10012         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) ||
10013             SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
10014         ASSERT(svd->tr_state == SEGVN_TR_ON);
10015         ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
10016         ASSERT(svd->amp != NULL);
10017         ASSERT(svd->amp->refcnt >= 1);
10018         ASSERT(svd->anon_index == 0);
10019         ASSERT(lgrp_id != LGRP_NONE && lgrp_id < NLGRPS_MAX);
10020         ASSERT(svntr_hashtab != NULL);
10021 
10022         mutex_enter(&svntr_hashtab[hash].tr_lock);
10023         prv_svntrp = &svntr_hashtab[hash].tr_head;
10024         for (; (svntrp = *prv_svntrp) != NULL; prv_svntrp = &svntrp->tr_next) {
10025                 ASSERT(svntrp->tr_refcnt != 0);
10026                 if (svntrp->tr_vp == vp && svntrp->tr_off == off &&
10027                     svntrp->tr_eoff == eoff && svntrp->tr_szc == szc) {
10028                         break;
10029                 }
10030         }
10031         if (svntrp == NULL) {
10032                 panic("segvn_textunrepl: svntr record not found");


10217         ASSERT(mutex_owned(&svntr_hashtab[hash].tr_lock));
10218 
10219         as = seg->s_as;
10220         ASSERT(as != NULL && as != &kas);
10221         p = as->a_proc;
10222         ASSERT(p != NULL);
10223         ASSERT(p->p_tr_lgrpid != LGRP_NONE);
10224         lgrp_id = p->p_t1_lgrpid;
10225         if (lgrp_id == LGRP_NONE) {
10226                 return;
10227         }
10228         ASSERT(lgrp_id < NLGRPS_MAX);
10229         if (svd->tr_policy_info.mem_lgrpid == lgrp_id) {
10230                 return;
10231         }
10232 
10233         /*
10234          * Use tryenter locking since we are locking as/seg and svntr hash
10235          * lock in reverse from synchronous thread order.
10236          */
10237         if (!AS_LOCK_TRYENTER(as, &as->a_lock, RW_READER)) {
10238                 SEGVN_TR_ADDSTAT(nolock);
10239                 if (segvn_lgrp_trthr_migrs_snpsht) {
10240                         segvn_lgrp_trthr_migrs_snpsht = 0;
10241                 }
10242                 return;
10243         }
10244         if (!SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock, RW_WRITER)) {
10245                 AS_LOCK_EXIT(as, &as->a_lock);
10246                 SEGVN_TR_ADDSTAT(nolock);
10247                 if (segvn_lgrp_trthr_migrs_snpsht) {
10248                         segvn_lgrp_trthr_migrs_snpsht = 0;
10249                 }
10250                 return;
10251         }
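
The tryenter calls above exist because this path takes the as and segment locks after the svntr hash lock, the reverse of the order used by the synchronous fault path; blocking here could deadlock against that path. A minimal sketch of the general back-off pattern (the lock and function names below are illustrative, not from this file):

/*
 * Illustrative only: two threads acquiring two locks in opposite
 * orders.  Thread B must use tryenter and back off; otherwise A
 * holding L1 while waiting for L2, and B holding L2 while waiting
 * for L1, is a deadlock.  (mutex_init() calls omitted for brevity.)
 */
static kmutex_t L1, L2;

static void
thread_a(void)                          /* acquires L1, then L2 */
{
        mutex_enter(&L1);
        mutex_enter(&L2);
        /* ... work ... */
        mutex_exit(&L2);
        mutex_exit(&L1);
}

static void
thread_b(void)                          /* acquires L2, then L1 */
{
        mutex_enter(&L2);
        if (!mutex_tryenter(&L1)) {     /* must not block here */
                mutex_exit(&L2);
                return;                 /* back off; retry later */
        }
        /* ... work ... */
        mutex_exit(&L1);
        mutex_exit(&L2);
}
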
10252         size = seg->s_size;
10253         if (svntrp->tr_amp[lgrp_id] == NULL) {
10254                 size_t trmem = atomic_add_long_nv(&segvn_textrepl_bytes, size);
10255                 if (trmem > segvn_textrepl_max_bytes) {
10256                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10257                         AS_LOCK_EXIT(as, &as->a_lock);
10258                         atomic_add_long(&segvn_textrepl_bytes, -size);
10259                         SEGVN_TR_ADDSTAT(normem);
10260                         return;
10261                 }
10262                 if (anon_try_resv_zone(size, NULL) == 0) {
10263                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10264                         AS_LOCK_EXIT(as, &as->a_lock);
10265                         atomic_add_long(&segvn_textrepl_bytes, -size);
10266                         SEGVN_TR_ADDSTAT(noanon);
10267                         return;
10268                 }
10269                 amp = anonmap_alloc(size, size, KM_NOSLEEP);
10270                 if (amp == NULL) {
10271                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10272                         AS_LOCK_EXIT(as, &as->a_lock);
10273                         atomic_add_long(&segvn_textrepl_bytes, -size);
10274                         anon_unresv_zone(size, NULL);
10275                         SEGVN_TR_ADDSTAT(nokmem);
10276                         return;
10277                 }
10278                 ASSERT(amp->refcnt == 1);
10279                 amp->a_szc = seg->s_szc;
10280                 svntrp->tr_amp[lgrp_id] = amp;
10281         }
10282         /*
10283          * We don't need to drop the bucket lock but here we give other
10284          * threads a chance.  svntr and svd can't be unlinked as long as the
10285          * segment lock is held as a writer and the AS lock is held as well.
10286          * After we retake the bucket lock we'll continue from where we left
10287          * off.  We'll be able to reach the end of either list since new
10288          * entries are always added to the beginning of the lists.
10289          */
10290         mutex_exit(&svntr_hashtab[hash].tr_lock);
10291         hat_unload_callback(as->a_hat, seg->s_base, size, 0, NULL);
10292         mutex_enter(&svntr_hashtab[hash].tr_lock);
10293 
10294         ASSERT(svd->tr_state == SEGVN_TR_ON);
10295         ASSERT(svd->amp != NULL);
10296         ASSERT(svd->tr_policy_info.mem_policy == LGRP_MEM_POLICY_NEXT_SEG);
10297         ASSERT(svd->tr_policy_info.mem_lgrpid != lgrp_id);
10298         ASSERT(svd->amp != svntrp->tr_amp[lgrp_id]);
10299 
10300         svd->tr_policy_info.mem_lgrpid = lgrp_id;
10301         svd->amp = svntrp->tr_amp[lgrp_id];
10302         p->p_tr_lgrpid = NLGRPS_MAX;
10303         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10304         AS_LOCK_EXIT(as, &as->a_lock);
10305 
10306         ASSERT(svntrp->tr_refcnt != 0);
10307         ASSERT(svd->vp == svntrp->tr_vp);
10308         ASSERT(svd->tr_policy_info.mem_lgrpid == lgrp_id);
10309         ASSERT(svd->amp != NULL && svd->amp == svntrp->tr_amp[lgrp_id]);
10310         ASSERT(svd->seg == seg);
10311         ASSERT(svd->tr_state == SEGVN_TR_ON);
10312 
10313         SEGVN_TR_ADDSTAT(asyncrepl);
10314 }


 534                         }
 535                 }
 536                 mutex_exit(&vp->v_lock);
 537         }
 538 }
 539 
 540 int
 541 segvn_create(struct seg *seg, void *argsp)
 542 {
 543         struct segvn_crargs *a = (struct segvn_crargs *)argsp;
 544         struct segvn_data *svd;
 545         size_t swresv = 0;
 546         struct cred *cred;
 547         struct anon_map *amp;
 548         int error = 0;
 549         size_t pgsz;
 550         lgrp_mem_policy_t mpolicy = LGRP_MEM_POLICY_DEFAULT;
 551         int use_rgn = 0;
 552         int trok = 0;
 553 
 554         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 555 
 556         if (a->type != MAP_PRIVATE && a->type != MAP_SHARED) {
 557                 panic("segvn_create type");
 558                 /*NOTREACHED*/
 559         }
 560 
 561         /*
 562          * Check arguments.  If a shared anon structure is given then
 563          * it is illegal to also specify a vp.
 564          */
 565         if (a->amp != NULL && a->vp != NULL) {
 566                 panic("segvn_create anon_map");
 567                 /*NOTREACHED*/
 568         }
 569 
 570         if (a->type == MAP_PRIVATE && (a->flags & MAP_TEXT) &&
 571             a->vp != NULL && a->prot == (PROT_USER | PROT_READ | PROT_EXEC) &&
 572             segvn_use_regions) {
 573                 use_rgn = 1;
 574         }


1005 
1006 /*
1007  * Concatenate two existing segments, if possible.
1008  * Return 0 on success, -1 if two segments are not compatible
1009  * or -2 on memory allocation failure.
1010  * If amp_cat == 1 then try and concat segments with anon maps
1011  */
1012 static int
1013 segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat)
1014 {
1015         struct segvn_data *svd1 = seg1->s_data;
1016         struct segvn_data *svd2 = seg2->s_data;
1017         struct anon_map *amp1 = svd1->amp;
1018         struct anon_map *amp2 = svd2->amp;
1019         struct vpage *vpage1 = svd1->vpage;
1020         struct vpage *vpage2 = svd2->vpage, *nvpage = NULL;
1021         size_t size, nvpsize;
1022         pgcnt_t npages1, npages2;
1023 
1024         ASSERT(seg1->s_as && seg2->s_as && seg1->s_as == seg2->s_as);
1025         ASSERT(AS_WRITE_HELD(seg1->s_as));
1026         ASSERT(seg1->s_ops == seg2->s_ops);
1027 
1028         if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie) ||
1029             HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
1030                 return (-1);
1031         }
1032 
1033         /* both segments exist, try to merge them */
1034 #define incompat(x)     (svd1->x != svd2->x)
1035         if (incompat(vp) || incompat(maxprot) ||
1036             (!svd1->pageadvice && !svd2->pageadvice && incompat(advice)) ||
1037             (!svd1->pageprot && !svd2->pageprot && incompat(prot)) ||
1038             incompat(type) || incompat(cred) || incompat(flags) ||
1039             seg1->s_szc != seg2->s_szc || incompat(policy_info.mem_policy) ||
1040             (svd2->softlockcnt > 0) || svd1->softlockcnt_send > 0)
1041                 return (-1);
1042 #undef incompat
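
segvn_concat() reports incompatibility (-1) and allocation failure (-2) separately; a hypothetical caller (not taken from this file) would treat only the latter as a resource problem:

/*
 * Hypothetical caller sketch.  On 0 the two mappings are now covered
 * by a single segment; on -1 they simply remain separate; -2 means a
 * transient allocation failure and nothing was changed.
 */
switch (segvn_concat(seg1, seg2, 1)) {
case 0:
        /* merged: continue with the combined segment */
        break;
case -1:
        /* incompatible: keep the two segments as they are */
        break;
case -2:
        /* allocation failed: caller may retry or proceed unmerged */
        break;
}
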
1043 
1044         /*
1045          * vp == NULL implies zfod, offset doesn't matter


1250 /*
1251  * Extend the previous segment (seg1) to include the
1252  * new segment (seg2 + a), if possible.
1253  * Return 0 on success.
1254  */
1255 static int
1256 segvn_extend_prev(seg1, seg2, a, swresv)
1257         struct seg *seg1, *seg2;
1258         struct segvn_crargs *a;
1259         size_t swresv;
1260 {
1261         struct segvn_data *svd1 = (struct segvn_data *)seg1->s_data;
1262         size_t size;
1263         struct anon_map *amp1;
1264         struct vpage *new_vpage;
1265 
1266         /*
1267          * We don't need any segment level locks for "segvn" data
1268          * since the address space is "write" locked.
1269          */
1270         ASSERT(seg1->s_as && AS_WRITE_HELD(seg1->s_as));
1271 
1272         if (HAT_IS_REGION_COOKIE_VALID(svd1->rcookie)) {
1273                 return (-1);
1274         }
1275 
1276         /* second segment is new, try to extend first */
1277         /* XXX - should also check cred */
1278         if (svd1->vp != a->vp || svd1->maxprot != a->maxprot ||
1279             (!svd1->pageprot && (svd1->prot != a->prot)) ||
1280             svd1->type != a->type || svd1->flags != a->flags ||
1281             seg1->s_szc != a->szc || svd1->softlockcnt_send > 0)
1282                 return (-1);
1283 
1284         /* vp == NULL implies zfod, offset doesn't matter */
1285         if (svd1->vp != NULL &&
1286             svd1->offset + seg1->s_size != (a->offset & PAGEMASK))
1287                 return (-1);
1288 
1289         if (svd1->tr_state != SEGVN_TR_OFF) {
1290                 return (-1);


1371  * Extend the next segment (seg2) to include the
1372  * new segment (seg1 + a), if possible.
1373  * Return 0 on success.
1374  */
1375 static int
1376 segvn_extend_next(
1377         struct seg *seg1,
1378         struct seg *seg2,
1379         struct segvn_crargs *a,
1380         size_t swresv)
1381 {
1382         struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data;
1383         size_t size;
1384         struct anon_map *amp2;
1385         struct vpage *new_vpage;
1386 
1387         /*
1388          * We don't need any segment level locks for "segvn" data
1389          * since the address space is "write" locked.
1390          */
1391         ASSERT(seg2->s_as && AS_WRITE_HELD(seg2->s_as));
1392 
1393         if (HAT_IS_REGION_COOKIE_VALID(svd2->rcookie)) {
1394                 return (-1);
1395         }
1396 
1397         /* first segment is new, try to extend second */
1398         /* XXX - should also check cred */
1399         if (svd2->vp != a->vp || svd2->maxprot != a->maxprot ||
1400             (!svd2->pageprot && (svd2->prot != a->prot)) ||
1401             svd2->type != a->type || svd2->flags != a->flags ||
1402             seg2->s_szc != a->szc || svd2->softlockcnt_sbase > 0)
1403                 return (-1);
1404         /* vp == NULL implies zfod, offset doesn't matter */
1405         if (svd2->vp != NULL &&
1406             (a->offset & PAGEMASK) + seg1->s_size != svd2->offset)
1407                 return (-1);
1408 
1409         if (svd2->tr_state != SEGVN_TR_OFF) {
1410                 return (-1);
1411         }
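
Both extension helpers also require the file offsets to line up with the virtual addresses; a worked example of the contiguity test in segvn_extend_next() (the numbers are invented):

/*
 * Example values (invented): the existing segment seg2 maps the file
 * from offset 0x6000 onward, and the new mapping being created just
 * below it is 0x2000 bytes starting at file offset 0x4000:
 *
 *      (a->offset & PAGEMASK) + seg1->s_size == 0x4000 + 0x2000 == 0x6000
 *      svd2->offset                          == 0x6000
 *
 * The values match, so the new range can be folded into seg2 instead
 * of creating a second segment.
 */
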


1558                         page_unlock(pp);
1559                 }
1560                 addr += PAGESIZE;
1561                 old_idx++;
1562                 new_idx++;
1563         }
1564 
1565         return (0);
1566 }
1567 
1568 static int
1569 segvn_dup(struct seg *seg, struct seg *newseg)
1570 {
1571         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
1572         struct segvn_data *newsvd;
1573         pgcnt_t npages = seg_pages(seg);
1574         int error = 0;
1575         size_t len;
1576         struct anon_map *amp;
1577 
1578         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1579         ASSERT(newseg->s_as->a_proc->p_parent == curproc);
1580 
1581         /*
1582          * If segment has anon reserved, reserve more for the new seg.
1583          * For a MAP_NORESERVE segment swresv will be a count of all the
1584          * allocated anon slots; thus we reserve for the child as many slots
1585          * as the parent has allocated. This semantic prevents the child or
1586          * parent from dying during a copy-on-write fault caused by trying
1587          * to write a shared pre-existing anon page.
1588          */
1589         if ((len = svd->swresv) != 0) {
1590                 if (anon_resv(svd->swresv) == 0)
1591                         return (ENOMEM);
1592 
1593                 TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
1594                     seg, len, 0);
1595         }
1596 
1597         newsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP);
1598 


1855 segvn_unmap(struct seg *seg, caddr_t addr, size_t len)
1856 {
1857         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
1858         struct segvn_data *nsvd;
1859         struct seg *nseg;
1860         struct anon_map *amp;
1861         pgcnt_t opages;         /* old segment size in pages */
1862         pgcnt_t npages;         /* new segment size in pages */
1863         pgcnt_t dpages;         /* pages being deleted (unmapped) */
1864         hat_callback_t callback;        /* used for free_vp_pages() */
1865         hat_callback_t *cbp = NULL;
1866         caddr_t nbase;
1867         size_t nsize;
1868         size_t oswresv;
1869         int reclaim = 1;
1870 
1871         /*
1872          * We don't need any segment level locks for "segvn" data
1873          * since the address space is "write" locked.
1874          */
1875         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1876 
1877         /*
1878          * Fail the unmap if pages are SOFTLOCKed through this mapping.
1879          * softlockcnt is protected from change by the as write lock.
1880          */
1881 retry:
1882         if (svd->softlockcnt > 0) {
1883                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
1884 
1885                 /*
1886                  * If this is a shared segment, a non-zero softlockcnt
1887                  * means locked pages are still in use.
1888                  */
1889                 if (svd->type == MAP_SHARED) {
1890                         return (EAGAIN);
1891                 }
1892 
1893                 /*
1894                  * Since we hold the writer's lock, nobody can fill
1895                  * the cache during the purge. The flush either succeeds


2405                 }
2406                 TRACE_3(TR_FAC_VM, TR_ANON_PROC, "anon proc:%p %lu %u",
2407                     seg, len, 0);
2408         }
2409 
2410         return (0);                     /* I'm glad that's all over with! */
2411 }
2412 
2413 static void
2414 segvn_free(struct seg *seg)
2415 {
2416         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
2417         pgcnt_t npages = seg_pages(seg);
2418         struct anon_map *amp;
2419         size_t len;
2420 
2421         /*
2422          * We don't need any segment level locks for "segvn" data
2423          * since the address space is "write" locked.
2424          */
2425         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
2426         ASSERT(svd->tr_state == SEGVN_TR_OFF);
2427 
2428         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
2429 
2430         /*
2431          * Be sure to unlock pages. XXX Why do things get free'ed instead
2432          * of unmapped? XXX
2433          */
2434         (void) segvn_lockop(seg, seg->s_base, seg->s_size,
2435             0, MC_UNLOCK, NULL, 0);
2436 
2437         /*
2438          * Deallocate the vpage and anon pointers if necessary and possible.
2439          */
2440         if (svd->vpage != NULL) {
2441                 kmem_free(svd->vpage, vpgtob(npages));
2442                 svd->vpage = NULL;
2443         }
2444         if ((amp = svd->amp) != NULL) {
2445                 /*


2555 }
2556 
2557 /*
2558  * Do a F_SOFTUNLOCK call over the range requested.  The range must have
2559  * already been F_SOFTLOCK'ed.
2560  * Caller must always match addr and len of a softunlock with a previous
2561  * softlock with exactly the same addr and len.
2562  */
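
The matching-range rule is the caller's half of the F_SOFTLOCK contract; a hedged sketch of a hypothetical in-kernel user going through as_fault() (the helper name and error handling are invented):

/*
 * Hypothetical sketch: lock a user range down for I/O, then unlock
 * exactly the same addr/len that was softlocked.
 */
static int
locked_io_sketch(struct as *as, caddr_t addr, size_t len)
{
        faultcode_t fc;

        fc = as_fault(as->a_hat, as, addr, len, F_SOFTLOCK, S_WRITE);
        if (fc != 0)
                return (EFAULT);        /* could not lock the range */

        /* ... do the I/O into the locked-down pages ... */

        (void) as_fault(as->a_hat, as, addr, len, F_SOFTUNLOCK, S_WRITE);
        return (0);
}
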
2563 static void
2564 segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
2565 {
2566         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
2567         page_t *pp;
2568         caddr_t adr;
2569         struct vnode *vp;
2570         u_offset_t offset;
2571         ulong_t anon_index;
2572         struct anon_map *amp;
2573         struct anon *ap = NULL;
2574 
2575         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2576         ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
2577 
2578         if ((amp = svd->amp) != NULL)
2579                 anon_index = svd->anon_index + seg_page(seg, addr);
2580 
2581         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
2582                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
2583                 hat_unlock_region(seg->s_as->a_hat, addr, len, svd->rcookie);
2584         } else {
2585                 hat_unlock(seg->s_as->a_hat, addr, len);
2586         }
2587         for (adr = addr; adr < addr + len; adr += PAGESIZE) {
2588                 if (amp != NULL) {
2589                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2590                         if ((ap = anon_get_ptr(amp->ahp, anon_index++))
2591                             != NULL) {
2592                                 swap_xlate(ap, &vp, &offset);
2593                         } else {
2594                                 vp = svd->vp;
2595                                 offset = svd->offset +


4944     enum fault_type type, enum seg_rw rw)
4945 {
4946         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
4947         page_t **plp, **ppp, *pp;
4948         u_offset_t off;
4949         caddr_t a;
4950         struct vpage *vpage;
4951         uint_t vpprot, prot;
4952         int err;
4953         page_t *pl[PVN_GETPAGE_NUM + 1];
4954         size_t plsz, pl_alloc_sz;
4955         size_t page;
4956         ulong_t anon_index;
4957         struct anon_map *amp;
4958         int dogetpage = 0;
4959         caddr_t lpgaddr, lpgeaddr;
4960         size_t pgsz;
4961         anon_sync_obj_t cookie;
4962         int brkcow = BREAK_COW_SHARE(rw, type, svd->type);
4963 
4964         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
4965         ASSERT(svd->amp == NULL || svd->rcookie == HAT_INVALID_REGION_COOKIE);
4966 
4967         /*
4968          * First handle the easy stuff
4969          */
4970         if (type == F_SOFTUNLOCK) {
4971                 if (rw == S_READ_NOCOW) {
4972                         rw = S_READ;
4973                         ASSERT(AS_WRITE_HELD(seg->s_as));
4974                 }
4975                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
4976                 pgsz = (seg->s_szc == 0) ? PAGESIZE :
4977                     page_get_pagesize(seg->s_szc);
4978                 VM_STAT_COND_ADD(pgsz > PAGESIZE, segvnvmstats.fltanpages[16]);
4979                 CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
4980                 segvn_softunlock(seg, lpgaddr, lpgeaddr - lpgaddr, rw);
4981                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
4982                 return (0);
4983         }
4984 
4985         ASSERT(svd->tr_state == SEGVN_TR_OFF ||
4986             !HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
4987         if (brkcow == 0) {
4988                 if (svd->tr_state == SEGVN_TR_INIT) {
4989                         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
4990                         if (svd->tr_state == SEGVN_TR_INIT) {
4991                                 ASSERT(svd->vp != NULL && svd->amp == NULL);
4992                                 ASSERT(svd->flags & MAP_TEXT);
4993                                 ASSERT(svd->type == MAP_PRIVATE);


5098          *
5099          * For S_READ_NOCOW, it's safe not to do a copy on write because the
5100          * caller makes sure no COW will be caused by another thread for a
5101          * softlocked page.
5102          */
5103         if (type == F_SOFTLOCK && svd->vp != NULL && seg->s_szc != 0) {
5104                 int demote = 0;
5105 
5106                 if (rw != S_READ_NOCOW) {
5107                         demote = 1;
5108                 }
5109                 if (!demote && len > PAGESIZE) {
5110                         pgsz = page_get_pagesize(seg->s_szc);
5111                         CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr,
5112                             lpgeaddr);
5113                         if (lpgeaddr - lpgaddr > pgsz) {
5114                                 demote = 1;
5115                         }
5116                 }
5117 
5118                 ASSERT(demote || AS_WRITE_HELD(seg->s_as));
5119 
5120                 if (demote) {
5121                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5122                         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
5123                         if (seg->s_szc != 0) {
5124                                 segvn_vmpss_clrszc_cnt++;
5125                                 ASSERT(svd->softlockcnt == 0);
5126                                 err = segvn_clrszc(seg);
5127                                 if (err) {
5128                                         segvn_vmpss_clrszc_err++;
5129                                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5130                                         return (FC_MAKE_ERR(err));
5131                                 }
5132                         }
5133                         ASSERT(seg->s_szc == 0);
5134                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5135                         goto top;
5136                 }
5137         }
5138 


5152                 if (svd->amp == NULL) {
5153                         svd->amp = anonmap_alloc(seg->s_size, 0, ANON_SLEEP);
5154                         svd->amp->a_szc = seg->s_szc;
5155                 }
5156                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5157 
5158                 /*
5159                  * Start all over again since segment protections
5160                  * may have changed after we dropped the "read" lock.
5161                  */
5162                 goto top;
5163         }
5164 
5165         /*
5166          * S_READ_NOCOW vs S_READ distinction was
5167          * only needed for the code above. After
5168          * that we treat it as S_READ.
5169          */
5170         if (rw == S_READ_NOCOW) {
5171                 ASSERT(type == F_SOFTLOCK);
5172                 ASSERT(AS_WRITE_HELD(seg->s_as));
5173                 rw = S_READ;
5174         }
5175 
5176         amp = svd->amp;
5177 
5178         /*
5179          * MADV_SEQUENTIAL work is ignored for large page segments.
5180          */
5181         if (seg->s_szc != 0) {
5182                 pgsz = page_get_pagesize(seg->s_szc);
5183                 ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
5184                 CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
5185                 if (svd->vp == NULL) {
5186                         err = segvn_fault_anonpages(hat, seg, lpgaddr,
5187                             lpgeaddr, type, rw, addr, addr + len, brkcow);
5188                 } else {
5189                         err = segvn_fault_vnodepages(hat, seg, lpgaddr,
5190                             lpgeaddr, type, rw, addr, addr + len, brkcow);
5191                         if (err == IE_RETRY) {
5192                                 ASSERT(seg->s_szc == 0);


5623                 ANON_LOCK_EXIT(&amp->a_rwlock);
5624         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5625         if (pl_alloc_sz)
5626                 kmem_free(plp, pl_alloc_sz);
5627         return (0);
5628 }
5629 
5630 /*
5631  * This routine is used to start I/O on pages asynchronously.  XXX it will
5632  * only create PAGESIZE pages. At fault time they will be relocated into
5633  * larger pages.
5634  */
5635 static faultcode_t
5636 segvn_faulta(struct seg *seg, caddr_t addr)
5637 {
5638         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
5639         int err;
5640         struct anon_map *amp;
5641         vnode_t *vp;
5642 
5643         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
5644 
5645         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
5646         if ((amp = svd->amp) != NULL) {
5647                 struct anon *ap;
5648 
5649                 /*
5650                  * Reader lock to prevent amp->ahp from being changed.
5651                  * This is advisory, it's ok to miss a page, so
5652                  * we don't do anon_array_enter lock.
5653                  */
5654                 ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
5655                 if ((ap = anon_get_ptr(amp->ahp,
5656                     svd->anon_index + seg_page(seg, addr))) != NULL) {
5657 
5658                         err = anon_getpage(&ap, NULL, NULL,
5659                             0, seg, addr, S_READ, svd->cred);
5660 
5661                         ANON_LOCK_EXIT(&amp->a_rwlock);
5662                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5663                         if (err)


5680             PAGESIZE, NULL, NULL, 0, seg, addr,
5681             S_OTHER, svd->cred, NULL);
5682 
5683         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5684         if (err)
5685                 return (FC_MAKE_ERR(err));
5686         return (0);
5687 }
5688 
5689 static int
5690 segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
5691 {
5692         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
5693         struct vpage *cvp, *svp, *evp;
5694         struct vnode *vp;
5695         size_t pgsz;
5696         pgcnt_t pgcnt;
5697         anon_sync_obj_t cookie;
5698         int unload_done = 0;
5699 
5700         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
5701 
5702         if ((svd->maxprot & prot) != prot)
5703                 return (EACCES);                        /* violated maxprot */
5704 
5705         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
5706 
5707         /* return if prot is the same */
5708         if (!svd->pageprot && svd->prot == prot) {
5709                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5710                 return (0);
5711         }
5712 
5713         /*
5714          * Since we change protections we first have to flush the cache.
5715          * This makes sure all the pagelock calls have to recheck
5716          * protections.
5717          */
5718         if (svd->softlockcnt > 0) {
5719                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
5720 


5757         }
5758 
5759         if ((prot & PROT_WRITE) && svd->type == MAP_SHARED &&
5760             svd->vp != NULL && (svd->vp->v_flag & VVMEXEC)) {
5761                 ASSERT(vn_is_mapped(svd->vp, V_WRITE));
5762                 segvn_inval_trcache(svd->vp);
5763         }
5764         if (seg->s_szc != 0) {
5765                 int err;
5766                 pgsz = page_get_pagesize(seg->s_szc);
5767                 pgcnt = pgsz >> PAGESHIFT;
5768                 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
5769                 if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) {
5770                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
5771                         ASSERT(seg->s_base != addr || seg->s_size != len);
5772                         /*
5773                          * If we are holding the as lock as a reader then
5774                          * we need to return IE_RETRY and let the as
5775                          * layer drop and re-acquire the lock as a writer.
5776                          */
5777                         if (AS_READ_HELD(seg->s_as))
5778                                 return (IE_RETRY);
5779                         VM_STAT_ADD(segvnvmstats.demoterange[1]);
5780                         if (svd->type == MAP_PRIVATE || svd->vp != NULL) {
5781                                 err = segvn_demote_range(seg, addr, len,
5782                                     SDR_END, 0);
5783                         } else {
5784                                 uint_t szcvec = map_pgszcvec(seg->s_base,
5785                                     pgsz, (uintptr_t)seg->s_base,
5786                                     (svd->flags & MAP_TEXT), MAPPGSZC_SHM, 0);
5787                                 err = segvn_demote_range(seg, addr, len,
5788                                     SDR_END, szcvec);
5789                         }
5790                         if (err == 0)
5791                                 return (IE_RETRY);
5792                         if (err == ENOMEM)
5793                                 return (IE_NOMEM);
5794                         return (err);
5795                 }
5796         }
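
IE_RETRY asks the address-space layer to come back in holding the as lock as a writer, which is what permits the segment splits above; a simplified sketch of that retry loop (this is not the actual as_setprot() code):

/*
 * Simplified sketch only.  On IE_RETRY, retake the address space lock
 * as a writer so the segment driver may split or demote segments,
 * then call the segment op again.
 */
static int
setprot_retry_sketch(struct as *as, struct seg *seg, caddr_t addr,
    size_t len, uint_t prot)
{
        int error;

        AS_LOCK_ENTER(as, RW_READER);
        for (;;) {
                error = SEGOP_SETPROT(seg, addr, len, prot);
                if (error != IE_RETRY)
                        break;
                AS_LOCK_EXIT(as);
                AS_LOCK_ENTER(as, RW_WRITER);
        }
        AS_LOCK_EXIT(as);
        return (error);
}
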
5797 


6083         return (0);
6084 }
6085 
6086 /*
6087  * segvn_setpagesize is called via SEGOP_SETPAGESIZE from as_setpagesize,
6088  * to determine if the seg is capable of mapping the requested szc.
6089  */
6090 static int
6091 segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
6092 {
6093         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6094         struct segvn_data *nsvd;
6095         struct anon_map *amp = svd->amp;
6096         struct seg *nseg;
6097         caddr_t eaddr = addr + len, a;
6098         size_t pgsz = page_get_pagesize(szc);
6099         pgcnt_t pgcnt = page_get_pagecnt(szc);
6100         int err;
6101         u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base);
6102 
6103         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
6104         ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6105 
6106         if (seg->s_szc == szc || segvn_lpg_disable != 0) {
6107                 return (0);
6108         }
6109 
6110         /*
6111          * addr should always be pgsz aligned but eaddr may be misaligned if
6112          * it's at the end of the segment.
6113          *
6114          * XXX we should assert this condition since as_setpagesize() logic
6115          * guarantees it.
6116          */
6117         if (!IS_P2ALIGNED(addr, pgsz) ||
6118             (!IS_P2ALIGNED(eaddr, pgsz) &&
6119             eaddr != seg->s_base + seg->s_size)) {
6120 
6121                 segvn_setpgsz_align_err++;
6122                 return (EINVAL);
6123         }


6371 }
6372 
6373 static int
6374 segvn_clrszc(struct seg *seg)
6375 {
6376         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6377         struct anon_map *amp = svd->amp;
6378         size_t pgsz;
6379         pgcnt_t pages;
6380         int err = 0;
6381         caddr_t a = seg->s_base;
6382         caddr_t ea = a + seg->s_size;
6383         ulong_t an_idx = svd->anon_index;
6384         vnode_t *vp = svd->vp;
6385         struct vpage *vpage = svd->vpage;
6386         page_t *anon_pl[1 + 1], *pp;
6387         struct anon *ap, *oldap;
6388         uint_t prot = svd->prot, vpprot;
6389         int pageflag = 0;
6390 
6391         ASSERT(AS_WRITE_HELD(seg->s_as) ||
6392             SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
6393         ASSERT(svd->softlockcnt == 0);
6394 
6395         if (vp == NULL && amp == NULL) {
6396                 ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6397                 seg->s_szc = 0;
6398                 return (0);
6399         }
6400 
6401         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {
6402                 ASSERT(svd->amp == NULL);
6403                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
6404                 hat_leave_region(seg->s_as->a_hat, svd->rcookie,
6405                     HAT_REGION_TEXT);
6406                 svd->rcookie = HAT_INVALID_REGION_COOKIE;
6407         } else if (svd->tr_state == SEGVN_TR_ON) {
6408                 ASSERT(svd->amp != NULL);
6409                 segvn_textunrepl(seg, 1);
6410                 ASSERT(svd->amp == NULL && svd->tr_state == SEGVN_TR_OFF);
6411                 amp = NULL;


6583                 page_unlock(ppa[i]);
6584         }
6585 
6586         kmem_free(ppa, ppasize);
6587         return (err);
6588 }
6589 
6590 /*
6591  * Returns right (upper address) segment if split occurred.
6592  * If the address is equal to the beginning or end of its segment it returns
6593  * the current segment.
6594  */
6595 static struct seg *
6596 segvn_split_seg(struct seg *seg, caddr_t addr)
6597 {
6598         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6599         struct seg *nseg;
6600         size_t nsize;
6601         struct segvn_data *nsvd;
6602 
6603         ASSERT(AS_WRITE_HELD(seg->s_as));
6604         ASSERT(svd->tr_state == SEGVN_TR_OFF);
6605 
6606         ASSERT(addr >= seg->s_base);
6607         ASSERT(addr <= seg->s_base + seg->s_size);
6608         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6609 
6610         if (addr == seg->s_base || addr == seg->s_base + seg->s_size)
6611                 return (seg);
6612 
6613         nsize = seg->s_base + seg->s_size - addr;
6614         seg->s_size = addr - seg->s_base;
6615         nseg = seg_alloc(seg->s_as, addr, nsize);
6616         ASSERT(nseg != NULL);
6617         nseg->s_ops = seg->s_ops;
6618         nsvd = kmem_cache_alloc(segvn_cache, KM_SLEEP);
6619         nseg->s_data = (void *)nsvd;
6620         nseg->s_szc = seg->s_szc;
6621         *nsvd = *svd;
6622         ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
6623         nsvd->seg = nseg;


6738  */
6739 static int
6740 segvn_demote_range(
6741         struct seg *seg,
6742         caddr_t addr,
6743         size_t len,
6744         int flag,
6745         uint_t szcvec)
6746 {
6747         caddr_t eaddr = addr + len;
6748         caddr_t lpgaddr, lpgeaddr;
6749         struct seg *nseg;
6750         struct seg *badseg1 = NULL;
6751         struct seg *badseg2 = NULL;
6752         size_t pgsz;
6753         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6754         int err;
6755         uint_t szc = seg->s_szc;
6756         uint_t tszcvec;
6757 
6758         ASSERT(AS_WRITE_HELD(seg->s_as));
6759         ASSERT(svd->tr_state == SEGVN_TR_OFF);
6760         ASSERT(szc != 0);
6761         pgsz = page_get_pagesize(szc);
6762         ASSERT(seg->s_base != addr || seg->s_size != len);
6763         ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size);
6764         ASSERT(svd->softlockcnt == 0);
6765         ASSERT(svd->rcookie == HAT_INVALID_REGION_COOKIE);
6766         ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED));
6767 
6768         CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr);
6769         ASSERT(flag == SDR_RANGE || eaddr < lpgeaddr || addr > lpgaddr);
6770         if (flag == SDR_RANGE) {
6771                 /* demote entire range */
6772                 badseg1 = nseg = segvn_split_seg(seg, lpgaddr);
6773                 (void) segvn_split_seg(nseg, lpgeaddr);
6774                 ASSERT(badseg1->s_base == lpgaddr);
6775                 ASSERT(badseg1->s_size == lpgeaddr - lpgaddr);
6776         } else if (addr != lpgaddr) {
6777                 ASSERT(flag == SDR_END);
6778                 badseg1 = nseg = segvn_split_seg(seg, lpgaddr);


6867                 ASSERT(badseg2->s_base > addr);
6868                 ASSERT(eaddr > badseg2->s_base);
6869                 ASSERT(eaddr < badseg2->s_base + badseg2->s_size);
6870 
6871                 badseg2->s_szc = tszc;
6872                 if (!IS_P2ALIGNED(eaddr, tpgsz)) {
6873                         return (segvn_demote_range(badseg2, badseg2->s_base,
6874                             eaddr - badseg2->s_base, SDR_END, tszcvec));
6875                 }
6876         }
6877 
6878         return (0);
6879 }
6880 
6881 static int
6882 segvn_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
6883 {
6884         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6885         struct vpage *vp, *evp;
6886 
6887         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6888 
6889         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
6890         /*
6891          * If segment protections can be used, simply check against them.
6892          */
6893         if (svd->pageprot == 0) {
6894                 int err;
6895 
6896                 err = ((svd->prot & prot) != prot) ? EACCES : 0;
6897                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6898                 return (err);
6899         }
6900 
6901         /*
6902          * Have to check down to the vpage level.
6903          */
6904         evp = &svd->vpage[seg_page(seg, addr + len)];
6905         for (vp = &svd->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
6906                 if ((VPP_PROT(vp) & prot) != prot) {
6907                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6908                         return (EACCES);
6909                 }
6910         }
6911         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6912         return (0);
6913 }
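
Callers normally reach this routine through the generic segment operation; a minimal, hypothetical sketch of checking that a range is readable (assumes the range lies within a single segment):

/*
 * Hypothetical helper, not from this file: dispatches to
 * segvn_checkprot() via SEGOP_CHECKPROT for segvn-backed segments.
 */
static int
range_readable_sketch(struct as *as, caddr_t addr, size_t len)
{
        struct seg *seg;
        int err;

        AS_LOCK_ENTER(as, RW_READER);
        seg = as_segat(as, addr);               /* segment covering addr */
        err = (seg == NULL) ? EFAULT :
            SEGOP_CHECKPROT(seg, addr, len, PROT_READ);
        AS_LOCK_EXIT(as);
        return (err);
}
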
6914 
6915 static int
6916 segvn_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
6917 {
6918         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6919         size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
6920 
6921         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6922 
6923         if (pgno != 0) {
6924                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
6925                 if (svd->pageprot == 0) {
6926                         do {
6927                                 protv[--pgno] = svd->prot;
6928                         } while (pgno != 0);
6929                 } else {
6930                         size_t pgoff = seg_page(seg, addr);
6931 
6932                         do {
6933                                 pgno--;
6934                                 protv[pgno] = VPP_PROT(&svd->vpage[pgno+pgoff]);
6935                         } while (pgno != 0);
6936                 }
6937                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
6938         }
6939         return (0);
6940 }
6941 
6942 static u_offset_t
6943 segvn_getoffset(struct seg *seg, caddr_t addr)
6944 {
6945         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6946 
6947         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6948 
6949         return (svd->offset + (uintptr_t)(addr - seg->s_base));
6950 }
6951 
6952 /*ARGSUSED*/
6953 static int
6954 segvn_gettype(struct seg *seg, caddr_t addr)
6955 {
6956         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6957 
6958         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6959 
6960         return (svd->type | (svd->flags & (MAP_NORESERVE | MAP_TEXT |
6961             MAP_INITDATA)));
6962 }
6963 
6964 /*ARGSUSED*/
6965 static int
6966 segvn_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
6967 {
6968         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6969 
6970         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6971 
6972         *vpp = svd->vp;
6973         return (0);
6974 }
6975 
6976 /*
6977  * Check to see if it makes sense to do kluster/read ahead to
6978  * addr + delta relative to the mapping at addr.  We assume here
6979  * that delta is a signed PAGESIZE'd multiple (which can be negative).
6980  *
6981  * For segvn, we currently "approve" of the action if we are
6982  * still in the segment and it maps from the same vp/off,
6983  * or if the advice stored in segvn_data or vpages allows it.
6984  * Currently, klustering is disallowed only if MADV_RANDOM is set.
6985  */
6986 static int
6987 segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
6988 {
6989         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
6990         struct anon *oap, *ap;
6991         ssize_t pd;
6992         size_t page;
6993         struct vnode *vp1, *vp2;
6994         u_offset_t off1, off2;
6995         struct anon_map *amp;
6996 
6997         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
6998         ASSERT(AS_WRITE_HELD(seg->s_as) ||
6999             SEGVN_LOCK_HELD(seg->s_as, &svd->lock));
7000 
7001         if (addr + delta < seg->s_base ||
7002             addr + delta >= (seg->s_base + seg->s_size))
7003                 return (-1);            /* exceeded segment bounds */
7004 
7005         pd = delta / (ssize_t)PAGESIZE; /* divide to preserve sign bit */
7006         page = seg_page(seg, addr);
7007 
7008         /*
7009          * Check to see if either of the pages addr or addr + delta
7010          * have advice set that prevents klustering (if MADV_RANDOM advice
7011          * is set for entire segment, or MADV_SEQUENTIAL is set and delta
7012          * is negative).
7013          */
7014         if (svd->advice == MADV_RANDOM ||
7015             svd->advice == MADV_SEQUENTIAL && delta < 0)
7016                 return (-1);
7017         else if (svd->pageadvice && svd->vpage) {
7018                 struct vpage *bvpp, *evpp;


7083  *
7084  * The value returned is intended to correlate well with the process's
7085  * memory requirements.  However, there are some caveats:
7086  * 1)   When given a shared segment as argument, this routine will
7087  *      only succeed in swapping out pages for the last sharer of the
7088  *      segment.  (Previous callers will only have decremented mapping
7089  *      reference counts.)
7090  * 2)   We assume that the hat layer maintains a large enough translation
7091  *      cache to capture process reference patterns.
7092  */
7093 static size_t
7094 segvn_swapout(struct seg *seg)
7095 {
7096         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7097         struct anon_map *amp;
7098         pgcnt_t pgcnt = 0;
7099         pgcnt_t npages;
7100         pgcnt_t page;
7101         ulong_t anon_index;
7102 
7103         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
7104 
7105         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7106         /*
7107          * Find pages unmapped by our caller and force them
7108          * out to the virtual swap device.
7109          */
7110         if ((amp = svd->amp) != NULL)
7111                 anon_index = svd->anon_index;
7112         npages = seg->s_size >> PAGESHIFT;
7113         for (page = 0; page < npages; page++) {
7114                 page_t *pp;
7115                 struct anon *ap;
7116                 struct vnode *vp;
7117                 u_offset_t off;
7118                 anon_sync_obj_t cookie;
7119 
7120                 /*
7121                  * Obtain <vp, off> pair for the page, then look it up.
7122                  *
7123                  * Note that this code is willing to consider regular


7262 static int
7263 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7264 {
7265         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7266         struct vpage *vpp;
7267         page_t *pp;
7268         u_offset_t offset;
7269         struct vnode *vp;
7270         u_offset_t off;
7271         caddr_t eaddr;
7272         int bflags;
7273         int err = 0;
7274         int segtype;
7275         int pageprot;
7276         int prot;
7277         ulong_t anon_index;
7278         struct anon_map *amp;
7279         struct anon *ap;
7280         anon_sync_obj_t cookie;
7281 
7282         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
7283 
7284         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7285 
7286         if (svd->softlockcnt > 0) {
7287                 /*
7288                  * If this is a shared segment, a non-zero softlockcnt
7289                  * means locked pages are still in use.
7290                  */
7291                 if (svd->type == MAP_SHARED) {
7292                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7293                         return (EAGAIN);
7294                 }
7295 
7296                 /*
7297                  * flush all pages from seg cache
7298                  * otherwise we may deadlock in swap_putpage
7299                  * for B_INVAL page (4175402).
7300                  *
7301                  * Even if we grab segvn WRITER's lock
7302                  * here, there might be another thread which could've


7484 /*
7485  * Determine if we have data corresponding to pages in the
7486  * primary storage virtual memory cache (i.e., "in core").
7487  */
7488 static size_t
7489 segvn_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
7490 {
7491         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7492         struct vnode *vp, *avp;
7493         u_offset_t offset, aoffset;
7494         size_t p, ep;
7495         int ret;
7496         struct vpage *vpp;
7497         page_t *pp;
7498         uint_t start;
7499         struct anon_map *amp;           /* XXX - for locknest */
7500         struct anon *ap;
7501         uint_t attr;
7502         anon_sync_obj_t cookie;
7503 
7504         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
7505 
7506         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7507         if (svd->amp == NULL && svd->vp == NULL) {
7508                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7509                 bzero(vec, btopr(len));
7510                 return (len);   /* no anonymous pages created yet */
7511         }
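
This is the segment-level backend that mincore(2) ultimately relies on: one status byte per page, with the low-order bit indicating residency. A hedged userland sketch of consuming that vector (standard libc interface, not part of this file):

#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>

/* Print which pages of a mapping are currently resident. */
static void
report_resident(caddr_t addr, size_t len)
{
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
        size_t npages = (len + pgsz - 1) / pgsz;
        char vec[npages];                       /* one byte per page */

        if (mincore(addr, len, vec) != 0) {
                perror("mincore");
                return;
        }
        for (size_t i = 0; i < npages; i++) {
                (void) printf("page %zu: %s\n", i,
                    (vec[i] & 0x1) ? "in core" : "not in core");
        }
}
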
7512 
7513         p = seg_page(seg, addr);
7514         ep = seg_page(seg, addr + len);
7515         start = svd->vp ? SEG_PAGE_VNODEBACKED : 0;
7516 
7517         amp = svd->amp;
7518         for (; p < ep; p++, addr += PAGESIZE) {
7519                 vpp = (svd->vpage) ? &svd->vpage[p]: NULL;
7520                 ret = start;
7521                 ap = NULL;
7522                 avp = NULL;
7523                 /* Grab the vnode/offset for the anon slot */
7524                 if (amp != NULL) {


7660         int pageprot;
7661         int claim;
7662         struct vnode *vp;
7663         ulong_t anon_index;
7664         struct anon_map *amp;
7665         struct anon *ap;
7666         struct vattr va;
7667         anon_sync_obj_t cookie;
7668         struct kshmid *sp = NULL;
7669         struct proc     *p = curproc;
7670         kproject_t      *proj = NULL;
7671         int chargeproc = 1;
7672         size_t locked_bytes = 0;
7673         size_t unlocked_bytes = 0;
7674         int err = 0;
7675 
7676         /*
7677          * Hold write lock on address space because we may split or concatenate
7678          * segments
7679          */
7680         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
7681 
7682         /*
7683          * If this is a shm, use shm's project and zone, else use
7684          * project and zone of calling process
7685          */
7686 
7687         /* Determine if this segment backs a sysV shm */
7688         if (svd->amp != NULL && svd->amp->a_sp != NULL) {
7689                 ASSERT(svd->type == MAP_SHARED);
7690                 ASSERT(svd->tr_state == SEGVN_TR_OFF);
7691                 sp = svd->amp->a_sp;
7692                 proj = sp->shm_perm.ipc_proj;
7693                 chargeproc = 0;
7694         }
7695 
7696         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
7697         if (attr) {
7698                 pageprot = attr & ~(SHARED|PRIVATE);
7699                 segtype = attr & SHARED ? MAP_SHARED : MAP_PRIVATE;
7700 


8076  *      MADV_DONTNEED   - Pages are not needed (synced out in mctl)
8077  *      MADV_FREE       - Contents can be discarded
8078  *      MADV_ACCESS_DEFAULT- Default access
8079  *      MADV_ACCESS_LWP - Next LWP will access heavily
8080  *      MADV_ACCESS_MANY- Many LWPs or processes will access heavily
8081  */
8082 static int
8083 segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
8084 {
8085         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
8086         size_t page;
8087         int err = 0;
8088         int already_set;
8089         struct anon_map *amp;
8090         ulong_t anon_index;
8091         struct seg *next;
8092         lgrp_mem_policy_t policy;
8093         struct seg *prev;
8094         struct vnode *vp;
8095 
8096         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
8097 
8098         /*
8099          * In case of MADV_FREE, we won't be modifying any segment private
8100          * data structures, so we only need to grab the READER's lock.
8101          */
8102         if (behav != MADV_FREE) {
8103                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
8104                 if (svd->tr_state != SEGVN_TR_OFF) {
8105                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8106                         return (0);
8107                 }
8108         } else {
8109                 SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
8110         }
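
The MADV_ACCESS_* values listed above normally arrive here from madvise(3C); a hedged userland sketch (standard interface, not part of this file):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

/*
 * Hint that the calling LWP will be the heavy user of this range, so
 * the kernel may place or migrate the pages near that thread's home
 * lgroup.
 */
static void
prefer_calling_lwp(caddr_t addr, size_t len)
{
        if (madvise(addr, len, MADV_ACCESS_LWP) != 0)
                perror("madvise(MADV_ACCESS_LWP)");
}
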
8111 
8112         /*
8113          * Large pages are assumed to be only turned on when accesses to the
8114          * segment's address range have spatial and temporal locality. That
8115          * justifies ignoring MADV_SEQUENTIAL for large page segments.
8116          * Also, ignore advice affecting lgroup memory allocation


8204          * otherwise use appropriate vpage entry.
8205          */
8206         if ((addr == seg->s_base) && (len == seg->s_size)) {
8207                 switch (behav) {
8208                 case MADV_ACCESS_LWP:
8209                 case MADV_ACCESS_MANY:
8210                 case MADV_ACCESS_DEFAULT:
8211                         /*
8212                          * Set memory allocation policy for this segment
8213                          */
8214                         policy = lgrp_madv_to_policy(behav, len, svd->type);
8215                         if (svd->type == MAP_SHARED)
8216                                 already_set = lgrp_shm_policy_set(policy, amp,
8217                                     svd->anon_index, vp, svd->offset, len);
8218                         else {
8219                                 /*
8220                          * For private memory, we need the writer's lock on the
8221                          * address space because the segment may be
8222                                  * split or concatenated when changing policy
8223                                  */
8224                                 if (AS_READ_HELD(seg->s_as)) {
8225                                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8226                                         return (IE_RETRY);
8227                                 }
8228 
8229                                 already_set = lgrp_privm_policy_set(policy,
8230                                     &svd->policy_info, len);
8231                         }
8232 
8233                         /*
8234                          * If policy set already and it shouldn't be reapplied,
8235                          * don't do anything.
8236                          */
8237                         if (already_set &&
8238                             !LGRP_MEM_POLICY_REAPPLICABLE(policy))
8239                                 break;
8240 
8241                         /*
8242                          * Mark any existing pages in given range for
8243                          * migration
8244                          */


8354                                 already_set = lgrp_shm_policy_set(policy, amp,
8355                                     anon_index, vp, off, len);
8356                         else
8357                                 already_set =
8358                                     (policy == svd->policy_info.mem_policy);
8359 
8360                         /*
8361                          * If policy set already and it shouldn't be reapplied,
8362                          * don't do anything.
8363                          */
8364                         if (already_set &&
8365                             !LGRP_MEM_POLICY_REAPPLICABLE(policy))
8366                                 break;
8367 
8368                         /*
8369                          * For private memory, we need the writer's lock on the
8370                          * address space because the segment may be
8371                          * split or concatenated when changing policy
8372                          */
8373                         if (svd->type == MAP_PRIVATE &&
8374                             AS_READ_HELD(seg->s_as)) {
8375                                 SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8376                                 return (IE_RETRY);
8377                         }
8378 
8379                         /*
8380                          * Mark any existing pages in given range for
8381                          * migration
8382                          */
8383                         page_mark_migrate(seg, addr, len, amp, svd->anon_index,
8384                             vp, svd->offset, 1);
8385 
8386                         /*
8387                          * Don't need to try to split or concatenate
8388                          * segments, since policy is same or this is a shared
8389                          * memory segment
8390                          */
8391                         if (already_set || svd->type == MAP_SHARED)
8392                                 break;
8393 
8394                         if (HAT_IS_REGION_COOKIE_VALID(svd->rcookie)) {


8540                         err = EINVAL;
8541                 }
8542         }
8543         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
8544         return (err);
8545 }
8546 
8547 /*
8548  * There is one kind of inheritance that can be specified for pages:
8549  *
8550  *     SEGP_INH_ZERO - Pages should be zeroed in the child
8551  */
8552 static int
8553 segvn_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
8554 {
8555         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
8556         struct vpage *bvpp, *evpp;
8557         size_t page;
8558         int ret = 0;
8559 
8560         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
8561 
8562         /* Can't support something we don't know about */
8563         if (behav != SEGP_INH_ZERO)
8564                 return (ENOTSUP);
8565 
8566         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
8567 
8568         /*
8569          * This must be a straightforward anonymous segment that is mapped
8570          * privately and is not backed by a vnode.
8571          */
8572         if (svd->tr_state != SEGVN_TR_OFF ||
8573             svd->type != MAP_PRIVATE ||
8574             svd->vp != NULL) {
8575                 ret = EINVAL;
8576                 goto out;
8577         }
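
SEGP_INH_ZERO is normally requested from userland through memcntl(2) with MC_INHERIT_ZERO, and the checks above restrict it to private anonymous memory; a hedged userland sketch (standard interface, not part of this file):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

/*
 * Ask that this private anonymous mapping show up in child processes
 * as zero-filled pages instead of a copy of the parent's contents.
 */
static void
mark_inherit_zero(caddr_t addr, size_t len)
{
        if (memcntl(addr, len, MC_INHERIT_ZERO, 0, 0, 0) != 0)
                perror("memcntl(MC_INHERIT_ZERO)");
}
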
8578 
8579         /*
8580          * If the entire segment has been marked as inherit zero, then no reason


8809         size_t wlen;
8810         uint_t pflags = 0;
8811         int sftlck_sbase = 0;
8812         int sftlck_send = 0;
8813 
8814 #ifdef DEBUG
8815         if (type == L_PAGELOCK && segvn_pglock_mtbf) {
8816                 hrtime_t ts = gethrtime();
8817                 if ((ts % segvn_pglock_mtbf) == 0) {
8818                         return (ENOTSUP);
8819                 }
8820                 if ((ts % segvn_pglock_mtbf) == 1) {
8821                         return (EFAULT);
8822                 }
8823         }
8824 #endif
8825 
8826         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_START,
8827             "segvn_pagelock: start seg %p addr %p", seg, addr);
8828 
8829         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
8830         ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);
8831 
8832         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
8833 
8834         /*
8835          * for now we only support pagelock to anon memory. We would have to
8836          * check protections for vnode objects and call into the vnode driver.
8837          * That's too much for a fast path. Let the fault entry point handle
8838          * it.
8839          */
8840         if (svd->vp != NULL) {
8841                 if (type == L_PAGELOCK) {
8842                         error = ENOTSUP;
8843                         goto out;
8844                 }
8845                 panic("segvn_pagelock(L_PAGEUNLOCK): vp != NULL");
8846         }
8847         if ((amp = svd->amp) == NULL) {
8848                 if (type == L_PAGELOCK) {
8849                         error = EFAULT;


9431 /*
9432  * If async argument is not 0 we are called from pcache async thread and don't
9433  * hold AS lock.
9434  */
9435 
9436 /*ARGSUSED*/
9437 static int
9438 segvn_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
9439         enum seg_rw rw, int async)
9440 {
9441         struct seg *seg = (struct seg *)ptag;
9442         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
9443         pgcnt_t np, npages;
9444         struct page **pl;
9445 
9446         npages = np = btop(len);
9447         ASSERT(npages);
9448 
9449         ASSERT(svd->vp == NULL && svd->amp != NULL);
9450         ASSERT(svd->softlockcnt >= npages);
9451         ASSERT(async || AS_LOCK_HELD(seg->s_as));
9452 
9453         pl = pplist;
9454 
9455         ASSERT(pl[np] == NOPCACHE_SHWLIST || pl[np] == PCACHE_SHWLIST);
9456         ASSERT(!async || pl[np] == PCACHE_SHWLIST);
9457 
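             /*
              * Set the hardware ref bit (and the mod bit for writes) on each
              * page and drop the page lock taken by segvn_pagelock().  The
              * shadow-list sentinel at pl[np] is left alone.
              */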
9458         while (np > (uint_t)0) {
9459                 if (rw == S_WRITE) {
9460                         hat_setrefmod(*pplist);
9461                 } else {
9462                         hat_setref(*pplist);
9463                 }
9464                 page_unlock(*pplist);
9465                 np--;
9466                 pplist++;
9467         }
9468 
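             /*
              * The list was allocated with npages + 1 slots; the extra slot
              * holds the shadow-list sentinel asserted above.
              */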
9469         kmem_free(pl, sizeof (page_t *) * (npages + 1));
9470 
9471         /*


9709  * svd->amp remains as NULL.
9710  */
9711 static void
9712 segvn_textrepl(struct seg *seg)
9713 {
9714         struct segvn_data       *svd = (struct segvn_data *)seg->s_data;
9715         vnode_t                 *vp = svd->vp;
9716         u_offset_t              off = svd->offset;
9717         size_t                  size = seg->s_size;
9718         u_offset_t              eoff = off + size;
9719         uint_t                  szc = seg->s_szc;
9720         ulong_t                 hash = SVNTR_HASH_FUNC(vp);
9721         svntr_t                 *svntrp;
9722         struct vattr            va;
9723         proc_t                  *p = seg->s_as->a_proc;
9724         lgrp_id_t               lgrp_id;
9725         lgrp_id_t               olid;
9726         int                     first;
9727         struct anon_map         *amp;
9728 
9729         ASSERT(AS_LOCK_HELD(seg->s_as));
9730         ASSERT(SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
9731         ASSERT(p != NULL);
9732         ASSERT(svd->tr_state == SEGVN_TR_INIT);
9733         ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
9734         ASSERT(svd->flags & MAP_TEXT);
9735         ASSERT(svd->type == MAP_PRIVATE);
9736         ASSERT(vp != NULL && svd->amp == NULL);
9737         ASSERT(!svd->pageprot && !(svd->prot & PROT_WRITE));
9738         ASSERT(!(svd->flags & MAP_NORESERVE) && svd->swresv == 0);
9739         ASSERT(seg->s_as != &kas);
9740         ASSERT(off < eoff);
9741         ASSERT(svntr_hashtab != NULL);
9742 
9743         /*
9744          * If NUMA optimizations are no longer desired, bail out.
9745          */
9746         if (!lgrp_optimizations()) {
9747                 svd->tr_state = SEGVN_TR_OFF;
9748                 return;
9749         }


9990  * unmapped but can also be called when the segment no longer qualifies for
9991  * text replication (e.g. due to protection changes). If unload_unmap is set,
9992  * use the HAT_UNLOAD_UNMAP flag in hat_unload_callback().  If we are the last
9993  * user of the svntr, free all its anon maps and remove it from the hash table.
9994  */
9995 static void
9996 segvn_textunrepl(struct seg *seg, int unload_unmap)
9997 {
9998         struct segvn_data       *svd = (struct segvn_data *)seg->s_data;
9999         vnode_t                 *vp = svd->vp;
10000         u_offset_t              off = svd->offset;
10001         size_t                  size = seg->s_size;
10002         u_offset_t              eoff = off + size;
10003         uint_t                  szc = seg->s_szc;
10004         ulong_t                 hash = SVNTR_HASH_FUNC(vp);
10005         svntr_t                 *svntrp;
10006         svntr_t                 **prv_svntrp;
10007         lgrp_id_t               lgrp_id = svd->tr_policy_info.mem_lgrpid;
10008         lgrp_id_t               i;
10009 
10010         ASSERT(AS_LOCK_HELD(seg->s_as));
10011         ASSERT(AS_WRITE_HELD(seg->s_as) ||
10012             SEGVN_WRITE_HELD(seg->s_as, &svd->lock));
10013         ASSERT(svd->tr_state == SEGVN_TR_ON);
10014         ASSERT(!HAT_IS_REGION_COOKIE_VALID(svd->rcookie));
10015         ASSERT(svd->amp != NULL);
10016         ASSERT(svd->amp->refcnt >= 1);
10017         ASSERT(svd->anon_index == 0);
10018         ASSERT(lgrp_id != LGRP_NONE && lgrp_id < NLGRPS_MAX);
10019         ASSERT(svntr_hashtab != NULL);
10020 
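              /*
               * Find the svntr record matching this vnode, offset range and
               * page size in its hash bucket.  With tr_state == SEGVN_TR_ON a
               * matching record must exist, hence the panic below if it doesn't.
               */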
10021         mutex_enter(&svntr_hashtab[hash].tr_lock);
10022         prv_svntrp = &svntr_hashtab[hash].tr_head;
10023         for (; (svntrp = *prv_svntrp) != NULL; prv_svntrp = &svntrp->tr_next) {
10024                 ASSERT(svntrp->tr_refcnt != 0);
10025                 if (svntrp->tr_vp == vp && svntrp->tr_off == off &&
10026                     svntrp->tr_eoff == eoff && svntrp->tr_szc == szc) {
10027                         break;
10028                 }
10029         }
10030         if (svntrp == NULL) {
10031                 panic("segvn_textunrepl: svntr record not found");


10216         ASSERT(mutex_owned(&svntr_hashtab[hash].tr_lock));
10217 
10218         as = seg->s_as;
10219         ASSERT(as != NULL && as != &kas);
10220         p = as->a_proc;
10221         ASSERT(p != NULL);
10222         ASSERT(p->p_tr_lgrpid != LGRP_NONE);
10223         lgrp_id = p->p_t1_lgrpid;
10224         if (lgrp_id == LGRP_NONE) {
10225                 return;
10226         }
10227         ASSERT(lgrp_id < NLGRPS_MAX);
10228         if (svd->tr_policy_info.mem_lgrpid == lgrp_id) {
10229                 return;
10230         }
10231 
10232         /*
10233          * Use tryenter locking since we are locking as/seg and svntr hash
10234          * lock in reverse from syncrounous thread order.
10235          */
10236         if (!AS_LOCK_TRYENTER(as, RW_READER)) {
10237                 SEGVN_TR_ADDSTAT(nolock);
10238                 if (segvn_lgrp_trthr_migrs_snpsht) {
10239                         segvn_lgrp_trthr_migrs_snpsht = 0;
10240                 }
10241                 return;
10242         }
10243         if (!SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock, RW_WRITER)) {
10244                 AS_LOCK_EXIT(as);
10245                 SEGVN_TR_ADDSTAT(nolock);
10246                 if (segvn_lgrp_trthr_migrs_snpsht) {
10247                         segvn_lgrp_trthr_migrs_snpsht = 0;
10248                 }
10249                 return;
10250         }
10251         size = seg->s_size;
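              /*
               * If there is no replica anon map yet for the new home lgroup of
               * the process's t1 thread, charge the segment size against the
               * text replication memory cap, reserve anon swap and allocate a
               * fresh anon map, backing everything out if any step fails.
               */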
10252         if (svntrp->tr_amp[lgrp_id] == NULL) {
10253                 size_t trmem = atomic_add_long_nv(&segvn_textrepl_bytes, size);
10254                 if (trmem > segvn_textrepl_max_bytes) {
10255                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10256                         AS_LOCK_EXIT(as);
10257                         atomic_add_long(&segvn_textrepl_bytes, -size);
10258                         SEGVN_TR_ADDSTAT(normem);
10259                         return;
10260                 }
10261                 if (anon_try_resv_zone(size, NULL) == 0) {
10262                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10263                         AS_LOCK_EXIT(as);
10264                         atomic_add_long(&segvn_textrepl_bytes, -size);
10265                         SEGVN_TR_ADDSTAT(noanon);
10266                         return;
10267                 }
10268                 amp = anonmap_alloc(size, size, KM_NOSLEEP);
10269                 if (amp == NULL) {
10270                         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10271                         AS_LOCK_EXIT(as);
10272                         atomic_add_long(&segvn_textrepl_bytes, -size);
10273                         anon_unresv_zone(size, NULL);
10274                         SEGVN_TR_ADDSTAT(nokmem);
10275                         return;
10276                 }
10277                 ASSERT(amp->refcnt == 1);
10278                 amp->a_szc = seg->s_szc;
10279                 svntrp->tr_amp[lgrp_id] = amp;
10280         }
10281         /*
10282          * We don't need to drop the bucket lock but here we give other
10283          * threads a chance.  svntr and svd can't be unlinked as long as
10284          * segment lock is held as a writer and AS held as well.  After we
10285          * retake bucket lock we'll continue from where we left. We'll be able
10286          * to reach the end of either list since new entries are always added
10287          * to the beginning of the lists.
10288          */
10289         mutex_exit(&svntr_hashtab[hash].tr_lock);
10290         hat_unload_callback(as->a_hat, seg->s_base, size, 0, NULL);
10291         mutex_enter(&svntr_hashtab[hash].tr_lock);
10292 
10293         ASSERT(svd->tr_state == SEGVN_TR_ON);
10294         ASSERT(svd->amp != NULL);
10295         ASSERT(svd->tr_policy_info.mem_policy == LGRP_MEM_POLICY_NEXT_SEG);
10296         ASSERT(svd->tr_policy_info.mem_lgrpid != lgrp_id);
10297         ASSERT(svd->amp != svntrp->tr_amp[lgrp_id]);
10298 
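              /*
               * Switch the segment over to the replica anon map for the new
               * lgroup.  The translations were unloaded above, so subsequent
               * text faults will be satisfied from (and populate) the replica.
               */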
10299         svd->tr_policy_info.mem_lgrpid = lgrp_id;
10300         svd->amp = svntrp->tr_amp[lgrp_id];
10301         p->p_tr_lgrpid = NLGRPS_MAX;
10302         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
10303         AS_LOCK_EXIT(as);
10304 
10305         ASSERT(svntrp->tr_refcnt != 0);
10306         ASSERT(svd->vp == svntrp->tr_vp);
10307         ASSERT(svd->tr_policy_info.mem_lgrpid == lgrp_id);
10308         ASSERT(svd->amp != NULL && svd->amp == svntrp->tr_amp[lgrp_id]);
10309         ASSERT(svd->seg == seg);
10310         ASSERT(svd->tr_state == SEGVN_TR_ON);
10311 
10312         SEGVN_TR_ADDSTAT(asyncrepl);
10313 }