/* patch: as-lock-macro-simplification */


1359                         sp->sa_flags |= SA_NODEFER;
1360                 if (sig == SIGCLD) {
1361                         if (p->p_flag & SNOWAIT)
1362                                 sp->sa_flags |= SA_NOCLDWAIT;
1363                         if ((p->p_flag & SJCTL) == 0)
1364                                 sp->sa_flags |= SA_NOCLDSTOP;
1365                 }
1366         }
1367 }
1368 #endif  /* _SYSCALL32_IMPL */
1369 
1370 /*
1371  * Count the number of segments in this process's address space.
1372  */
1373 int
1374 prnsegs(struct as *as, int reserved)
1375 {
1376         int n = 0;
1377         struct seg *seg;
1378 
1379         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1380 
1381         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383                 caddr_t saddr, naddr;
1384                 void *tmp = NULL;
1385 
1386                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387                         (void) pr_getprot(seg, reserved, &tmp,
1388                             &saddr, &naddr, eaddr);
1389                         if (saddr != naddr)
1390                                 n++;
1391                 }
1392 
1393                 ASSERT(tmp == NULL);
1394         }
1395 
1396         return (n);
1397 }
1398 
1399 /*


1602         list_destroy(iolhead);
1603 
1604         return (error);
1605 }
1606 
1607 /*
1608  * Return an array of structures with memory map information.
1609  * We allocate here; the caller must deallocate.
1610  */
1611 int
1612 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1613 {
1614         struct as *as = p->p_as;
1615         prmap_t *mp;
1616         struct seg *seg;
1617         struct seg *brkseg, *stkseg;
1618         struct vnode *vp;
1619         struct vattr vattr;
1620         uint_t prot;
1621 
1622         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1623 
1624         /*
1625          * Request an initial buffer size that doesn't waste memory
1626          * if the address space has only a small number of segments.
1627          */
1628         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1629 
1630         if ((seg = AS_SEGFIRST(as)) == NULL)
1631                 return (0);
1632 
1633         brkseg = break_seg(p);
1634         stkseg = as_segat(as, prgetstackbase(p));
1635 
1636         do {
1637                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638                 caddr_t saddr, naddr;
1639                 void *tmp = NULL;
1640 
1641                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642                         prot = pr_getprot(seg, reserved, &tmp,


1713                         }
1714                 }
1715                 ASSERT(tmp == NULL);
1716         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1717 
1718         return (0);
1719 }
1720 
1721 #ifdef _SYSCALL32_IMPL
1722 int
1723 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1724 {
1725         struct as *as = p->p_as;
1726         prmap32_t *mp;
1727         struct seg *seg;
1728         struct seg *brkseg, *stkseg;
1729         struct vnode *vp;
1730         struct vattr vattr;
1731         uint_t prot;
1732 
1733         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1734 
1735         /*
1736          * Request an initial buffer size that doesn't waste memory
1737          * if the address space has only a small number of segments.
1738          */
1739         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1740 
1741         if ((seg = AS_SEGFIRST(as)) == NULL)
1742                 return (0);
1743 
1744         brkseg = break_seg(p);
1745         stkseg = as_segat(as, prgetstackbase(p));
1746 
1747         do {
1748                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749                 caddr_t saddr, naddr;
1750                 void *tmp = NULL;
1751 
1752                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753                         prot = pr_getprot(seg, reserved, &tmp,


1823                         } else {
1824                                 mp->pr_shmid = -1;
1825                         }
1826                 }
1827                 ASSERT(tmp == NULL);
1828         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1829 
1830         return (0);
1831 }
1832 #endif  /* _SYSCALL32_IMPL */
1833 
1834 /*
1835  * Return the size of the /proc page data file.
1836  */
1837 size_t
1838 prpdsize(struct as *as)
1839 {
1840         struct seg *seg;
1841         size_t size;
1842 
1843         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1844 
1845         if ((seg = AS_SEGFIRST(as)) == NULL)
1846                 return (0);
1847 
1848         size = sizeof (prpageheader_t);
1849         do {
1850                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851                 caddr_t saddr, naddr;
1852                 void *tmp = NULL;
1853                 size_t npage;
1854 
1855                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856                         (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857                         if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858                                 size += sizeof (prasmap_t) + round8(npage);
1859                 }
1860                 ASSERT(tmp == NULL);
1861         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1862 
1863         return (size);
1864 }
1865 
1866 #ifdef _SYSCALL32_IMPL
1867 size_t
1868 prpdsize32(struct as *as)
1869 {
1870         struct seg *seg;
1871         size_t size;
1872 
1873         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1874 
1875         if ((seg = AS_SEGFIRST(as)) == NULL)
1876                 return (0);
1877 
1878         size = sizeof (prpageheader32_t);
1879         do {
1880                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881                 caddr_t saddr, naddr;
1882                 void *tmp = NULL;
1883                 size_t npage;
1884 
1885                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886                         (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887                         if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888                                 size += sizeof (prasmap32_t) + round8(npage);
1889                 }
1890                 ASSERT(tmp == NULL);
1891         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1892 
1893         return (size);
1894 }
1895 #endif  /* _SYSCALL32_IMPL */
1896 
1897 /*
1898  * Read page data information.
1899  */
1900 int
1901 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1902 {
1903         struct as *as = p->p_as;
1904         caddr_t buf;
1905         size_t size;
1906         prpageheader_t *php;
1907         prasmap_t *pmp;
1908         struct seg *seg;
1909         int error;
1910 
1911 again:
1912         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1913 
1914         if ((seg = AS_SEGFIRST(as)) == NULL) {
1915                 AS_LOCK_EXIT(as, &as->a_lock);
1916                 return (0);
1917         }
1918         size = prpdsize(as);
1919         if (uiop->uio_resid < size) {
1920                 AS_LOCK_EXIT(as, &as->a_lock);
1921                 return (E2BIG);
1922         }
1923 
1924         buf = kmem_zalloc(size, KM_SLEEP);
1925         php = (prpageheader_t *)buf;
1926         pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1927 
1928         hrt2ts(gethrtime(), &php->pr_tstamp);
1929         php->pr_nmap = 0;
1930         php->pr_npage = 0;
1931         do {
1932                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1933                 caddr_t saddr, naddr;
1934                 void *tmp = NULL;
1935 
1936                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937                         struct vnode *vp;
1938                         struct vattr vattr;
1939                         size_t len;
1940                         size_t npage;


1948                         next = (uintptr_t)(pmp + 1) + round8(npage);
1949                         /*
1950                          * It's possible that the address space can change
1951                          * subtlely even though we're holding as->a_lock
1952                          * due to the nondeterminism of page_exists() in
1953                          * the presence of asychronously flushed pages or
1954                          * mapped files whose sizes are changing.
1955                          * page_exists() may be called indirectly from
1956                          * pr_getprot() by a SEGOP_INCORE() routine.
1957                          * If this happens we need to make sure we don't
1958                          * overrun the buffer whose size we computed based
1959                          * on the initial iteration through the segments.
1960                          * Once we've detected an overflow, we need to clean
1961                          * up the temporary memory allocated in pr_getprot()
1962                          * and retry. If there's a pending signal, we return
1963                          * EINTR so that this thread can be dislodged if
1964                          * a latent bug causes us to spin indefinitely.
1965                          */
1966                         if (next > (uintptr_t)buf + size) {
1967                                 pr_getprot_done(&tmp);
1968                                 AS_LOCK_EXIT(as, &as->a_lock);
1969 
1970                                 kmem_free(buf, size);
1971 
1972                                 if (ISSIG(curthread, JUSTLOOKING))
1973                                         return (EINTR);
1974 
1975                                 goto again;
1976                         }
1977 
1978                         php->pr_nmap++;
1979                         php->pr_npage += npage;
1980                         pmp->pr_vaddr = (uintptr_t)saddr;
1981                         pmp->pr_npage = npage;
1982                         pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1983                         pmp->pr_mflags = 0;
1984                         if (prot & PROT_READ)
1985                                 pmp->pr_mflags |= MA_READ;
1986                         if (prot & PROT_WRITE)
1987                                 pmp->pr_mflags |= MA_WRITE;
1988                         if (prot & PROT_EXEC)


2017                          * Get the SysV shared memory id, if any.
2018                          */
2019                         if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020                             (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021                             SHMID_NONE) {
2022                                 if (pmp->pr_shmid == SHMID_FREE)
2023                                         pmp->pr_shmid = -1;
2024 
2025                                 pmp->pr_mflags |= MA_SHM;
2026                         } else {
2027                                 pmp->pr_shmid = -1;
2028                         }
2029 
2030                         hat_getstat(as, saddr, len, hatid,
2031                             (char *)(pmp + 1), HAT_SYNC_ZERORM);
2032                         pmp = (prasmap_t *)next;
2033                 }
2034                 ASSERT(tmp == NULL);
2035         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2036 
2037         AS_LOCK_EXIT(as, &as->a_lock);
2038 
2039         ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2040         error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2041         kmem_free(buf, size);
2042 
2043         return (error);
2044 }
2045 
2046 #ifdef _SYSCALL32_IMPL
2047 int
2048 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2049 {
2050         struct as *as = p->p_as;
2051         caddr_t buf;
2052         size_t size;
2053         prpageheader32_t *php;
2054         prasmap32_t *pmp;
2055         struct seg *seg;
2056         int error;
2057 
2058 again:
2059         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2060 
2061         if ((seg = AS_SEGFIRST(as)) == NULL) {
2062                 AS_LOCK_EXIT(as, &as->a_lock);
2063                 return (0);
2064         }
2065         size = prpdsize32(as);
2066         if (uiop->uio_resid < size) {
2067                 AS_LOCK_EXIT(as, &as->a_lock);
2068                 return (E2BIG);
2069         }
2070 
2071         buf = kmem_zalloc(size, KM_SLEEP);
2072         php = (prpageheader32_t *)buf;
2073         pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2074 
2075         hrt2ts32(gethrtime(), &php->pr_tstamp);
2076         php->pr_nmap = 0;
2077         php->pr_npage = 0;
2078         do {
2079                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2080                 caddr_t saddr, naddr;
2081                 void *tmp = NULL;
2082 
2083                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084                         struct vnode *vp;
2085                         struct vattr vattr;
2086                         size_t len;
2087                         size_t npage;


2095                         next = (uintptr_t)(pmp + 1) + round8(npage);
2096                         /*
2097                          * It's possible that the address space can change
2098                          * subtlely even though we're holding as->a_lock
2099                          * due to the nondeterminism of page_exists() in
2100                          * the presence of asychronously flushed pages or
2101                          * mapped files whose sizes are changing.
2102                          * page_exists() may be called indirectly from
2103                          * pr_getprot() by a SEGOP_INCORE() routine.
2104                          * If this happens we need to make sure we don't
2105                          * overrun the buffer whose size we computed based
2106                          * on the initial iteration through the segments.
2107                          * Once we've detected an overflow, we need to clean
2108                          * up the temporary memory allocated in pr_getprot()
2109                          * and retry. If there's a pending signal, we return
2110                          * EINTR so that this thread can be dislodged if
2111                          * a latent bug causes us to spin indefinitely.
2112                          */
2113                         if (next > (uintptr_t)buf + size) {
2114                                 pr_getprot_done(&tmp);
2115                                 AS_LOCK_EXIT(as, &as->a_lock);
2116 
2117                                 kmem_free(buf, size);
2118 
2119                                 if (ISSIG(curthread, JUSTLOOKING))
2120                                         return (EINTR);
2121 
2122                                 goto again;
2123                         }
2124 
2125                         php->pr_nmap++;
2126                         php->pr_npage += npage;
2127                         pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128                         pmp->pr_npage = (size32_t)npage;
2129                         pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2130                         pmp->pr_mflags = 0;
2131                         if (prot & PROT_READ)
2132                                 pmp->pr_mflags |= MA_READ;
2133                         if (prot & PROT_WRITE)
2134                                 pmp->pr_mflags |= MA_WRITE;
2135                         if (prot & PROT_EXEC)


2164                          * Get the SysV shared memory id, if any.
2165                          */
2166                         if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167                             (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168                             SHMID_NONE) {
2169                                 if (pmp->pr_shmid == SHMID_FREE)
2170                                         pmp->pr_shmid = -1;
2171 
2172                                 pmp->pr_mflags |= MA_SHM;
2173                         } else {
2174                                 pmp->pr_shmid = -1;
2175                         }
2176 
2177                         hat_getstat(as, saddr, len, hatid,
2178                             (char *)(pmp + 1), HAT_SYNC_ZERORM);
2179                         pmp = (prasmap32_t *)next;
2180                 }
2181                 ASSERT(tmp == NULL);
2182         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2183 
2184         AS_LOCK_EXIT(as, &as->a_lock);
2185 
2186         ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2187         error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2188         kmem_free(buf, size);
2189 
2190         return (error);
2191 }
2192 #endif  /* _SYSCALL32_IMPL */
2193 
2194 ushort_t
2195 prgetpctcpu(uint64_t pct)
2196 {
2197         /*
2198          * The value returned will be relevant in the zone of the examiner,
2199          * which may not be the same as the zone which performed the procfs
2200          * mount.
2201          */
2202         int nonline = zone_ncpus_online_get(curproc->p_zone);
2203 
2204         /*


2319 
2320                 /* compute %cpu for the process */
2321                 if (p->p_lwpcnt == 1)
2322                         psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2323                 else {
2324                         uint64_t pct = 0;
2325                         hrtime_t cur_time = gethrtime_unscaled();
2326 
2327                         t = p->p_tlist;
2328                         do {
2329                                 pct += cpu_update_pct(t, cur_time);
2330                         } while ((t = t->t_forw) != p->p_tlist);
2331 
2332                         psp->pr_pctcpu = prgetpctcpu(pct);
2333                 }
2334                 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2335                         psp->pr_size = 0;
2336                         psp->pr_rssize = 0;
2337                 } else {
2338                         mutex_exit(&p->p_lock);
2339                         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2340                         psp->pr_size = btopr(as->a_resvsize) *
2341                             (PAGESIZE / 1024);
2342                         psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2343                         psp->pr_pctmem = rm_pctmemory(as);
2344                         AS_LOCK_EXIT(as, &as->a_lock);
2345                         mutex_enter(&p->p_lock);
2346                 }
2347         }
2348 }
2349 
2350 #ifdef _SYSCALL32_IMPL
2351 void
2352 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2353 {
2354         kthread_t *t;
2355         struct cred *cred;
2356         hrtime_t hrutime, hrstime;
2357 
2358         ASSERT(MUTEX_HELD(&p->p_lock));
2359 
2360         if ((t = prchoose(p)) == NULL)  /* returns locked thread */
2361                 bzero(psp, sizeof (*psp));
2362         else {
2363                 thread_unlock(t);
2364                 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));


2452                 /* compute %cpu for the process */
2453                 if (p->p_lwpcnt == 1)
2454                         psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2455                 else {
2456                         uint64_t pct = 0;
2457                         hrtime_t cur_time;
2458 
2459                         t = p->p_tlist;
2460                         cur_time = gethrtime_unscaled();
2461                         do {
2462                                 pct += cpu_update_pct(t, cur_time);
2463                         } while ((t = t->t_forw) != p->p_tlist);
2464 
2465                         psp->pr_pctcpu = prgetpctcpu(pct);
2466                 }
2467                 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2468                         psp->pr_size = 0;
2469                         psp->pr_rssize = 0;
2470                 } else {
2471                         mutex_exit(&p->p_lock);
2472                         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2473                         psp->pr_size = (size32_t)
2474                             (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2475                         psp->pr_rssize = (size32_t)
2476                             (rm_asrss(as) * (PAGESIZE / 1024));
2477                         psp->pr_pctmem = rm_pctmemory(as);
2478                         AS_LOCK_EXIT(as, &as->a_lock);
2479                         mutex_enter(&p->p_lock);
2480                 }
2481         }
2482 
2483         /*
2484          * If we are looking at an LP64 process, zero out
2485          * the fields that cannot be represented in ILP32.
2486          */
2487         if (p->p_model != DATAMODEL_ILP32) {
2488                 psp->pr_size = 0;
2489                 psp->pr_rssize = 0;
2490                 psp->pr_argv = 0;
2491                 psp->pr_envp = 0;
2492         }
2493 }
2494 
2495 #endif  /* _SYSCALL32_IMPL */
2496 
2497 void
2498 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)


3296         avl_destroy(&p->p_warea);
3297 }
3298 
3299 /*
3300  * This one is called by the traced process to unwatch all the
3301  * pages while deallocating the list of watched_page structs.
3302  */
3303 void
3304 pr_free_watched_pages(proc_t *p)
3305 {
3306         struct as *as = p->p_as;
3307         struct watched_page *pwp;
3308         uint_t prot;
3309         int    retrycnt, err;
3310         void *cookie;
3311 
3312         if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313                 return;
3314 
3315         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3317 
3318         pwp = avl_first(&as->a_wpage);
3319 
3320         cookie = NULL;
3321         while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322                 retrycnt = 0;
3323                 if ((prot = pwp->wp_oprot) != 0) {
3324                         caddr_t addr = pwp->wp_vaddr;
3325                         struct seg *seg;
3326                 retry:
3327 
3328                         if ((pwp->wp_prot != prot ||
3329                             (pwp->wp_flags & WP_NOWATCH)) &&
3330                             (seg = as_segat(as, addr)) != NULL) {
3331                                 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332                                 if (err == IE_RETRY) {
3333                                         ASSERT(retrycnt == 0);
3334                                         retrycnt++;
3335                                         goto retry;
3336                                 }
3337                         }
3338                 }
3339                 kmem_free(pwp, sizeof (struct watched_page));
3340         }
3341 
3342         avl_destroy(&as->a_wpage);
3343         p->p_wprot = NULL;
3344 
3345         AS_LOCK_EXIT(as, &as->a_lock);
3346 }
3347 
3348 /*
3349  * Insert a watched area into the list of watched pages.
3350  * If oflags is zero then we are adding a new watched area.
3351  * Otherwise we are changing the flags of an existing watched area.
3352  */
3353 static int
3354 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3355         ulong_t flags, ulong_t oflags)
3356 {
3357         struct as *as = p->p_as;
3358         avl_tree_t *pwp_tree;
3359         struct watched_page *pwp, *newpwp;
3360         struct watched_page tpw;
3361         avl_index_t where;
3362         struct seg *seg;
3363         uint_t prot;
3364         caddr_t addr;
3365 
3366         /*
3367          * We need to pre-allocate a list of structures before we grab the
3368          * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3369          * held.
3370          */
3371         newpwp = NULL;
3372         for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3373             addr < eaddr; addr += PAGESIZE) {
3374                 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3375                 pwp->wp_list = newpwp;
3376                 newpwp = pwp;
3377         }
3378 
3379         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3380 
3381         /*
3382          * Search for an existing watched page to contain the watched area.
3383          * If none is found, grab a new one from the available list
3384          * and insert it in the active list, keeping the list sorted
3385          * by user-level virtual address.
3386          */
3387         if (p->p_flag & SVFWAIT)
3388                 pwp_tree = &p->p_wpage;
3389         else
3390                 pwp_tree = &as->a_wpage;
3391 
3392 again:
3393         if (avl_numnodes(pwp_tree) > prnwatch) {
3394                 AS_LOCK_EXIT(as, &as->a_lock);
3395                 while (newpwp != NULL) {
3396                         pwp = newpwp->wp_list;
3397                         kmem_free(newpwp, sizeof (struct watched_page));
3398                         newpwp = pwp;
3399                 }
3400                 return (E2BIG);
3401         }
3402 
3403         tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3404         if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3405                 pwp = newpwp;
3406                 newpwp = newpwp->wp_list;
3407                 pwp->wp_list = NULL;
3408                 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3409                     (uintptr_t)PAGEMASK);
3410                 avl_insert(pwp_tree, pwp, where);
3411         }
3412 
3413         ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3414 


3447                         if (pwp->wp_exec)
3448                                 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3449                         if (!(pwp->wp_flags & WP_NOWATCH) &&
3450                             pwp->wp_prot != prot &&
3451                             (pwp->wp_flags & WP_SETPROT) == 0) {
3452                                 pwp->wp_flags |= WP_SETPROT;
3453                                 pwp->wp_list = p->p_wprot;
3454                                 p->p_wprot = pwp;
3455                         }
3456                         pwp->wp_prot = (uchar_t)prot;
3457                 }
3458         }
3459 
3460         /*
3461          * If the watched area extends into the next page then do
3462          * it over again with the virtual address of the next page.
3463          */
3464         if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3465                 goto again;
3466 
3467         AS_LOCK_EXIT(as, &as->a_lock);
3468 
3469         /*
3470          * Free any pages we may have over-allocated
3471          */
3472         while (newpwp != NULL) {
3473                 pwp = newpwp->wp_list;
3474                 kmem_free(newpwp, sizeof (struct watched_page));
3475                 newpwp = pwp;
3476         }
3477 
3478         return (0);
3479 }
3480 
3481 /*
3482  * Remove a watched area from the list of watched pages.
3483  * A watched area may extend over more than one page.
3484  */
3485 static void
3486 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3487 {
3488         struct as *as = p->p_as;
3489         struct watched_page *pwp;
3490         struct watched_page tpw;
3491         avl_tree_t *tree;
3492         avl_index_t where;
3493 
3494         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3495 
3496         if (p->p_flag & SVFWAIT)
3497                 tree = &p->p_wpage;
3498         else
3499                 tree = &as->a_wpage;
3500 
3501         tpw.wp_vaddr = vaddr =
3502             (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3503         pwp = avl_find(tree, &tpw, &where);
3504         if (pwp == NULL)
3505                 pwp = avl_nearest(tree, where, AVL_AFTER);
3506 
3507         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3508                 ASSERT(vaddr <=  pwp->wp_vaddr);
3509 
3510                 if (flags & WA_READ)
3511                         pwp->wp_read--;
3512                 if (flags & WA_WRITE)
3513                         pwp->wp_write--;
3514                 if (flags & WA_EXEC)


3539                                 pwp->wp_prot = (uchar_t)prot;
3540                         }
3541                 } else {
3542                         /*
3543                          * No watched areas remain in this page.
3544                          * Reset everything to normal.
3545                          */
3546                         if (pwp->wp_oprot != 0) {
3547                                 pwp->wp_prot = pwp->wp_oprot;
3548                                 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3549                                         pwp->wp_flags |= WP_SETPROT;
3550                                         pwp->wp_list = p->p_wprot;
3551                                         p->p_wprot = pwp;
3552                                 }
3553                         }
3554                 }
3555 
3556                 pwp = AVL_NEXT(tree, pwp);
3557         }
3558 
3559         AS_LOCK_EXIT(as, &as->a_lock);
3560 }
3561 
3562 /*
3563  * Return the original protections for the specified page.
3564  */
3565 static void
3566 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3567 {
3568         struct watched_page *pwp;
3569         struct watched_page tpw;
3570 
3571         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3572 
3573         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3574         if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3575                 *prot = pwp->wp_oprot;
3576 }
3577 
3578 static prpagev_t *
3579 pr_pagev_create(struct seg *seg, int check_noreserve)
3580 {
3581         prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3582         size_t total_pages = seg_pages(seg);
3583 
3584         /*
3585          * Limit the size of our vectors to pagev_lim pages at a time.  We need
3586          * 4 or 5 bytes of storage per page, so this means we limit ourself
3587          * to about a megabyte of kernel heap by default.
3588          */
3589         pagev->pg_npages = MIN(total_pages, pagev_lim);
3590         pagev->pg_pnbase = 0;
3591 


3838 uint_t
3839 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840         caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 {
3842         struct as *as = seg->s_as;
3843 
3844         caddr_t saddr = *saddrp;
3845         caddr_t naddr;
3846 
3847         int check_noreserve;
3848         uint_t prot;
3849 
3850         union {
3851                 struct segvn_data *svd;
3852                 struct segdev_data *sdp;
3853                 void *data;
3854         } s;
3855 
3856         s.data = seg->s_data;
3857 
3858         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3859         ASSERT(saddr >= seg->s_base && saddr < eaddr);
3860         ASSERT(eaddr <= seg->s_base + seg->s_size);
3861 
3862         /*
3863          * Don't include MAP_NORESERVE pages in the address range
3864          * unless their mappings have actually materialized.
3865          * We cheat by knowing that segvn is the only segment
3866          * driver that supports MAP_NORESERVE.
3867          */
3868         check_noreserve =
3869             (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3870             (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3871             (s.svd->flags & MAP_NORESERVE));
3872 
3873         /*
3874          * Examine every page only as a last resort.  We use guilty knowledge
3875          * of segvn and segdev to avoid this: if there are no per-page
3876          * protections present in the segment and we don't care about
3877          * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3878          */


3952  * Return true iff the vnode is a /proc file opened by the process itself.
3953  */
3954 int
3955 pr_isself(vnode_t *vp)
3956 {
3957         /*
3958          * XXX: To retain binary compatibility with the old
3959          * ioctl()-based version of /proc, we exempt self-opens
3960          * of /proc/<pid> from being marked close-on-exec.
3961          */
3962         return (vn_matchops(vp, prvnodeops) &&
3963             (VTOP(vp)->pr_flags & PR_ISSELF) &&
3964             VTOP(vp)->pr_type != PR_PIDDIR);
3965 }
3966 
3967 static ssize_t
3968 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3969 {
3970         ssize_t pagesize, hatsize;
3971 
3972         ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3973         ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3974         ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3975         ASSERT(saddr < eaddr);
3976 
3977         pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3978         ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3979         ASSERT(pagesize != 0);
3980 
3981         if (pagesize == -1)
3982                 pagesize = PAGESIZE;
3983 
3984         saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3985 
3986         while (saddr < eaddr) {
3987                 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3988                         break;
3989                 ASSERT(IS_P2ALIGNED(saddr, pagesize));
3990                 saddr += pagesize;
3991         }
3992 
3993         *naddrp = ((saddr < eaddr) ? saddr : eaddr);
3994         return (hatsize);
3995 }
3996 
3997 /*
3998  * Return an array of structures with extended memory map information.
3999  * We allocate here; the caller must deallocate.
4000  */
4001 int
4002 prgetxmap(proc_t *p, list_t *iolhead)
4003 {
4004         struct as *as = p->p_as;
4005         prxmap_t *mp;
4006         struct seg *seg;
4007         struct seg *brkseg, *stkseg;
4008         struct vnode *vp;
4009         struct vattr vattr;
4010         uint_t prot;
4011 
4012         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4013 
4014         /*
4015          * Request an initial buffer size that doesn't waste memory
4016          * if the address space has only a small number of segments.
4017          */
4018         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4019 
4020         if ((seg = AS_SEGFIRST(as)) == NULL)
4021                 return (0);
4022 
4023         brkseg = break_seg(p);
4024         stkseg = as_segat(as, prgetstackbase(p));
4025 
4026         do {
4027                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4028                 caddr_t saddr, naddr, baddr;
4029                 void *tmp = NULL;
4030                 ssize_t psz;
4031                 char *parr;
4032                 uint64_t npages;


4176         cred2prpriv(p->p_cred, pprp);
4177         mutex_exit(&p->p_crlock);
4178 }
4179 
4180 #ifdef _SYSCALL32_IMPL
4181 /*
4182  * Return an array of structures with HAT memory map information.
4183  * We allocate here; the caller must deallocate.
4184  */
4185 int
4186 prgetxmap32(proc_t *p, list_t *iolhead)
4187 {
4188         struct as *as = p->p_as;
4189         prxmap32_t *mp;
4190         struct seg *seg;
4191         struct seg *brkseg, *stkseg;
4192         struct vnode *vp;
4193         struct vattr vattr;
4194         uint_t prot;
4195 
4196         ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4197 
4198         /*
4199          * Request an initial buffer size that doesn't waste memory
4200          * if the address space has only a small number of segments.
4201          */
4202         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4203 
4204         if ((seg = AS_SEGFIRST(as)) == NULL)
4205                 return (0);
4206 
4207         brkseg = break_seg(p);
4208         stkseg = as_segat(as, prgetstackbase(p));
4209 
4210         do {
4211                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4212                 caddr_t saddr, naddr, baddr;
4213                 void *tmp = NULL;
4214                 ssize_t psz;
4215                 char *parr;
4216                 uint64_t npages;




1359                         sp->sa_flags |= SA_NODEFER;
1360                 if (sig == SIGCLD) {
1361                         if (p->p_flag & SNOWAIT)
1362                                 sp->sa_flags |= SA_NOCLDWAIT;
1363                         if ((p->p_flag & SJCTL) == 0)
1364                                 sp->sa_flags |= SA_NOCLDSTOP;
1365                 }
1366         }
1367 }
1368 #endif  /* _SYSCALL32_IMPL */
1369 
1370 /*
1371  * Count the number of segments in this process's address space.
1372  */
1373 int
1374 prnsegs(struct as *as, int reserved)
1375 {
1376         int n = 0;
1377         struct seg *seg;
1378 
1379         ASSERT(as != &kas && AS_WRITE_HELD(as));
1380 
1381         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383                 caddr_t saddr, naddr;
1384                 void *tmp = NULL;
1385 
1386                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387                         (void) pr_getprot(seg, reserved, &tmp,
1388                             &saddr, &naddr, eaddr);
1389                         if (saddr != naddr)
1390                                 n++;
1391                 }
1392 
1393                 ASSERT(tmp == NULL);
1394         }
1395 
1396         return (n);
1397 }
1398 
1399 /*


1602         list_destroy(iolhead);
1603 
1604         return (error);
1605 }
1606 
1607 /*
1608  * Return an array of structures with memory map information.
1609  * We allocate here; the caller must deallocate.
1610  */
1611 int
1612 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1613 {
1614         struct as *as = p->p_as;
1615         prmap_t *mp;
1616         struct seg *seg;
1617         struct seg *brkseg, *stkseg;
1618         struct vnode *vp;
1619         struct vattr vattr;
1620         uint_t prot;
1621 
1622         ASSERT(as != &kas && AS_WRITE_HELD(as));
1623 
1624         /*
1625          * Request an initial buffer size that doesn't waste memory
1626          * if the address space has only a small number of segments.
1627          */
1628         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1629 
1630         if ((seg = AS_SEGFIRST(as)) == NULL)
1631                 return (0);
1632 
1633         brkseg = break_seg(p);
1634         stkseg = as_segat(as, prgetstackbase(p));
1635 
1636         do {
1637                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638                 caddr_t saddr, naddr;
1639                 void *tmp = NULL;
1640 
1641                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642                         prot = pr_getprot(seg, reserved, &tmp,


1713                         }
1714                 }
1715                 ASSERT(tmp == NULL);
1716         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1717 
1718         return (0);
1719 }
1720 
1721 #ifdef _SYSCALL32_IMPL
1722 int
1723 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1724 {
1725         struct as *as = p->p_as;
1726         prmap32_t *mp;
1727         struct seg *seg;
1728         struct seg *brkseg, *stkseg;
1729         struct vnode *vp;
1730         struct vattr vattr;
1731         uint_t prot;
1732 
1733         ASSERT(as != &kas && AS_WRITE_HELD(as));
1734 
1735         /*
1736          * Request an initial buffer size that doesn't waste memory
1737          * if the address space has only a small number of segments.
1738          */
1739         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1740 
1741         if ((seg = AS_SEGFIRST(as)) == NULL)
1742                 return (0);
1743 
1744         brkseg = break_seg(p);
1745         stkseg = as_segat(as, prgetstackbase(p));
1746 
1747         do {
1748                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749                 caddr_t saddr, naddr;
1750                 void *tmp = NULL;
1751 
1752                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753                         prot = pr_getprot(seg, reserved, &tmp,


1823                         } else {
1824                                 mp->pr_shmid = -1;
1825                         }
1826                 }
1827                 ASSERT(tmp == NULL);
1828         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1829 
1830         return (0);
1831 }
1832 #endif  /* _SYSCALL32_IMPL */
1833 
1834 /*
1835  * Return the size of the /proc page data file.
1836  */
1837 size_t
1838 prpdsize(struct as *as)
1839 {
1840         struct seg *seg;
1841         size_t size;
1842 
1843         ASSERT(as != &kas && AS_WRITE_HELD(as));
1844 
1845         if ((seg = AS_SEGFIRST(as)) == NULL)
1846                 return (0);
1847 
1848         size = sizeof (prpageheader_t);
1849         do {
1850                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851                 caddr_t saddr, naddr;
1852                 void *tmp = NULL;
1853                 size_t npage;
1854 
1855                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856                         (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857                         if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858                                 size += sizeof (prasmap_t) + round8(npage);
1859                 }
1860                 ASSERT(tmp == NULL);
1861         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1862 
1863         return (size);
1864 }
1865 
1866 #ifdef _SYSCALL32_IMPL
1867 size_t
1868 prpdsize32(struct as *as)
1869 {
1870         struct seg *seg;
1871         size_t size;
1872 
1873         ASSERT(as != &kas && AS_WRITE_HELD(as));
1874 
1875         if ((seg = AS_SEGFIRST(as)) == NULL)
1876                 return (0);
1877 
1878         size = sizeof (prpageheader32_t);
1879         do {
1880                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881                 caddr_t saddr, naddr;
1882                 void *tmp = NULL;
1883                 size_t npage;
1884 
1885                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886                         (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887                         if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888                                 size += sizeof (prasmap32_t) + round8(npage);
1889                 }
1890                 ASSERT(tmp == NULL);
1891         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1892 
1893         return (size);
1894 }
1895 #endif  /* _SYSCALL32_IMPL */
1896 
1897 /*
1898  * Read page data information.
1899  */
1900 int
1901 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1902 {
1903         struct as *as = p->p_as;
1904         caddr_t buf;
1905         size_t size;
1906         prpageheader_t *php;
1907         prasmap_t *pmp;
1908         struct seg *seg;
1909         int error;
1910 
1911 again:
1912         AS_LOCK_ENTER(as, RW_WRITER);
1913 
1914         if ((seg = AS_SEGFIRST(as)) == NULL) {
1915                 AS_LOCK_EXIT(as);
1916                 return (0);
1917         }
1918         size = prpdsize(as);
1919         if (uiop->uio_resid < size) {
1920                 AS_LOCK_EXIT(as);
1921                 return (E2BIG);
1922         }
1923 
1924         buf = kmem_zalloc(size, KM_SLEEP);
1925         php = (prpageheader_t *)buf;
1926         pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1927 
1928         hrt2ts(gethrtime(), &php->pr_tstamp);
1929         php->pr_nmap = 0;
1930         php->pr_npage = 0;
1931         do {
1932                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1933                 caddr_t saddr, naddr;
1934                 void *tmp = NULL;
1935 
1936                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937                         struct vnode *vp;
1938                         struct vattr vattr;
1939                         size_t len;
1940                         size_t npage;


1948                         next = (uintptr_t)(pmp + 1) + round8(npage);
1949                         /*
1950                          * It's possible that the address space can change
1951                          * subtly even though we're holding as->a_lock
1952                          * due to the nondeterminism of page_exists() in
1953                          * the presence of asynchronously flushed pages or
1954                          * mapped files whose sizes are changing.
1955                          * page_exists() may be called indirectly from
1956                          * pr_getprot() by a SEGOP_INCORE() routine.
1957                          * If this happens we need to make sure we don't
1958                          * overrun the buffer whose size we computed based
1959                          * on the initial iteration through the segments.
1960                          * Once we've detected an overflow, we need to clean
1961                          * up the temporary memory allocated in pr_getprot()
1962                          * and retry. If there's a pending signal, we return
1963                          * EINTR so that this thread can be dislodged if
1964                          * a latent bug causes us to spin indefinitely.
1965                          */
1966                         if (next > (uintptr_t)buf + size) {
1967                                 pr_getprot_done(&tmp);
1968                                 AS_LOCK_EXIT(as);
1969 
1970                                 kmem_free(buf, size);
1971 
1972                                 if (ISSIG(curthread, JUSTLOOKING))
1973                                         return (EINTR);
1974 
1975                                 goto again;
1976                         }
1977 
1978                         php->pr_nmap++;
1979                         php->pr_npage += npage;
1980                         pmp->pr_vaddr = (uintptr_t)saddr;
1981                         pmp->pr_npage = npage;
1982                         pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1983                         pmp->pr_mflags = 0;
1984                         if (prot & PROT_READ)
1985                                 pmp->pr_mflags |= MA_READ;
1986                         if (prot & PROT_WRITE)
1987                                 pmp->pr_mflags |= MA_WRITE;
1988                         if (prot & PROT_EXEC)


2017                          * Get the SysV shared memory id, if any.
2018                          */
2019                         if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020                             (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021                             SHMID_NONE) {
2022                                 if (pmp->pr_shmid == SHMID_FREE)
2023                                         pmp->pr_shmid = -1;
2024 
2025                                 pmp->pr_mflags |= MA_SHM;
2026                         } else {
2027                                 pmp->pr_shmid = -1;
2028                         }
2029 
2030                         hat_getstat(as, saddr, len, hatid,
2031                             (char *)(pmp + 1), HAT_SYNC_ZERORM);
2032                         pmp = (prasmap_t *)next;
2033                 }
2034                 ASSERT(tmp == NULL);
2035         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2036 
2037         AS_LOCK_EXIT(as);
2038 
2039         ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2040         error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2041         kmem_free(buf, size);
2042 
2043         return (error);
2044 }
2045 
2046 #ifdef _SYSCALL32_IMPL
2047 int
2048 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2049 {
2050         struct as *as = p->p_as;
2051         caddr_t buf;
2052         size_t size;
2053         prpageheader32_t *php;
2054         prasmap32_t *pmp;
2055         struct seg *seg;
2056         int error;
2057 
2058 again:
2059         AS_LOCK_ENTER(as, RW_WRITER);
2060 
2061         if ((seg = AS_SEGFIRST(as)) == NULL) {
2062                 AS_LOCK_EXIT(as);
2063                 return (0);
2064         }
2065         size = prpdsize32(as);
2066         if (uiop->uio_resid < size) {
2067                 AS_LOCK_EXIT(as);
2068                 return (E2BIG);
2069         }
2070 
2071         buf = kmem_zalloc(size, KM_SLEEP);
2072         php = (prpageheader32_t *)buf;
2073         pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2074 
2075         hrt2ts32(gethrtime(), &php->pr_tstamp);
2076         php->pr_nmap = 0;
2077         php->pr_npage = 0;
2078         do {
2079                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2080                 caddr_t saddr, naddr;
2081                 void *tmp = NULL;
2082 
2083                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084                         struct vnode *vp;
2085                         struct vattr vattr;
2086                         size_t len;
2087                         size_t npage;


2095                         next = (uintptr_t)(pmp + 1) + round8(npage);
2096                         /*
2097                          * It's possible that the address space can change
2098                          * subtly even though we're holding as->a_lock
2099                          * due to the nondeterminism of page_exists() in
2100                          * the presence of asynchronously flushed pages or
2101                          * mapped files whose sizes are changing.
2102                          * page_exists() may be called indirectly from
2103                          * pr_getprot() by a SEGOP_INCORE() routine.
2104                          * If this happens we need to make sure we don't
2105                          * overrun the buffer whose size we computed based
2106                          * on the initial iteration through the segments.
2107                          * Once we've detected an overflow, we need to clean
2108                          * up the temporary memory allocated in pr_getprot()
2109                          * and retry. If there's a pending signal, we return
2110                          * EINTR so that this thread can be dislodged if
2111                          * a latent bug causes us to spin indefinitely.
2112                          */
2113                         if (next > (uintptr_t)buf + size) {
2114                                 pr_getprot_done(&tmp);
2115                                 AS_LOCK_EXIT(as);
2116 
2117                                 kmem_free(buf, size);
2118 
2119                                 if (ISSIG(curthread, JUSTLOOKING))
2120                                         return (EINTR);
2121 
2122                                 goto again;
2123                         }
2124 
2125                         php->pr_nmap++;
2126                         php->pr_npage += npage;
2127                         pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128                         pmp->pr_npage = (size32_t)npage;
2129                         pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2130                         pmp->pr_mflags = 0;
2131                         if (prot & PROT_READ)
2132                                 pmp->pr_mflags |= MA_READ;
2133                         if (prot & PROT_WRITE)
2134                                 pmp->pr_mflags |= MA_WRITE;
2135                         if (prot & PROT_EXEC)


2164                          * Get the SysV shared memory id, if any.
2165                          */
2166                         if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167                             (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168                             SHMID_NONE) {
2169                                 if (pmp->pr_shmid == SHMID_FREE)
2170                                         pmp->pr_shmid = -1;
2171 
2172                                 pmp->pr_mflags |= MA_SHM;
2173                         } else {
2174                                 pmp->pr_shmid = -1;
2175                         }
2176 
2177                         hat_getstat(as, saddr, len, hatid,
2178                             (char *)(pmp + 1), HAT_SYNC_ZERORM);
2179                         pmp = (prasmap32_t *)next;
2180                 }
2181                 ASSERT(tmp == NULL);
2182         } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2183 
2184         AS_LOCK_EXIT(as);
2185 
2186         ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2187         error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2188         kmem_free(buf, size);
2189 
2190         return (error);
2191 }
2192 #endif  /* _SYSCALL32_IMPL */
2193 
2194 ushort_t
2195 prgetpctcpu(uint64_t pct)
2196 {
2197         /*
2198          * The value returned will be relevant in the zone of the examiner,
2199          * which may not be the same as the zone which performed the procfs
2200          * mount.
2201          */
2202         int nonline = zone_ncpus_online_get(curproc->p_zone);
2203 
2204         /*


2319 
2320                 /* compute %cpu for the process */
2321                 if (p->p_lwpcnt == 1)
2322                         psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2323                 else {
2324                         uint64_t pct = 0;
2325                         hrtime_t cur_time = gethrtime_unscaled();
2326 
2327                         t = p->p_tlist;
2328                         do {
2329                                 pct += cpu_update_pct(t, cur_time);
2330                         } while ((t = t->t_forw) != p->p_tlist);
2331 
2332                         psp->pr_pctcpu = prgetpctcpu(pct);
2333                 }
2334                 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2335                         psp->pr_size = 0;
2336                         psp->pr_rssize = 0;
2337                 } else {
2338                         mutex_exit(&p->p_lock);
2339                         AS_LOCK_ENTER(as, RW_READER);
2340                         psp->pr_size = btopr(as->a_resvsize) *
2341                             (PAGESIZE / 1024);
2342                         psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2343                         psp->pr_pctmem = rm_pctmemory(as);
2344                         AS_LOCK_EXIT(as);
2345                         mutex_enter(&p->p_lock);
2346                 }
2347         }
2348 }
2349 
2350 #ifdef _SYSCALL32_IMPL
2351 void
2352 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2353 {
2354         kthread_t *t;
2355         struct cred *cred;
2356         hrtime_t hrutime, hrstime;
2357 
2358         ASSERT(MUTEX_HELD(&p->p_lock));
2359 
2360         if ((t = prchoose(p)) == NULL)  /* returns locked thread */
2361                 bzero(psp, sizeof (*psp));
2362         else {
2363                 thread_unlock(t);
2364                 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));


2452                 /* compute %cpu for the process */
2453                 if (p->p_lwpcnt == 1)
2454                         psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2455                 else {
2456                         uint64_t pct = 0;
2457                         hrtime_t cur_time;
2458 
2459                         t = p->p_tlist;
2460                         cur_time = gethrtime_unscaled();
2461                         do {
2462                                 pct += cpu_update_pct(t, cur_time);
2463                         } while ((t = t->t_forw) != p->p_tlist);
2464 
2465                         psp->pr_pctcpu = prgetpctcpu(pct);
2466                 }
2467                 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2468                         psp->pr_size = 0;
2469                         psp->pr_rssize = 0;
2470                 } else {
2471                         mutex_exit(&p->p_lock);
2472                         AS_LOCK_ENTER(as, RW_READER);
2473                         psp->pr_size = (size32_t)
2474                             (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2475                         psp->pr_rssize = (size32_t)
2476                             (rm_asrss(as) * (PAGESIZE / 1024));
2477                         psp->pr_pctmem = rm_pctmemory(as);
2478                         AS_LOCK_EXIT(as);
2479                         mutex_enter(&p->p_lock);
2480                 }
2481         }
2482 
2483         /*
2484          * If we are looking at an LP64 process, zero out
2485          * the fields that cannot be represented in ILP32.
2486          */
2487         if (p->p_model != DATAMODEL_ILP32) {
2488                 psp->pr_size = 0;
2489                 psp->pr_rssize = 0;
2490                 psp->pr_argv = 0;
2491                 psp->pr_envp = 0;
2492         }
2493 }
2494 
2495 #endif  /* _SYSCALL32_IMPL */
2496 
2497 void
2498 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)


3296         avl_destroy(&p->p_warea);
3297 }
3298 
3299 /*
3300  * This one is called by the traced process to unwatch all the
3301  * pages while deallocating the list of watched_page structs.
3302  */
3303 void
3304 pr_free_watched_pages(proc_t *p)
3305 {
3306         struct as *as = p->p_as;
3307         struct watched_page *pwp;
3308         uint_t prot;
3309         int    retrycnt, err;
3310         void *cookie;
3311 
3312         if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313                 return;
3314 
3315         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316         AS_LOCK_ENTER(as, RW_WRITER);
3317 
3318         pwp = avl_first(&as->a_wpage);
3319 
3320         cookie = NULL;
3321         while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322                 retrycnt = 0;
3323                 if ((prot = pwp->wp_oprot) != 0) {
3324                         caddr_t addr = pwp->wp_vaddr;
3325                         struct seg *seg;
3326                 retry:
3327 
3328                         if ((pwp->wp_prot != prot ||
3329                             (pwp->wp_flags & WP_NOWATCH)) &&
3330                             (seg = as_segat(as, addr)) != NULL) {
3331                                 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332                                 if (err == IE_RETRY) {
3333                                         ASSERT(retrycnt == 0);
3334                                         retrycnt++;
3335                                         goto retry;
3336                                 }
3337                         }
3338                 }
3339                 kmem_free(pwp, sizeof (struct watched_page));
3340         }
3341 
3342         avl_destroy(&as->a_wpage);
3343         p->p_wprot = NULL;
3344 
3345         AS_LOCK_EXIT(as);
3346 }
3347 
3348 /*
3349  * Insert a watched area into the list of watched pages.
3350  * If oflags is zero then we are adding a new watched area.
3351  * Otherwise we are changing the flags of an existing watched area.
3352  */
3353 static int
3354 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3355         ulong_t flags, ulong_t oflags)
3356 {
3357         struct as *as = p->p_as;
3358         avl_tree_t *pwp_tree;
3359         struct watched_page *pwp, *newpwp;
3360         struct watched_page tpw;
3361         avl_index_t where;
3362         struct seg *seg;
3363         uint_t prot;
3364         caddr_t addr;
3365 
3366         /*
3367          * We need to pre-allocate a list of structures before we grab the
3368          * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3369          * held.
3370          */
3371         newpwp = NULL;
3372         for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3373             addr < eaddr; addr += PAGESIZE) {
3374                 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3375                 pwp->wp_list = newpwp;
3376                 newpwp = pwp;
3377         }
3378 
3379         AS_LOCK_ENTER(as, RW_WRITER);
3380 
3381         /*
3382          * Search for an existing watched page to contain the watched area.
3383          * If none is found, grab a new one from the available list
3384          * and insert it in the active list, keeping the list sorted
3385          * by user-level virtual address.
3386          */
3387         if (p->p_flag & SVFWAIT)
3388                 pwp_tree = &p->p_wpage;
3389         else
3390                 pwp_tree = &as->a_wpage;
3391 
3392 again:
3393         if (avl_numnodes(pwp_tree) > prnwatch) {
3394                 AS_LOCK_EXIT(as);
3395                 while (newpwp != NULL) {
3396                         pwp = newpwp->wp_list;
3397                         kmem_free(newpwp, sizeof (struct watched_page));
3398                         newpwp = pwp;
3399                 }
3400                 return (E2BIG);
3401         }
3402 
3403         tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3404         if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3405                 pwp = newpwp;
3406                 newpwp = newpwp->wp_list;
3407                 pwp->wp_list = NULL;
3408                 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3409                     (uintptr_t)PAGEMASK);
3410                 avl_insert(pwp_tree, pwp, where);
3411         }
3412 
3413         ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3414 


3447                         if (pwp->wp_exec)
3448                                 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3449                         if (!(pwp->wp_flags & WP_NOWATCH) &&
3450                             pwp->wp_prot != prot &&
3451                             (pwp->wp_flags & WP_SETPROT) == 0) {
3452                                 pwp->wp_flags |= WP_SETPROT;
3453                                 pwp->wp_list = p->p_wprot;
3454                                 p->p_wprot = pwp;
3455                         }
3456                         pwp->wp_prot = (uchar_t)prot;
3457                 }
3458         }
3459 
3460         /*
3461          * If the watched area extends into the next page then do
3462          * it over again with the virtual address of the next page.
3463          */
3464         if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3465                 goto again;
3466 
3467         AS_LOCK_EXIT(as);
3468 
3469         /*
3470          * Free any pages we may have over-allocated
3471          */
3472         while (newpwp != NULL) {
3473                 pwp = newpwp->wp_list;
3474                 kmem_free(newpwp, sizeof (struct watched_page));
3475                 newpwp = pwp;
3476         }
3477 
3478         return (0);
3479 }
3480 
3481 /*
3482  * Remove a watched area from the list of watched pages.
3483  * A watched area may extend over more than one page.
3484  */
3485 static void
3486 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3487 {
3488         struct as *as = p->p_as;
3489         struct watched_page *pwp;
3490         struct watched_page tpw;
3491         avl_tree_t *tree;
3492         avl_index_t where;
3493 
3494         AS_LOCK_ENTER(as, RW_WRITER);
3495 
3496         if (p->p_flag & SVFWAIT)
3497                 tree = &p->p_wpage;
3498         else
3499                 tree = &as->a_wpage;
3500 
3501         tpw.wp_vaddr = vaddr =
3502             (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3503         pwp = avl_find(tree, &tpw, &where);
3504         if (pwp == NULL)
3505                 pwp = avl_nearest(tree, where, AVL_AFTER);
3506 
3507         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3508                 ASSERT(vaddr <=  pwp->wp_vaddr);
3509 
3510                 if (flags & WA_READ)
3511                         pwp->wp_read--;
3512                 if (flags & WA_WRITE)
3513                         pwp->wp_write--;
3514                 if (flags & WA_EXEC)


3539                                 pwp->wp_prot = (uchar_t)prot;
3540                         }
3541                 } else {
3542                         /*
3543                          * No watched areas remain in this page.
3544                          * Reset everything to normal.
3545                          */
3546                         if (pwp->wp_oprot != 0) {
3547                                 pwp->wp_prot = pwp->wp_oprot;
3548                                 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3549                                         pwp->wp_flags |= WP_SETPROT;
3550                                         pwp->wp_list = p->p_wprot;
3551                                         p->p_wprot = pwp;
3552                                 }
3553                         }
3554                 }
3555 
3556                 pwp = AVL_NEXT(tree, pwp);
3557         }
3558 
3559         AS_LOCK_EXIT(as);
3560 }
3561 
3562 /*
3563  * Return the original protections for the specified page.
3564  */
3565 static void
3566 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3567 {
3568         struct watched_page *pwp;
3569         struct watched_page tpw;
3570 
3571         ASSERT(AS_LOCK_HELD(as));
3572 
3573         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3574         if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3575                 *prot = pwp->wp_oprot;
3576 }
3577 
3578 static prpagev_t *
3579 pr_pagev_create(struct seg *seg, int check_noreserve)
3580 {
3581         prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3582         size_t total_pages = seg_pages(seg);
3583 
3584         /*
3585          * Limit the size of our vectors to pagev_lim pages at a time.  We need
3586          * 4 or 5 bytes of storage per page, so this means we limit ourself
3587          * to about a megabyte of kernel heap by default.
3588          */
3589         pagev->pg_npages = MIN(total_pages, pagev_lim);
3590         pagev->pg_pnbase = 0;
3591 


3838 uint_t
3839 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840         caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 {
3842         struct as *as = seg->s_as;
3843 
3844         caddr_t saddr = *saddrp;
3845         caddr_t naddr;
3846 
3847         int check_noreserve;
3848         uint_t prot;
3849 
3850         union {
3851                 struct segvn_data *svd;
3852                 struct segdev_data *sdp;
3853                 void *data;
3854         } s;
3855 
3856         s.data = seg->s_data;
3857 
3858         ASSERT(AS_WRITE_HELD(as));
3859         ASSERT(saddr >= seg->s_base && saddr < eaddr);
3860         ASSERT(eaddr <= seg->s_base + seg->s_size);
3861 
3862         /*
3863          * Don't include MAP_NORESERVE pages in the address range
3864          * unless their mappings have actually materialized.
3865          * We cheat by knowing that segvn is the only segment
3866          * driver that supports MAP_NORESERVE.
3867          */
3868         check_noreserve =
3869             (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3870             (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3871             (s.svd->flags & MAP_NORESERVE));
3872 
3873         /*
3874          * Examine every page only as a last resort.  We use guilty knowledge
3875          * of segvn and segdev to avoid this: if there are no per-page
3876          * protections present in the segment and we don't care about
3877          * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3878          */


3952  * Return true iff the vnode is a /proc file opened by the process itself.
3953  */
3954 int
3955 pr_isself(vnode_t *vp)
3956 {
3957         /*
3958          * XXX: To retain binary compatibility with the old
3959          * ioctl()-based version of /proc, we exempt self-opens
3960          * of /proc/<pid> from being marked close-on-exec.
3961          */
3962         return (vn_matchops(vp, prvnodeops) &&
3963             (VTOP(vp)->pr_flags & PR_ISSELF) &&
3964             VTOP(vp)->pr_type != PR_PIDDIR);
3965 }
3966 
3967 static ssize_t
3968 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3969 {
3970         ssize_t pagesize, hatsize;
3971 
3972         ASSERT(AS_WRITE_HELD(seg->s_as));
3973         ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3974         ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3975         ASSERT(saddr < eaddr);
3976 
3977         pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3978         ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3979         ASSERT(pagesize != 0);
3980 
3981         if (pagesize == -1)
3982                 pagesize = PAGESIZE;
3983 
3984         saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3985 
3986         while (saddr < eaddr) {
3987                 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3988                         break;
3989                 ASSERT(IS_P2ALIGNED(saddr, pagesize));
3990                 saddr += pagesize;
3991         }
3992 
3993         *naddrp = ((saddr < eaddr) ? saddr : eaddr);
3994         return (hatsize);
3995 }
3996 
3997 /*
3998  * Return an array of structures with extended memory map information.
3999  * We allocate here; the caller must deallocate.
4000  */
4001 int
4002 prgetxmap(proc_t *p, list_t *iolhead)
4003 {
4004         struct as *as = p->p_as;
4005         prxmap_t *mp;
4006         struct seg *seg;
4007         struct seg *brkseg, *stkseg;
4008         struct vnode *vp;
4009         struct vattr vattr;
4010         uint_t prot;
4011 
4012         ASSERT(as != &kas && AS_WRITE_HELD(as));
4013 
4014         /*
4015          * Request an initial buffer size that doesn't waste memory
4016          * if the address space has only a small number of segments.
4017          */
4018         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4019 
4020         if ((seg = AS_SEGFIRST(as)) == NULL)
4021                 return (0);
4022 
4023         brkseg = break_seg(p);
4024         stkseg = as_segat(as, prgetstackbase(p));
4025 
4026         do {
4027                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4028                 caddr_t saddr, naddr, baddr;
4029                 void *tmp = NULL;
4030                 ssize_t psz;
4031                 char *parr;
4032                 uint64_t npages;


4176         cred2prpriv(p->p_cred, pprp);
4177         mutex_exit(&p->p_crlock);
4178 }
4179 
4180 #ifdef _SYSCALL32_IMPL
4181 /*
4182  * Return an array of structures with HAT memory map information.
4183  * We allocate here; the caller must deallocate.
4184  */
4185 int
4186 prgetxmap32(proc_t *p, list_t *iolhead)
4187 {
4188         struct as *as = p->p_as;
4189         prxmap32_t *mp;
4190         struct seg *seg;
4191         struct seg *brkseg, *stkseg;
4192         struct vnode *vp;
4193         struct vattr vattr;
4194         uint_t prot;
4195 
4196         ASSERT(as != &kas && AS_WRITE_HELD(as));
4197 
4198         /*
4199          * Request an initial buffer size that doesn't waste memory
4200          * if the address space has only a small number of segments.
4201          */
4202         pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4203 
4204         if ((seg = AS_SEGFIRST(as)) == NULL)
4205                 return (0);
4206 
4207         brkseg = break_seg(p);
4208         stkseg = as_segat(as, prgetstackbase(p));
4209 
4210         do {
4211                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4212                 caddr_t saddr, naddr, baddr;
4213                 void *tmp = NULL;
4214                 ssize_t psz;
4215                 char *parr;
4216                 uint64_t npages;