1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27 /*        All Rights Reserved   */
  28 
  29 #pragma ident   "%Z%%M% %I%     %E% SMI"
  30 
  31 #include <sys/atomic.h>
  32 #include <sys/errno.h>
  33 #include <sys/stat.h>
  34 #include <sys/modctl.h>
  35 #include <sys/conf.h>
  36 #include <sys/systm.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/kmem.h>
  41 #include <sys/strsubr.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/frame.h>
  44 #include <sys/stack.h>
  45 #include <sys/proc.h>
  46 #include <sys/priv.h>
  47 #include <sys/policy.h>
  48 #include <sys/ontrap.h>
  49 #include <sys/vmsystm.h>
  50 #include <sys/prsystm.h>
  51 
  52 #include <vm/as.h>
  53 #include <vm/seg.h>
  54 #include <vm/seg_dev.h>
  55 #include <vm/seg_vn.h>
  56 #include <vm/seg_spt.h>
  57 #include <vm/seg_kmem.h>
  58 
  59 extern struct seg_ops segdev_ops;       /* needs a header file */
  60 extern struct seg_ops segspt_shmops;    /* needs a header file */
  61 
  62 static int
  63 page_valid(struct seg *seg, caddr_t addr)
  64 {
  65         struct segvn_data *svd;
  66         vnode_t *vp;
  67         vattr_t vattr;
  68 
  69         /*
  70          * Fail if the page doesn't map to a page in the underlying
  71          * mapped file, if an underlying mapped file exists.
  72          */
  73         vattr.va_mask = AT_SIZE;
  74         if (seg->s_ops == &segvn_ops &&
  75             SEGOP_GETVP(seg, addr, &vp) == 0 &&
  76             vp != NULL && vp->v_type == VREG &&
  77             VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
  78                 u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
  79                 u_offset_t offset = SEGOP_GETOFFSET(seg, addr);
  80 
  81                 if (offset >= size)
  82                         return (0);
  83         }
  84 
  85         /*
  86          * Fail if this is an ISM shared segment and the address is
  87          * not within the real size of the spt segment that backs it.
  88          */
  89         if (seg->s_ops == &segspt_shmops &&
  90             addr >= seg->s_base + spt_realsize(seg))
  91                 return (0);
  92 
  93         /*
  94          * Fail if the segment is mapped from /dev/null.
  95          * The key is that the mapping comes from segdev and the
  96          * type is neither MAP_SHARED nor MAP_PRIVATE.
  97          */
  98         if (seg->s_ops == &segdev_ops &&
  99             ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
 100                 return (0);
 101 
 102         /*
 103          * Fail if the page is a MAP_NORESERVE page that has
 104          * not actually materialized.
 105          * We cheat by knowing that segvn is the only segment
 106          * driver that supports MAP_NORESERVE.
 107          */
 108         if (seg->s_ops == &segvn_ops &&
 109             (svd = (struct segvn_data *)seg->s_data) != NULL &&
 110             (svd->vp == NULL || svd->vp->v_type != VREG) &&
 111             (svd->flags & MAP_NORESERVE)) {
 112                 /*
 113                  * Guilty knowledge here.  We know that
 114                  * segvn_incore returns more than just the
 115                  * low-order bit that indicates the page is
 116                  * actually in memory.  If any bits are set,
 117                  * then there is backing store for the page.
 118                  */
 119                 char incore = 0;
 120                 (void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
 121                 if (incore == 0)
 122                         return (0);
 123         }
 124         return (1);
 125 }
 126 
 127 /*
 128  * Map address "addr" in address space "as" into a kernel virtual address.
 129  * The memory is guaranteed to be resident and locked down.
 130  */
 131 static caddr_t
 132 mapin(struct as *as, caddr_t addr, int writing)
 133 {
 134         page_t *pp;
 135         caddr_t kaddr;
 136         pfn_t pfnum;
 137 
 138         /*
 139          * NB: Because of past mistakes, we have bits being returned
 140          * by getpfnum that are actually the page type bits of the pte.
 141          * When the object we are trying to map is a memory page with
 142          * a page structure everything is ok and we can use the optimal
 143          * method, ppmapin.  Otherwise, we have to do something special.
 144          */
 145         pfnum = hat_getpfnum(as->a_hat, addr);
 146         if (pf_is_memory(pfnum)) {
 147                 pp = page_numtopp_nolock(pfnum);
 148                 if (pp != NULL) {
 149                         ASSERT(PAGE_LOCKED(pp));
 150                         kaddr = ppmapin(pp, writing ?
 151                                 (PROT_READ | PROT_WRITE) : PROT_READ,
 152                                 (caddr_t)-1);
 153                         return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
 154                 }
 155         }
 156 
 157         /*
 158          * Oh well, we didn't have a page struct for the object we were
 159          * trying to map in; ppmapin doesn't handle devices, but allocating a
 160          * heap address allows ppmapout to free virutal space when done.
 161          */
 162         kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
 163 
 164         hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
 165                 writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);
 166 
 167         return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
 168 }
 169 
 170 /*ARGSUSED*/
 171 static void
 172 mapout(struct as *as, caddr_t addr, caddr_t vaddr, int writing)
 173 {
 174         vaddr = (caddr_t)(uintptr_t)((uintptr_t)vaddr & PAGEMASK);
 175         ppmapout(vaddr);
 176 }
 177 
 178 /*
 179  * Perform I/O to a given process. This will return EIO if we detect
 180  * corrupt memory and ENXIO if there is no such mapped address in the
 181  * user process's address space.
 182  */
static int
urw(proc_t *p, int writing, void *buf, size_t len, uintptr_t a)
{
	caddr_t addr = (caddr_t)a;
	caddr_t page;
	caddr_t vaddr;
	struct seg *seg;
	int error = 0;
	int err = 0;
	uint_t prot;
	uint_t prot_rw = writing ? PROT_WRITE : PROT_READ;
	int protchanged;
	on_trap_data_t otd;
	int retrycnt;
	struct as *as = p->p_as;
	enum seg_rw rw;

	/*
	 * Locate segment containing address of interest.
	 *
	 * The address space lock is taken as WRITER so that no other
	 * thread can fault or remap while we temporarily widen the
	 * page's protections and softlock it (see also the
	 * S_READ_NOCOW comment below, which relies on this).
	 */
	page = (caddr_t)(uintptr_t)((uintptr_t)addr & PAGEMASK);
	retrycnt = 0;
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
retry:
	if ((seg = as_segat(as, page)) == NULL ||
	    !page_valid(seg, page)) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	SEGOP_GETPROT(seg, page, 0, &prot);

	/*
	 * If the page lacks the access we need, grant it temporarily;
	 * the original protections are restored before we return.
	 */
	protchanged = 0;
	if ((prot & prot_rw) == 0) {
		protchanged = 1;
		err = SEGOP_SETPROT(seg, page, PAGESIZE, prot | prot_rw);

		if (err == IE_RETRY) {
			/*
			 * The segment changed under us (presumably
			 * re-split by the setprot call); look it up
			 * again.  This is expected to happen at most
			 * once -- TODO confirm the single-retry
			 * assumption the ASSERT encodes.
			 */
			protchanged = 0;
			ASSERT(retrycnt == 0);
			retrycnt++;
			goto retry;
		}

		if (err != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENXIO);
		}
	}

	/*
	 * segvn may do a copy-on-write for F_SOFTLOCK/S_READ case to break
	 * sharing to avoid a copy on write of a softlocked page by another
	 * thread. But since we locked the address space as a writer no other
	 * thread can cause a copy on write. S_READ_NOCOW is passed as the
	 * access type to tell segvn that it's ok not to do a copy-on-write
	 * for this SOFTLOCK fault.
	 */
	if (writing)
		rw = S_WRITE;
	else if (seg->s_ops == &segvn_ops)
		rw = S_READ_NOCOW;
	else
		rw = S_READ;

	/* Softlock the page so it stays resident while we copy. */
	if (SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTLOCK, rw)) {
		if (protchanged)
			(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENXIO);
	}
	CPU_STATS_ADD_K(vm, softlock, 1);	/* account the softlock */

	/*
	 * Make sure we're not trying to read or write off the end of the page.
	 */
	ASSERT(len <= page + PAGESIZE - addr);

	/*
	 * Map in the locked page, copy to our local buffer,
	 * then map the page out and unlock it.
	 */
	vaddr = mapin(as, addr, writing);

	/*
	 * Since we are copying memory on behalf of the user process,
	 * protect against memory error correction faults.
	 */
	if (!on_trap(&otd, OT_DATA_EC)) {
		if (seg->s_ops == &segdev_ops) {
			/*
			 * Device memory can behave strangely; invoke
			 * a segdev-specific copy operation instead.
			 */
			if (writing) {
				if (segdev_copyto(seg, addr, buf, vaddr, len))
					error = ENXIO;
			} else {
				if (segdev_copyfrom(seg, addr, vaddr, buf, len))
					error = ENXIO;
			}
		} else {
			if (writing)
				bcopy(buf, vaddr, len);
			else
				bcopy(vaddr, buf, len);
		}
	} else {
		/* Trapped on a data error while copying. */
		error = EIO;
	}
	no_trap();

	/*
	 * If we're writing to an executable page, we may need to sychronize
	 * the I$ with the modifications we made through the D$.
	 */
	if (writing && (prot & PROT_EXEC))
		sync_icache(vaddr, (uint_t)len);

	mapout(as, addr, vaddr, writing);

	/*
	 * Drop the NOCOW hint for the unlock; the softunlock side uses
	 * the plain access type.
	 */
	if (rw == S_READ_NOCOW)
		rw = S_READ;

	(void) SEGOP_FAULT(as->a_hat, seg, page, PAGESIZE, F_SOFTUNLOCK, rw);

	/* Restore the original protections if we widened them above. */
	if (protchanged)
		(void) SEGOP_SETPROT(seg, page, PAGESIZE, prot);

	AS_LOCK_EXIT(as, &as->a_lock);

	return (error);
}
 315 
 316 int
 317 uread(proc_t *p, void *buf, size_t len, uintptr_t a)
 318 {
 319         return (urw(p, 0, buf, len, a));
 320 }
 321 
 322 int
 323 uwrite(proc_t *p, void *buf, size_t len, uintptr_t a)
 324 {
 325         return (urw(p, 1, buf, len, a));
 326 }