1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /* All Rights Reserved */
  28 
  29 /*
  30  * Portions of this source code were derived from Berkeley 4.3 BSD
  31  * under license from the Regents of the University of California.
  32  */
  33 
  34 /*
  35  * VM - segment for non-faulting loads.
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/t_lock.h>
  40 #include <sys/param.h>
  41 #include <sys/mman.h>
  42 #include <sys/errno.h>
  43 #include <sys/kmem.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/vnode.h>
  46 #include <sys/proc.h>
  47 #include <sys/conf.h>
  48 #include <sys/debug.h>
  49 #include <sys/archsystm.h>
  50 #include <sys/lgrp.h>
  51 
  52 #include <vm/page.h>
  53 #include <vm/hat.h>
  54 #include <vm/as.h>
  55 #include <vm/seg.h>
  56 #include <vm/vpage.h>
  57 
  58 /*
  59  * Private seg op routines.
  60  */
  61 static int      segnf_dup(struct seg *seg, struct seg *newseg);
  62 static int      segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
  63 static void     segnf_free(struct seg *seg);
  64 static faultcode_t segnf_nomap(void);
  65 static int      segnf_setprot(struct seg *seg, caddr_t addr,
  66                     size_t len, uint_t prot);
  67 static int      segnf_checkprot(struct seg *seg, caddr_t addr,
  68                     size_t len, uint_t prot);
  69 static void     segnf_badop(void);
  70 static int      segnf_nop(void);
  71 static int      segnf_getprot(struct seg *seg, caddr_t addr,
  72                     size_t len, uint_t *protv);
  73 static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
  74 static int      segnf_gettype(struct seg *seg, caddr_t addr);
  75 static int      segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
  76 static void     segnf_dump(struct seg *seg);
  77 static int      segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
  78                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
  79 static int      segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  80                     uint_t szc);
  81 static int      segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  82 static lgrp_mem_policy_info_t   *segnf_getpolicy(struct seg *seg,
  83     caddr_t addr);
  84 
  85 
  86 struct seg_ops segnf_ops = {
  87         segnf_dup,
  88         segnf_unmap,
  89         segnf_free,
  90         (faultcode_t (*)(struct hat *, struct seg *, caddr_t, size_t,
  91             enum fault_type, enum seg_rw))
  92                 segnf_nomap,            /* fault */
  93         (faultcode_t (*)(struct seg *, caddr_t))
  94                 segnf_nomap,            /* faulta */
  95         segnf_setprot,
  96         segnf_checkprot,
  97         (int (*)())segnf_badop,         /* kluster */
  98         (int (*)(struct seg *, caddr_t, size_t, int, uint_t))
  99                 segnf_nop,              /* sync */
 100         (size_t (*)(struct seg *, caddr_t, size_t, char *))
 101                 segnf_nop,              /* incore */
 102         (int (*)(struct seg *, caddr_t, size_t, int, int, ulong_t *, size_t))
 103                 segnf_nop,              /* lockop */
 104         segnf_getprot,
 105         segnf_getoffset,
 106         segnf_gettype,
 107         segnf_getvp,
 108         (int (*)(struct seg *, caddr_t, size_t, uint_t))
 109                 segnf_nop,              /* advise */
 110         segnf_dump,
 111         segnf_pagelock,
 112         segnf_setpagesize,
 113         segnf_getmemid,
 114         segnf_getpolicy,
 115 };
 116 
 117 /*
 118  * vnode and page for the page of zeros we use for the nf mappings.
 119  */
 120 static kmutex_t segnf_lock;
 121 static struct vnode nfvp;
 122 static struct page **nfpp;
 123 
 124 #define addr_to_vcolor(addr)                                            \
 125         (shm_alignment) ?                                               \
 126         ((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0
 127 
 128 /*
 129  * We try to limit the number of Non-fault segments created.
 130  * Non fault segments are created to optimize sparc V9 code which uses
 131  * the sparc nonfaulting load ASI (ASI_PRIMARY_NOFAULT).
 132  *
 133  * There are several reasons why creating too many non-fault segments
 134  * could cause problems.
 135  *
 136  *      First, excessive allocation of kernel resources for the seg
 137  *      structures and the HAT data to map the zero pages.
 138  *
 139  *      Secondly, creating nofault segments actually uses up user virtual
 140  *      address space. This makes it unavailable for subsequent mmap(0, ...)
 141  *      calls which use as_gap() to find empty va regions.  Creation of too
 142  *      many nofault segments could thus interfere with the ability of the
 143  *      runtime linker to load a shared object.
 144  */
 145 #define MAXSEGFORNF     (10000)
 146 #define MAXNFSEARCH     (5)
 147 
 148 
 149 /*
 150  * Must be called from startup()
 151  */
 152 void
 153 segnf_init()
 154 {
 155         mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
 156 }
 157 
 158 
 159 /*
 160  * Create a no-fault segment.
 161  *
 162  * The no-fault segment is not technically necessary, as the code in
 163  * nfload() in trap.c will emulate the SPARC instruction and load
 164  * a value of zero in the destination register.
 165  *
 166  * However, this code tries to put a page of zero's at the nofault address
 167  * so that subsequent non-faulting loads to the same page will not
 168  * trap with a tlb miss.
 169  *
 170  * In order to help limit the number of segments we merge adjacent nofault
 171  * segments into a single segment.  If we get a large number of segments
 172  * we'll also try to delete a random other nf segment.
 173  */
 174 /* ARGSUSED */
 175 int
 176 segnf_create(struct seg *seg, void *argsp)
 177 {
 178         uint_t prot;
 179         pgcnt_t vacpgs;
 180         u_offset_t off = 0;
 181         caddr_t vaddr = NULL;
 182         int i, color;
 183         struct seg *s1;
 184         struct seg *s2;
 185         size_t size;
 186         struct as *as = seg->s_as;
 187 
 188         ASSERT(as && AS_WRITE_HELD(as));
 189 
 190         /*
 191          * Need a page per virtual color or just 1 if no vac.
 192          */
 193         mutex_enter(&segnf_lock);
 194         if (nfpp == NULL) {
 195                 struct seg kseg;
 196 
 197                 vacpgs = 1;
 198                 if (shm_alignment > PAGESIZE) {
 199                         vacpgs = shm_alignment >> PAGESHIFT;
 200                 }
 201 
 202                 nfpp = kmem_alloc(sizeof (*nfpp) * vacpgs, KM_SLEEP);
 203 
 204                 kseg.s_as = &kas;
 205                 for (i = 0; i < vacpgs; i++, off += PAGESIZE,
 206                     vaddr += PAGESIZE) {
 207                         nfpp[i] = page_create_va(&nfvp, off, PAGESIZE,
 208                             PG_WAIT | PG_NORELOC, &kseg, vaddr);
 209                         page_io_unlock(nfpp[i]);
 210                         page_downgrade(nfpp[i]);
 211                         pagezero(nfpp[i], 0, PAGESIZE);
 212                 }
 213         }
 214         mutex_exit(&segnf_lock);
 215 
 216         hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 217 
 218         /*
 219          * s_data can't be NULL because of ASSERTS in the common vm code.
 220          */
 221         seg->s_ops = &segnf_ops;
 222         seg->s_data = seg;
 223         seg->s_flags |= S_PURGE;
 224 
 225         mutex_enter(&as->a_contents);
 226         as->a_flags |= AS_NEEDSPURGE;
 227         mutex_exit(&as->a_contents);
 228 
 229         prot = PROT_READ;
 230         color = addr_to_vcolor(seg->s_base);
 231         if (as != &kas)
 232                 prot |= PROT_USER;
 233         hat_memload(as->a_hat, seg->s_base, nfpp[color],
 234             prot | HAT_NOFAULT, HAT_LOAD);
 235 
 236         /*
 237          * At this point see if we can concatenate a segment to
 238          * a non-fault segment immediately before and/or after it.
 239          */
 240         if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
 241             s1->s_ops == &segnf_ops &&
 242             s1->s_base + s1->s_size == seg->s_base) {
 243                 size = s1->s_size;
 244                 seg_free(s1);
 245                 seg->s_base -= size;
 246                 seg->s_size += size;
 247         }
 248 
 249         if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
 250             s2->s_ops == &segnf_ops &&
 251             seg->s_base + seg->s_size == s2->s_base) {
 252                 size = s2->s_size;
 253                 seg_free(s2);
 254                 seg->s_size += size;
 255         }
 256 
 257         /*
 258          * if we already have a lot of segments, try to delete some other
 259          * nofault segment to reduce the probability of uncontrolled segment
 260          * creation.
 261          *
 262          * the code looks around quickly (no more than MAXNFSEARCH segments
 263          * each way) for another NF segment and then deletes it.
 264          */
 265         if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
 266                 size = 0;
 267                 s2 = NULL;
 268                 s1 = AS_SEGPREV(as, seg);
 269                 while (size++ < MAXNFSEARCH && s1 != NULL) {
 270                         if (s1->s_ops == &segnf_ops)
 271                                 s2 = s1;
 272                         s1 = AS_SEGPREV(s1->s_as, seg);
 273                 }
 274                 if (s2 == NULL) {
 275                         s1 = AS_SEGNEXT(as, seg);
 276                         while (size-- > 0 && s1 != NULL) {
 277                                 if (s1->s_ops == &segnf_ops)
 278                                         s2 = s1;
 279                                 s1 = AS_SEGNEXT(as, seg);
 280                         }
 281                 }
 282                 if (s2 != NULL)
 283                         seg_unmap(s2);
 284         }
 285 
 286         return (0);
 287 }
 288 
 289 /*
 290  * Never really need "No fault" segments, so they aren't dup'd.
 291  */
 292 /* ARGSUSED */
 293 static int
 294 segnf_dup(struct seg *seg, struct seg *newseg)
 295 {
 296         panic("segnf_dup");
 297         return (0);
 298 }
 299 
 300 /*
 301  * Split a segment at addr for length len.
 302  */
 303 static int
 304 segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
 305 {
 306         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 307 
 308         /*
 309          * Check for bad sizes.
 310          */
 311         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 312             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
 313                 cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
 314         }
 315 
 316         /*
 317          * Unload any hardware translations in the range to be taken out.
 318          */
 319         hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 320 
 321         if (addr == seg->s_base && len == seg->s_size) {
 322                 /*
 323                  * Freeing entire segment.
 324                  */
 325                 seg_free(seg);
 326         } else if (addr == seg->s_base) {
 327                 /*
 328                  * Freeing the beginning of the segment.
 329                  */
 330                 seg->s_base += len;
 331                 seg->s_size -= len;
 332         } else if (addr + len == seg->s_base + seg->s_size) {
 333                 /*
 334                  * Freeing the end of the segment.
 335                  */
 336                 seg->s_size -= len;
 337         } else {
 338                 /*
 339                  * The section to go is in the middle of the segment, so we
 340                  * have to cut it into two segments.  We shrink the existing
 341                  * "seg" at the low end, and create "nseg" for the high end.
 342                  */
 343                 caddr_t nbase = addr + len;
 344                 size_t nsize = (seg->s_base + seg->s_size) - nbase;
 345                 struct seg *nseg;
 346 
 347                 /*
 348                  * Trim down "seg" before trying to stick "nseg" into the as.
 349                  */
 350                 seg->s_size = addr - seg->s_base;
 351                 nseg = seg_alloc(seg->s_as, nbase, nsize);
 352                 if (nseg == NULL)
 353                         cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
 354 
 355                 /*
 356                  * s_data can't be NULL because of ASSERTs in common VM code.
 357                  */
 358                 nseg->s_ops = seg->s_ops;
 359                 nseg->s_data = nseg;
 360                 nseg->s_flags |= S_PURGE;
 361                 mutex_enter(&seg->s_as->a_contents);
 362                 seg->s_as->a_flags |= AS_NEEDSPURGE;
 363                 mutex_exit(&seg->s_as->a_contents);
 364         }
 365 
 366         return (0);
 367 }
 368 
 369 /*
 370  * Free a segment.
 371  */
 372 static void
 373 segnf_free(struct seg *seg)
 374 {
 375         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 376 }
 377 
 378 /*
 379  * No faults allowed on segnf.
 380  */
 381 static faultcode_t
 382 segnf_nomap(void)
 383 {
 384         return (FC_NOMAP);
 385 }
 386 
 387 /* ARGSUSED */
 388 static int
 389 segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 390 {
 391         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 392         return (EACCES);
 393 }
 394 
 395 /* ARGSUSED */
 396 static int
 397 segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 398 {
 399         uint_t sprot;
 400         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 401 
 402         sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
 403         return ((prot & sprot) == prot ? 0 : EACCES);
 404 }
 405 
 406 static void
 407 segnf_badop(void)
 408 {
 409         panic("segnf_badop");
 410         /*NOTREACHED*/
 411 }
 412 
 413 static int
 414 segnf_nop(void)
 415 {
 416         return (0);
 417 }
 418 
 419 static int
 420 segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
 421 {
 422         size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
 423         size_t p;
 424         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 425 
 426         for (p = 0; p < pgno; ++p)
 427                 protv[p] = PROT_READ;
 428         return (0);
 429 }
 430 
 431 /* ARGSUSED */
 432 static u_offset_t
 433 segnf_getoffset(struct seg *seg, caddr_t addr)
 434 {
 435         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 436 
 437         return ((u_offset_t)0);
 438 }
 439 
 440 /* ARGSUSED */
 441 static int
 442 segnf_gettype(struct seg *seg, caddr_t addr)
 443 {
 444         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 445 
 446         return (MAP_SHARED);
 447 }
 448 
 449 /* ARGSUSED */
 450 static int
 451 segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
 452 {
 453         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 454 
 455         *vpp = &nfvp;
 456         return (0);
 457 }
 458 
 459 /*
 460  * segnf pages are not dumped, so we just return
 461  */
 462 /* ARGSUSED */
 463 static void
 464 segnf_dump(struct seg *seg)
 465 {}
 466 
 467 /*ARGSUSED*/
 468 static int
 469 segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
 470     struct page ***ppp, enum lock_type type, enum seg_rw rw)
 471 {
 472         return (ENOTSUP);
 473 }
 474 
 475 /*ARGSUSED*/
 476 static int
 477 segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 478     uint_t szc)
 479 {
 480         return (ENOTSUP);
 481 }
 482 
 483 /*ARGSUSED*/
 484 static int
 485 segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
 486 {
 487         return (ENODEV);
 488 }
 489 
 490 /*ARGSUSED*/
 491 static lgrp_mem_policy_info_t *
 492 segnf_getpolicy(struct seg *seg, caddr_t addr)
 493 {
 494         return (NULL);
 495 }