6583 remove whole-process swapping

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2015, Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 #ifndef _VM_SEG_H
  41 #define _VM_SEG_H
  42 
  43 #include <sys/vnode.h>
  44 #include <sys/avl.h>
  45 #include <vm/seg_enum.h>
  46 #include <vm/faultcode.h>
  47 #include <vm/hat.h>
  48 
  49 #ifdef  __cplusplus
  50 extern "C" {
  51 #endif
  52 
  53 /*
  54  * VM - Segments.
  55  */
  56 
  57 struct anon_map;
  58 
  59 /*
  60  * kstat statistics for segment advise; kernel instance is segadvstat below
  61  */
  62 typedef struct {
  63         kstat_named_t MADV_FREE_hit;
  64         kstat_named_t MADV_FREE_miss;
  65 } segadvstat_t;
  66 
  67 /*
  68  * memory object ids: two u_longlong_t words, passed to the getmemid seg op
  69  */
  70 typedef struct memid { u_longlong_t val[2]; } memid_t;
  71 
  72 /*
  73  * An address space contains a set of segments, managed by drivers.
  74  * Drivers support mapped devices, sharing, copy-on-write, etc.
  75  *
  76  * The seg structure contains a lock to prevent races, the base virtual
  77  * address and size of the segment, a back pointer to the containing
  78  * address space, pointers to maintain an AVL tree of segments in the
  79  * same address space, and procedure and data hooks for the driver.
  80  * The AVL tree of segments for the address space is sorted by
  81  * ascending base addresses and overlapping segments are not allowed.
  82  *
  83  * After a segment is created, faults may occur on pages of the segment.
  84  * When a fault occurs, the fault handling code must get the desired
  85  * object and set up the hardware translation to the object.  For some
  86  * objects, the fault handling code also implements copy-on-write.
  87  *
  88  * When the hat wants to unload a translation, it can call the unload
  89  * routine which is responsible for processing reference and modify bits.
  90  *
  91  * Each segment is protected by its containing address space lock.  To
  92  * access any field in the segment structure, the "as" must be locked.
  93  * If a segment field is to be modified, the address space lock must be
  94  * write locked.
  95  */
  96 
  97 typedef struct pcache_link {
  98         struct pcache_link      *p_lnext;       /* next link (presumably) */
  99         struct pcache_link      *p_lprev;       /* previous link (presumably) */
 100 } pcache_link_t;                /* list linkage; seg_t embeds one as s_phead */
 101 
 102 typedef struct seg {
 103         caddr_t s_base;                 /* base virtual address */
 104         size_t  s_size;                 /* size in bytes */
 105         uint_t  s_szc;                  /* max page size code */
 106         uint_t  s_flags;                /* segment flags: S_PURGE (below) */
 107         struct  as *s_as;               /* containing address space */
 108         avl_node_t s_tree;              /* AVL tree links to segs in this as */
 109         struct  seg_ops *s_ops;         /* ops vector: struct seg_ops below */
 110         void *s_data;                   /* driver-private data for this seg */
 111         kmutex_t s_pmtx;                /* protects s_phead (pcache list) */
 112         pcache_link_t s_phead;          /* head of seg's pcache list */
 113 } seg_t;
 114 
 115 #define S_PURGE         (0x01)          /* seg should be purged in as_gap() */
 116 
 117 struct  seg_ops {       /* per-driver ops vector; invoked via the SEGOP_* macros */
 118         int     (*dup)(struct seg *, struct seg *);     /* SEGOP_DUP */
 119         int     (*unmap)(struct seg *, caddr_t, size_t);        /* SEGOP_UNMAP */
 120         void    (*free)(struct seg *);  /* SEGOP_FREE */
 121         faultcode_t (*fault)(struct hat *, struct seg *, caddr_t, size_t,
 122             enum fault_type, enum seg_rw);      /* SEGOP_FAULT */
 123         faultcode_t (*faulta)(struct seg *, caddr_t);   /* SEGOP_FAULTA */
 124         int     (*setprot)(struct seg *, caddr_t, size_t, uint_t);      /* SEGOP_SETPROT */
 125         int     (*checkprot)(struct seg *, caddr_t, size_t, uint_t);    /* SEGOP_CHECKPROT */
 126         int     (*kluster)(struct seg *, caddr_t, ssize_t);     /* SEGOP_KLUSTER */

 127         int     (*sync)(struct seg *, caddr_t, size_t, int, uint_t);    /* SEGOP_SYNC */
 128         size_t  (*incore)(struct seg *, caddr_t, size_t, char *);       /* SEGOP_INCORE; see SEG_PAGE_* bits */
 129         int     (*lockop)(struct seg *, caddr_t, size_t, int, int, ulong_t *,
 130                         size_t);        /* SEGOP_LOCKOP */
 131         int     (*getprot)(struct seg *, caddr_t, size_t, uint_t *);    /* SEGOP_GETPROT */
 132         u_offset_t      (*getoffset)(struct seg *, caddr_t);    /* SEGOP_GETOFFSET */
 133         int     (*gettype)(struct seg *, caddr_t);      /* SEGOP_GETTYPE */
 134         int     (*getvp)(struct seg *, caddr_t, struct vnode **);       /* SEGOP_GETVP */
 135         int     (*advise)(struct seg *, caddr_t, size_t, uint_t);       /* SEGOP_ADVISE */
 136         void    (*dump)(struct seg *);  /* SEGOP_DUMP */
 137         int     (*pagelock)(struct seg *, caddr_t, size_t, struct page ***,
 138                         enum lock_type, enum seg_rw);   /* SEGOP_PAGELOCK */
 139         int     (*setpagesize)(struct seg *, caddr_t, size_t, uint_t);  /* SEGOP_SETPAGESIZE */
 140         int     (*getmemid)(struct seg *, caddr_t, memid_t *);  /* SEGOP_GETMEMID */
 141         struct lgrp_mem_policy_info     *(*getpolicy)(struct seg *, caddr_t);   /* SEGOP_GETPOLICY */
 142         int     (*capable)(struct seg *, segcapability_t);      /* SEGOP_CAPABLE */
 143         int     (*inherit)(struct seg *, caddr_t, size_t, uint_t);      /* SEGOP_INHERIT; see SEGP_INH_* */
 144 };
 145 
 146 #ifdef _KERNEL
 147 
 148 /*
 149  * Generic segment operations
 150  */
 151 extern  void    seg_init(void);
 152 extern  struct  seg *seg_alloc(struct as *as, caddr_t base, size_t size);
 153 extern  int     seg_attach(struct as *as, caddr_t base, size_t size,
 154                         struct seg *seg);
 155 extern  void    seg_unmap(struct seg *seg);
 156 extern  void    seg_free(struct seg *seg);
 157 
 158 /*
 159  * functions for pagelock cache support
 160  */
 161 typedef int (*seg_preclaim_cbfunc_t)(void *, caddr_t, size_t,
 162     struct page **, enum seg_rw, int);
 163 
 164 extern  struct  page **seg_plookup(struct seg *seg, struct anon_map *amp,
 165     caddr_t addr, size_t len, enum seg_rw rw, uint_t flags);
 166 extern  void    seg_pinactive(struct seg *seg, struct anon_map *amp,
 167     caddr_t addr, size_t len, struct page **pp, enum seg_rw rw,
 168     uint_t flags, seg_preclaim_cbfunc_t callback);
 169 
 170 extern  void    seg_ppurge(struct seg *seg, struct anon_map *amp,
 171     uint_t flags);
 172 extern  void    seg_ppurge_wiredpp(struct page **pp);
 173 
 174 extern  int     seg_pinsert_check(struct seg *seg, struct anon_map *amp,
 175     caddr_t addr, size_t len, uint_t flags);    /* SEGP_SUCCESS or SEGP_FAIL */
 176 extern  int     seg_pinsert(struct seg *seg, struct anon_map *amp,
 177     caddr_t addr, size_t len, size_t wlen, struct page **pp, enum seg_rw rw,
 178     uint_t flags, seg_preclaim_cbfunc_t callback);  /* SEGP_SUCCESS or SEGP_FAIL */
 179 
 180 extern  void    seg_pasync_thread(void);
 181 extern  void    seg_preap(void);
 182 extern  int     seg_p_disable(void);
 183 extern  void    seg_p_enable(void);
 184 
 185 extern  segadvstat_t    segadvstat;
 186 
 187 /*
 188  * Flags for pagelock cache support.
 189  * Flags argument is passed as uint_t to pcache routines.  The upper 16 bits
 190  * of the flags argument are reserved for alignment page shift when
 191  * SEGP_PSHIFT is set.
 192  */
 193 #define SEGP_FORCE_WIRED        0x1     /* skip check against seg_pwindow */
 194 #define SEGP_AMP                0x2     /* anon map's pcache entry */
 195 #define SEGP_PSHIFT             0x4     /* addr pgsz shift for hash function */
 196 
 197 /*
 198  * Return values for seg_pinsert and seg_pinsert_check functions.
 199  */
 200 #define SEGP_SUCCESS            0       /* seg_pinsert() succeeded */
 201 #define SEGP_FAIL               1       /* seg_pinsert() failed */
 202 
 203 /* Page status bits for segop_incore */
 204 #define SEG_PAGE_INCORE         0x01    /* VA has a page backing it */
 205 #define SEG_PAGE_LOCKED         0x02    /* VA has a page that is locked */
 206 #define SEG_PAGE_HASCOW         0x04    /* VA has a page with a copy-on-write */
 207 #define SEG_PAGE_SOFTLOCK       0x08    /* VA has a page with softlock held */
 208 #define SEG_PAGE_VNODEBACKED    0x10    /* Segment is backed by a vnode */
 209 #define SEG_PAGE_ANON           0x20    /* VA has an anonymous page */
 210 #define SEG_PAGE_VNODE          0x40    /* VA has a vnode page backing it */
 211 /* SEGOP_*(s, ...): call the matching op through (s)->s_ops; `s' is evaluated twice. */
 212 #define SEGOP_DUP(s, n)             (*(s)->s_ops->dup)((s), (n))
 213 #define SEGOP_UNMAP(s, a, l)        (*(s)->s_ops->unmap)((s), (a), (l))
 214 #define SEGOP_FREE(s)               (*(s)->s_ops->free)((s))
 215 #define SEGOP_FAULT(h, s, a, l, t, rw) \
 216                 (*(s)->s_ops->fault)((h), (s), (a), (l), (t), (rw))
 217 #define SEGOP_FAULTA(s, a)          (*(s)->s_ops->faulta)((s), (a))
 218 #define SEGOP_SETPROT(s, a, l, p)   (*(s)->s_ops->setprot)((s), (a), (l), (p))
 219 #define SEGOP_CHECKPROT(s, a, l, p) (*(s)->s_ops->checkprot)((s), (a), (l), (p))
 220 #define SEGOP_KLUSTER(s, a, d)      (*(s)->s_ops->kluster)((s), (a), (d))

 221 #define SEGOP_SYNC(s, a, l, atr, f) \
 222                 (*(s)->s_ops->sync)((s), (a), (l), (atr), (f))
 223 #define SEGOP_INCORE(s, a, l, v)    (*(s)->s_ops->incore)((s), (a), (l), (v))
 224 #define SEGOP_LOCKOP(s, a, l, atr, op, b, p) \
 225                 (*(s)->s_ops->lockop)((s), (a), (l), (atr), (op), (b), (p))
 226 #define SEGOP_GETPROT(s, a, l, p)   (*(s)->s_ops->getprot)((s), (a), (l), (p))
 227 #define SEGOP_GETOFFSET(s, a)       (*(s)->s_ops->getoffset)((s), (a))
 228 #define SEGOP_GETTYPE(s, a)         (*(s)->s_ops->gettype)((s), (a))
 229 #define SEGOP_GETVP(s, a, vpp)      (*(s)->s_ops->getvp)((s), (a), (vpp))
 230 #define SEGOP_ADVISE(s, a, l, b)    (*(s)->s_ops->advise)((s), (a), (l), (b))
 231 #define SEGOP_DUMP(s)               (*(s)->s_ops->dump)((s))
 232 #define SEGOP_PAGELOCK(s, a, l, p, t, rw) \
 233                 (*(s)->s_ops->pagelock)((s), (a), (l), (p), (t), (rw))
 234 #define SEGOP_SETPAGESIZE(s, a, l, szc) \
 235                 (*(s)->s_ops->setpagesize)((s), (a), (l), (szc))
 236 #define SEGOP_GETMEMID(s, a, mp)    (*(s)->s_ops->getmemid)((s), (a), (mp))
 237 #define SEGOP_GETPOLICY(s, a)       (*(s)->s_ops->getpolicy)((s), (a))
 238 #define SEGOP_CAPABLE(s, c)         (*(s)->s_ops->capable)((s), (c))
 239 #define SEGOP_INHERIT(s, a, l, b)   (*(s)->s_ops->inherit)((s), (a), (l), (b))
 240 /* seg_page: byte offset of addr from s_base, shifted right by PAGESHIFT. */
 241 #define seg_page(seg, addr) \
 242         (((uintptr_t)((addr) - (seg)->s_base)) >> PAGESHIFT)
 243 /* seg_pages: s_size in pages (rounds up, assuming PAGEOFFSET == PAGESIZE - 1). */
 244 #define seg_pages(seg) \
 245         (((uintptr_t)((seg)->s_size + PAGEOFFSET)) >> PAGESHIFT)
 246 
 247 #define IE_NOMEM        -1      /* internal to seg layer */
 248 #define IE_RETRY        -2      /* internal to seg layer */
 249 #define IE_REATTACH     -3      /* internal to seg layer */
 250 
 251 /* Values for SEGOP_INHERIT */
 252 #define SEGP_INH_ZERO   0x01
 253 
 254 int seg_inherit_notsup(struct seg *, caddr_t, size_t, uint_t);
 255 
 256 /* Delay/retry factors for seg_p_mem_config_pre_del */
 257 #define SEGP_PREDEL_DELAY_FACTOR        4
 258 /*
 259  * As a workaround to being unable to purge the pagelock
 260  * cache during a DR delete memory operation, we use
 261  * a stall threshold that is twice the maximum seen
 262  * during testing.  This workaround will be removed
 263  * when a suitable fix is found.
 264  */
 265 #define SEGP_STALL_SECONDS      25
 266 #define SEGP_STALL_THRESHOLD \
 267         (SEGP_STALL_SECONDS * SEGP_PREDEL_DELAY_FACTOR)
 268 
 269 #ifdef VMDEBUG
 270 /* NOTE(review): seg_page/seg_pages are already #defined as macros above, so these prototypes would be macro-expanded if VMDEBUG is set -- confirm. */
 271 uint_t  seg_page(struct seg *, caddr_t);
 272 uint_t  seg_pages(struct seg *);
 273 
 274 #endif  /* VMDEBUG */
 275 
 276 boolean_t       seg_can_change_zones(struct seg *);
 277 size_t          seg_swresv(struct seg *);
 278 
 279 #endif  /* _KERNEL */
 280 
 281 #ifdef  __cplusplus
 282 }
 283 #endif
 284 
 285 #endif  /* _VM_SEG_H */
--- EOF ---