6583 remove whole-process swapping
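
This change (6583) retires whole-process swapping from segvn: segvn_swapout() is deleted and its slot is dropped from the segvn_ops method table. The old listing appears first, the new one after it. Because struct seg_ops is built with a positional initializer, removing the swapout member from the struct definition (a seg.h change not shown in this excerpt) obliges every segment driver to delete the matching entry in step, or every later function pointer would silently shift into the wrong slot. A minimal sketch of that hazard, using a hypothetical two-method table rather than the real seg_ops:

    #include <stdio.h>

    /*
     * A toy method table with two entries, standing in for struct seg_ops.
     * None of these names come from the kernel sources.
     */
    struct toy_ops {
            int (*open)(void);
            int (*close)(void);
    };

    static int my_open(void)  { return (1); }
    static int my_close(void) { return (2); }

    /*
     * Positional form: entries must appear in declaration order, so deleting
     * a member from the struct requires editing every initializer in step.
     */
    static struct toy_ops positional = {
            my_open,
            my_close,
    };

    /*
     * C99 designated form: entries bind by name, so removing a member only
     * breaks initializers that still mention the deleted name.
     */
    static struct toy_ops designated = {
            .open  = my_open,
            .close = my_close,
    };

    int
    main(void)
    {
            printf("%d %d\n", positional.open(), designated.close());
            return (0);
    }

Designated initializers would remove the ordering hazard entirely; tables like seg_ops here were written in the older positional style, which is why this diff must touch the initializer as well as the declarations.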
old:
  94 #else
  95 #define PVN_GETPAGE_SZ  PVN_MAX_GETPAGE_SZ
  96 #define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
  97 #endif
  98 
  99 /*
 100  * Private seg op routines.
 101  */
 102 static int      segvn_dup(struct seg *seg, struct seg *newseg);
 103 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
 104 static void     segvn_free(struct seg *seg);
 105 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
 106                     caddr_t addr, size_t len, enum fault_type type,
 107                     enum seg_rw rw);
 108 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
 109 static int      segvn_setprot(struct seg *seg, caddr_t addr,
 110                     size_t len, uint_t prot);
 111 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
 112                     size_t len, uint_t prot);
 113 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
 114 static size_t   segvn_swapout(struct seg *seg);
 115 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
 116                     int attr, uint_t flags);
 117 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
 118                     char *vec);
 119 static int      segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
 120                     int attr, int op, ulong_t *lockmap, size_t pos);
 121 static int      segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
 122                     uint_t *protv);
 123 static u_offset_t       segvn_getoffset(struct seg *seg, caddr_t addr);
 124 static int      segvn_gettype(struct seg *seg, caddr_t addr);
 125 static int      segvn_getvp(struct seg *seg, caddr_t addr,
 126                     struct vnode **vpp);
 127 static int      segvn_advise(struct seg *seg, caddr_t addr, size_t len,
 128                     uint_t behav);
 129 static void     segvn_dump(struct seg *seg);
 130 static int      segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
 131                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
 132 static int      segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 133                     uint_t szc);
 134 static int      segvn_getmemid(struct seg *seg, caddr_t addr,
 135                     memid_t *memidp);
 136 static lgrp_mem_policy_info_t   *segvn_getpolicy(struct seg *, caddr_t);
 137 static int      segvn_capable(struct seg *seg, segcapability_t capable);
 138 static int      segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
 139 
 140 struct  seg_ops segvn_ops = {
 141         segvn_dup,
 142         segvn_unmap,
 143         segvn_free,
 144         segvn_fault,
 145         segvn_faulta,
 146         segvn_setprot,
 147         segvn_checkprot,
 148         segvn_kluster,
 149         segvn_swapout,
 150         segvn_sync,
 151         segvn_incore,
 152         segvn_lockop,
 153         segvn_getprot,
 154         segvn_getoffset,
 155         segvn_gettype,
 156         segvn_getvp,
 157         segvn_advise,
 158         segvn_dump,
 159         segvn_pagelock,
 160         segvn_setpagesize,
 161         segvn_getmemid,
 162         segvn_getpolicy,
 163         segvn_capable,
 164         segvn_inherit
 165 };
 166 
 167 /*
 168  * Common zfod structures, provided as a shorthand for others to use.
 169  */


7031 
7032         /*
7033          * Now we know we have two anon pointers - check to
7034          * see if they happen to be properly allocated.
7035          */
7036 
7037         /*
7038          * XXX We cheat here and don't lock the anon slots. We can't because
7039          * we may have been called from the anon layer which might already
7040          * have locked them. We are holding a refcnt on the slots so they
7041          * can't disappear. The worst that will happen is we'll get the wrong
7042          * names (vp, off) for the slots and make a poor klustering decision.
7043          */
7044         swap_xlate(ap, &vp1, &off1);
7045         swap_xlate(oap, &vp2, &off2);
7046 
7047 
7048         if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
7049                 return (-1);
7050         return (0);
7051 }
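
The adjacency test above is the entire klustering decision: the candidate anon page can be klustered with the original only when both slots resolve to the same swap vnode and their backing offsets are separated by exactly the same delta as the virtual addresses. A stand-alone restatement of the predicate, with a plain pointer comparison standing in for VOP_CMP():

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in for the <vnode, offset> name of a swap slot. */
    struct swap_name {
            const void *vp;
            uint64_t off;
    };

    /*
     * Two pages may be klustered into one i/o only if they live on the same
     * backing object and sit exactly `delta` bytes apart there, matching
     * their virtual-address separation.
     */
    static bool
    can_kluster(struct swap_name cand, struct swap_name base, int64_t delta)
    {
            return (cand.vp == base.vp &&
                (int64_t)(cand.off - base.off) == delta);
    }

    int
    main(void)
    {
            static int dummy_vnode;
            struct swap_name cand = { &dummy_vnode, 8192 };
            struct swap_name base = { &dummy_vnode, 4096 };

            /* cand sits one 4K page past base: klusterable at delta 4096 */
            return (can_kluster(cand, base, 4096) ? 0 : 1);
    }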
7052 
7053 /*
7054  * Swap the pages of seg out to secondary storage, returning the
7055  * number of bytes of storage freed.
7056  *
7057  * The basic idea is first to unload all translations and then to call
7058  * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
7059  * swap device.  Pages to which other segments have mappings will remain
7060  * mapped and won't be swapped.  Our caller (as_swapout) has already
7061  * performed the unloading step.
7062  *
7063  * The value returned is intended to correlate well with the process's
7064  * memory requirements.  However, there are some caveats:
7065  * 1)   When given a shared segment as argument, this routine will
7066  *      only succeed in swapping out pages for the last sharer of the
7067  *      segment.  (Previous callers will only have decremented mapping
7068  *      reference counts.)
7069  * 2)   We assume that the hat layer maintains a large enough translation
7070  *      cache to capture process reference patterns.
7071  */
7072 static size_t
7073 segvn_swapout(struct seg *seg)
7074 {
7075         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7076         struct anon_map *amp;
7077         pgcnt_t pgcnt = 0;
7078         pgcnt_t npages;
7079         pgcnt_t page;
7080         ulong_t anon_index;
7081 
7082         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
7083 
7084         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7085         /*
7086          * Find pages unmapped by our caller and force them
7087          * out to the virtual swap device.
7088          */
7089         if ((amp = svd->amp) != NULL)
7090                 anon_index = svd->anon_index;
7091         npages = seg->s_size >> PAGESHIFT;
7092         for (page = 0; page < npages; page++) {
7093                 page_t *pp;
7094                 struct anon *ap;
7095                 struct vnode *vp;
7096                 u_offset_t off;
7097                 anon_sync_obj_t cookie;
7098 
7099                 /*
7100                  * Obtain <vp, off> pair for the page, then look it up.
7101                  *
7102                  * Note that this code is willing to consider regular
7103                  * pages as well as anon pages.  Is this appropriate here?
7104                  */
7105                 ap = NULL;
7106                 if (amp != NULL) {
7107                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
7108                         if (anon_array_try_enter(amp, anon_index + page,
7109                             &cookie)) {
7110                                 ANON_LOCK_EXIT(&amp->a_rwlock);
7111                                 continue;
7112                         }
7113                         ap = anon_get_ptr(amp->ahp, anon_index + page);
7114                         if (ap != NULL) {
7115                                 swap_xlate(ap, &vp, &off);
7116                         } else {
7117                                 vp = svd->vp;
7118                                 off = svd->offset + ptob(page);
7119                         }
7120                         anon_array_exit(&cookie);
7121                         ANON_LOCK_EXIT(&amp->a_rwlock);
7122                 } else {
7123                         vp = svd->vp;
7124                         off = svd->offset + ptob(page);
7125                 }
7126                 if (vp == NULL) {               /* untouched zfod page */
7127                         ASSERT(ap == NULL);
7128                         continue;
7129                 }
7130 
7131                 pp = page_lookup_nowait(vp, off, SE_SHARED);
7132                 if (pp == NULL)
7133                         continue;
7134 
7135 
7136                 /*
7137                  * Examine the page to see whether it can be tossed out,
7138                  * keeping track of how many we've found.
7139                  */
7140                 if (!page_tryupgrade(pp)) {
7141                         /*
7142                          * If the page has an i/o lock and no mappings,
7143                          * it's very likely that the page is being
7144                          * written out as a result of klustering.
7145                          * Assume this is so and take credit for it here.
7146                          */
7147                         if (!page_io_trylock(pp)) {
7148                                 if (!hat_page_is_mapped(pp))
7149                                         pgcnt++;
7150                         } else {
7151                                 page_io_unlock(pp);
7152                         }
7153                         page_unlock(pp);
7154                         continue;
7155                 }
7156                 ASSERT(!page_iolock_assert(pp));
7157 
7158 
7159                 /*
7160                  * Skip if page is locked or has mappings.
7161                  * We don't need the page_struct_lock to look at lckcnt
7162                  * and cowcnt because the page is exclusive locked.
7163                  */
7164                 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
7165                     hat_page_is_mapped(pp)) {
7166                         page_unlock(pp);
7167                         continue;
7168                 }
7169 
7170                 /*
7171                  * dispose skips large pages so try to demote first.
7172                  */
7173                 if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
7174                         page_unlock(pp);
7175                         /*
7176                          * XXX should skip the remaining page_t's of this
7177                          * large page.
7178                          */
7179                         continue;
7180                 }
7181 
7182                 ASSERT(pp->p_szc == 0);
7183 
7184                 /*
7185                  * No longer mapped -- we can toss it out.  How
7186                  * we do so depends on whether or not it's dirty.
7187                  */
7188                 if (hat_ismod(pp) && pp->p_vnode) {
7189                         /*
7190                          * We must clean the page before it can be
7191                          * freed.  Setting B_FREE will cause pvn_done
7192                          * to free the page when the i/o completes.
7193                          * XXX: This also causes it to be accounted
7194                          *      as a pageout instead of a swap: need
7195                          *      B_SWAPOUT bit to use instead of B_FREE.
7196                          *
7197                          * Hold the vnode before releasing the page lock
7198                          * to prevent it from being freed and re-used by
7199                          * some other thread.
7200                          */
7201                         VN_HOLD(vp);
7202                         page_unlock(pp);
7203 
7204                         /*
7205                          * Queue all i/o requests for the pageout thread
7206                          * to avoid saturating the pageout devices.
7207                          */
7208                         if (!queue_io_request(vp, off))
7209                                 VN_RELE(vp);
7210                 } else {
7211                         /*
7212                          * The page was clean, free it.
7213                          *
7214                          * XXX: Can we ever encounter modified pages
7215                          *      with no associated vnode here?
7216                          */
7217                         ASSERT(pp->p_vnode != NULL);
7218                         /*LINTED: constant in conditional context*/
7219                         VN_DISPOSE(pp, B_FREE, 0, kcred);
7220                 }
7221 
7222                 /*
7223                  * Credit now even if i/o is in progress.
7224                  */
7225                 pgcnt++;
7226         }
7227         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7228 
7229         /*
7230          * Wakeup pageout to initiate i/o on all queued requests.
7231          */
7232         cv_signal_pageout();
7233         return (ptob(pgcnt));
7234 }
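
Two details of segvn_swapout() deserve a call-out. First, disposal is split by dirtiness: a clean page is freed on the spot with VN_DISPOSE(B_FREE), while a dirty page is queued for the pageout thread so the swap devices aren't saturated by a burst of synchronous writes. Second, the returned byte count is credited optimistically: a page counts as freed as soon as its fate is decided, even when the write has merely been queued, or when a failed lock upgrade on an unmapped page suggests a kluster write is already pushing it out. A userland sketch of that per-page decision flow, with stub predicates standing in for the kernel's page locking and hat calls (the large-page demotion step is omitted):

    #include <stdbool.h>
    #include <stddef.h>

    /* Illustrative stubs, not the real page_t interfaces. */
    struct toy_page {
            bool excl_lockable, io_busy, mapped, locked, dirty;
    };

    static bool try_upgrade(struct toy_page *p)     { return (p->excl_lockable); }
    static bool io_trylock(struct toy_page *p)      { return (!p->io_busy); }
    static void queue_writeback(struct toy_page *p) { p->dirty = false; }
    static void free_page(struct toy_page *p)       { (void)p; }

    /*
     * Mirrors the per-page credit logic: count a page once its disposition
     * is known, even if the actual i/o completes later.
     */
    static size_t
    swapout_credit(struct toy_page *pages, size_t npages)
    {
            size_t credited = 0;

            for (size_t i = 0; i < npages; i++) {
                    struct toy_page *p = &pages[i];

                    if (!try_upgrade(p)) {
                            /*
                             * i/o in flight on an unmapped page: assume a
                             * kluster write is pushing it out; take credit.
                             */
                            if (!io_trylock(p) && !p->mapped)
                                    credited++;
                            continue;
                    }
                    if (p->locked || p->mapped)
                            continue;       /* someone still needs it */
                    if (p->dirty)
                            queue_writeback(p);     /* async; credit now */
                    else
                            free_page(p);           /* clean: free directly */
                    credited++;
            }
            return (credited);
    }

    int
    main(void)
    {
            struct toy_page pages[] = {
                    { .excl_lockable = true,  .dirty = true },   /* queued */
                    { .excl_lockable = true,  .dirty = false },  /* freed */
                    { .excl_lockable = false, .io_busy = true }, /* kluster */
                    { .excl_lockable = true,  .mapped = true },  /* skipped */
            };
            return (swapout_credit(pages, 4) == 3 ? 0 : 1);
    }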
7235 
7236 /*
7237  * Synchronize primary storage cache with real object in virtual memory.
7238  *
7239  * XXX - Anonymous pages should not be sync'ed out at all.
7240  */
7241 static int
7242 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7243 {
7244         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7245         struct vpage *vpp;
7246         page_t *pp;
7247         u_offset_t offset;
7248         struct vnode *vp;
7249         u_offset_t off;
7250         caddr_t eaddr;
7251         int bflags;
7252         int err = 0;
7253         int segtype;
new:
  94 #else
  95 #define PVN_GETPAGE_SZ  PVN_MAX_GETPAGE_SZ
  96 #define PVN_GETPAGE_NUM btop(PVN_MAX_GETPAGE_SZ)
  97 #endif
  98 
  99 /*
 100  * Private seg op routines.
 101  */
 102 static int      segvn_dup(struct seg *seg, struct seg *newseg);
 103 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
 104 static void     segvn_free(struct seg *seg);
 105 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
 106                     caddr_t addr, size_t len, enum fault_type type,
 107                     enum seg_rw rw);
 108 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
 109 static int      segvn_setprot(struct seg *seg, caddr_t addr,
 110                     size_t len, uint_t prot);
 111 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
 112                     size_t len, uint_t prot);
 113 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);

 114 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
 115                     int attr, uint_t flags);
 116 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
 117                     char *vec);
 118 static int      segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
 119                     int attr, int op, ulong_t *lockmap, size_t pos);
 120 static int      segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
 121                     uint_t *protv);
 122 static u_offset_t       segvn_getoffset(struct seg *seg, caddr_t addr);
 123 static int      segvn_gettype(struct seg *seg, caddr_t addr);
 124 static int      segvn_getvp(struct seg *seg, caddr_t addr,
 125                     struct vnode **vpp);
 126 static int      segvn_advise(struct seg *seg, caddr_t addr, size_t len,
 127                     uint_t behav);
 128 static void     segvn_dump(struct seg *seg);
 129 static int      segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
 130                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
 131 static int      segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 132                     uint_t szc);
 133 static int      segvn_getmemid(struct seg *seg, caddr_t addr,
 134                     memid_t *memidp);
 135 static lgrp_mem_policy_info_t   *segvn_getpolicy(struct seg *, caddr_t);
 136 static int      segvn_capable(struct seg *seg, segcapability_t capable);
 137 static int      segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
 138 
 139 struct  seg_ops segvn_ops = {
 140         segvn_dup,
 141         segvn_unmap,
 142         segvn_free,
 143         segvn_fault,
 144         segvn_faulta,
 145         segvn_setprot,
 146         segvn_checkprot,
 147         segvn_kluster,

 148         segvn_sync,
 149         segvn_incore,
 150         segvn_lockop,
 151         segvn_getprot,
 152         segvn_getoffset,
 153         segvn_gettype,
 154         segvn_getvp,
 155         segvn_advise,
 156         segvn_dump,
 157         segvn_pagelock,
 158         segvn_setpagesize,
 159         segvn_getmemid,
 160         segvn_getpolicy,
 161         segvn_capable,
 162         segvn_inherit
 163 };
 164 
 165 /*
 166  * Common zfod structures, provided as a shorthand for others to use.
 167  */


7029 
7030         /*
7031          * Now we know we have two anon pointers - check to
7032          * see if they happen to be properly allocated.
7033          */
7034 
7035         /*
7036          * XXX We cheat here and don't lock the anon slots. We can't because
7037          * we may have been called from the anon layer which might already
7038          * have locked them. We are holding a refcnt on the slots so they
7039          * can't disappear. The worst that will happen is we'll get the wrong
7040          * names (vp, off) for the slots and make a poor klustering decision.
7041          */
7042         swap_xlate(ap, &vp1, &off1);
7043         swap_xlate(oap, &vp2, &off2);
7044 
7045 
7046         if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
7047                 return (-1);
7048         return (0);

7049 }
7050 
7051 /*
7052  * Synchronize primary storage cache with real object in virtual memory.
7053  *
7054  * XXX - Anonymous pages should not be sync'ed out at all.
7055  */
7056 static int
7057 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7058 {
7059         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7060         struct vpage *vpp;
7061         page_t *pp;
7062         u_offset_t offset;
7063         struct vnode *vp;
7064         u_offset_t off;
7065         caddr_t eaddr;
7066         int bflags;
7067         int err = 0;
7068         int segtype;