patch SEGOP_SWAPOUT-delete

--- old (with segvn_swapout) ---

  71 #include <sys/proc.h>
  72 #include <sys/task.h>
  73 #include <sys/project.h>
  74 #include <sys/zone.h>
  75 #include <sys/shm_impl.h>
  76 /*
  77  * Private seg op routines.
  78  */
  79 static int      segvn_dup(struct seg *seg, struct seg *newseg);
  80 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
  81 static void     segvn_free(struct seg *seg);
  82 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
  83                     caddr_t addr, size_t len, enum fault_type type,
  84                     enum seg_rw rw);
  85 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
  86 static int      segvn_setprot(struct seg *seg, caddr_t addr,
  87                     size_t len, uint_t prot);
  88 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
  89                     size_t len, uint_t prot);
  90 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
  91 static size_t   segvn_swapout(struct seg *seg);
  92 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
  93                     int attr, uint_t flags);
  94 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
  95                     char *vec);
  96 static int      segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
  97                     int attr, int op, ulong_t *lockmap, size_t pos);
  98 static int      segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
  99                     uint_t *protv);
 100 static u_offset_t       segvn_getoffset(struct seg *seg, caddr_t addr);
 101 static int      segvn_gettype(struct seg *seg, caddr_t addr);
 102 static int      segvn_getvp(struct seg *seg, caddr_t addr,
 103                     struct vnode **vpp);
 104 static int      segvn_advise(struct seg *seg, caddr_t addr, size_t len,
 105                     uint_t behav);
 106 static void     segvn_dump(struct seg *seg);
 107 static int      segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
 108                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
 109 static int      segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 110                     uint_t szc);
 111 static int      segvn_getmemid(struct seg *seg, caddr_t addr,
 112                     memid_t *memidp);
 113 static lgrp_mem_policy_info_t   *segvn_getpolicy(struct seg *, caddr_t);
 114 static int      segvn_capable(struct seg *seg, segcapability_t capable);
 115 
 116 struct  seg_ops segvn_ops = {
 117         segvn_dup,
 118         segvn_unmap,
 119         segvn_free,
 120         segvn_fault,
 121         segvn_faulta,
 122         segvn_setprot,
 123         segvn_checkprot,
 124         segvn_kluster,
 125         segvn_swapout,
 126         segvn_sync,
 127         segvn_incore,
 128         segvn_lockop,
 129         segvn_getprot,
 130         segvn_getoffset,
 131         segvn_gettype,
 132         segvn_getvp,
 133         segvn_advise,
 134         segvn_dump,
 135         segvn_pagelock,
 136         segvn_setpagesize,
 137         segvn_getmemid,
 138         segvn_getpolicy,
 139         segvn_capable,
 140 };
 141 
 142 /*
 143  * Common zfod structures, provided as a shorthand for others to use.
 144  */
 145 static segvn_crargs_t zfod_segvn_crargs =


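For reference, the generic segment layer reaches the handlers in segvn_ops above through the SEGOP_* macros of <vm/seg.h>, so dropping the swapout member goes hand in hand with deleting the SEGOP_SWAPOUT() entry point. A sketch of the dispatch (paraphrased from the historical header, not quoted from this patch):

/*
 * Historical shape of the dispatch macros: every SEGOP_* call
 * indirects through the driver's seg_ops vector.  Removing the
 * swapout slot therefore removes this entry point as well.
 */
#define	SEGOP_DUP(s, n)		(*(s)->s_ops->dup)((s), (n))
#define	SEGOP_SWAPOUT(s)	(*(s)->s_ops->swapout)((s))
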
6980          * see if they happen to be properly allocated.
6981          */
6982 
6983         /*
6984          * XXX We cheat here and don't lock the anon slots. We can't because
6985          * we may have been called from the anon layer which might already
6986          * have locked them. We are holding a refcnt on the slots so they
6987          * can't disappear. The worst that will happen is we'll get the wrong
6988          * names (vp, off) for the slots and make a poor klustering decision.
6989          */
6990         swap_xlate(ap, &vp1, &off1);
6991         swap_xlate(oap, &vp2, &off2);
6992 
6993 
6994         if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
6995                 return (-1);
6996         return (0);
6997 }
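
The test above reduces to: kluster only when both anon slots name the same backing object and their offsets sit exactly delta bytes apart. A minimal, self-contained restatement of that predicate (plain C with hypothetical names; pointer equality stands in for the kernel's VOP_CMP(), which returns nonzero when two vnodes denote the same object):

/*
 * kluster_ok: 1 when two backing names are compatible for
 * klustering, i.e. same object and offsets exactly delta apart.
 */
static int
kluster_ok(const void *obj1, long long off1,
    const void *obj2, long long off2, long long delta)
{
	return (obj1 == obj2 && off1 - off2 == delta);
}
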
6998 
6999 /*
7000  * Swap the pages of seg out to secondary storage, returning the
7001  * number of bytes of storage freed.
7002  *
7003  * The basic idea is first to unload all translations and then to call
7004  * VOP_PUTPAGE() for all newly-unmapped pages, to push them out to the
7005  * swap device.  Pages to which other segments have mappings will remain
7006  * mapped and won't be swapped.  Our caller (as_swapout) has already
7007  * performed the unloading step.
7008  *
7009  * The value returned is intended to correlate well with the process's
7010  * memory requirements.  However, there are some caveats:
7011  * 1)   When given a shared segment as argument, this routine will
7012  *      only succeed in swapping out pages for the last sharer of the
7013  *      segment.  (Previous callers will only have decremented mapping
7014  *      reference counts.)
7015  * 2)   We assume that the hat layer maintains a large enough translation
7016  *      cache to capture process reference patterns.
7017  */
7018 static size_t
7019 segvn_swapout(struct seg *seg)
7020 {
7021         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7022         struct anon_map *amp;
7023         pgcnt_t pgcnt = 0;
7024         pgcnt_t npages;
7025         pgcnt_t page;
7026         ulong_t anon_index;
7027 
7028         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
7029 
7030         SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_READER);
7031         /*
7032          * Find pages unmapped by our caller and force them
7033          * out to the virtual swap device.
7034          */
7035         if ((amp = svd->amp) != NULL)
7036                 anon_index = svd->anon_index;
7037         npages = seg->s_size >> PAGESHIFT;
7038         for (page = 0; page < npages; page++) {
7039                 page_t *pp;
7040                 struct anon *ap;
7041                 struct vnode *vp;
7042                 u_offset_t off;
7043                 anon_sync_obj_t cookie;
7044 
7045                 /*
7046                  * Obtain <vp, off> pair for the page, then look it up.
7047                  *
7048                  * Note that this code is willing to consider regular
7049                  * pages as well as anon pages.  Is this appropriate here?
7050                  */
7051                 ap = NULL;
7052                 if (amp != NULL) {
7053                         ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
7054                         if (anon_array_try_enter(amp, anon_index + page,
7055                             &cookie)) {
7056                                 ANON_LOCK_EXIT(&amp->a_rwlock);
7057                                 continue;
7058                         }
7059                         ap = anon_get_ptr(amp->ahp, anon_index + page);
7060                         if (ap != NULL) {
7061                                 swap_xlate(ap, &vp, &off);
7062                         } else {
7063                                 vp = svd->vp;
7064                                 off = svd->offset + ptob(page);
7065                         }
7066                         anon_array_exit(&cookie);
7067                         ANON_LOCK_EXIT(&amp->a_rwlock);
7068                 } else {
7069                         vp = svd->vp;
7070                         off = svd->offset + ptob(page);
7071                 }
7072                 if (vp == NULL) {               /* untouched zfod page */
7073                         ASSERT(ap == NULL);
7074                         continue;
7075                 }
7076 
7077                 pp = page_lookup_nowait(vp, off, SE_SHARED);
7078                 if (pp == NULL)
7079                         continue;
7080 
7081 
7082                 /*
7083                  * Examine the page to see whether it can be tossed out,
7084                  * keeping track of how many we've found.
7085                  */
7086                 if (!page_tryupgrade(pp)) {
7087                         /*
7088                          * If the page has an i/o lock and no mappings,
7089                          * it's very likely that the page is being
7090                          * written out as a result of klustering.
7091                          * Assume this is so and take credit for it here.
7092                          */
7093                         if (!page_io_trylock(pp)) {
7094                                 if (!hat_page_is_mapped(pp))
7095                                         pgcnt++;
7096                         } else {
7097                                 page_io_unlock(pp);
7098                         }
7099                         page_unlock(pp);
7100                         continue;
7101                 }
7102                 ASSERT(!page_iolock_assert(pp));
7103 
7104 
7105                 /*
7106                  * Skip if page is locked or has mappings.
7107                  * We don't need the page_struct_lock to look at lckcnt
7108                  * and cowcnt because the page is exclusive locked.
7109                  */
7110                 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
7111                     hat_page_is_mapped(pp)) {
7112                         page_unlock(pp);
7113                         continue;
7114                 }
7115 
7116                 /*
7117                  * dispose skips large pages so try to demote first.
7118                  */
7119                 if (pp->p_szc != 0 && !page_try_demote_pages(pp)) {
7120                         page_unlock(pp);
7121                         /*
7122                          * XXX should skip the remaining page_t's of this
7123                          * large page.
7124                          */
7125                         continue;
7126                 }
7127 
7128                 ASSERT(pp->p_szc == 0);
7129 
7130                 /*
7131                  * No longer mapped -- we can toss it out.  How
7132                  * we do so depends on whether or not it's dirty.
7133                  */
7134                 if (hat_ismod(pp) && pp->p_vnode) {
7135                         /*
7136                          * We must clean the page before it can be
7137                          * freed.  Setting B_FREE will cause pvn_done
7138                          * to free the page when the i/o completes.
7139                          * XXX: This also causes it to be accounted
7140                          *      as a pageout instead of a swap: need
7141                          *      B_SWAPOUT bit to use instead of B_FREE.
7142                          *
7143                          * Hold the vnode before releasing the page lock
7144                          * to prevent it from being freed and re-used by
7145                          * some other thread.
7146                          */
7147                         VN_HOLD(vp);
7148                         page_unlock(pp);
7149 
7150                         /*
7151                          * Queue all i/o requests for the pageout thread
7152                          * to avoid saturating the pageout devices.
7153                          */
7154                         if (!queue_io_request(vp, off))
7155                                 VN_RELE(vp);
7156                 } else {
7157                         /*
7158                          * The page was clean, free it.
7159                          *
7160                          * XXX: Can we ever encounter modified pages
7161                          *      with no associated vnode here?
7162                          */
7163                         ASSERT(pp->p_vnode != NULL);
7164                         /*LINTED: constant in conditional context*/
7165                         VN_DISPOSE(pp, B_FREE, 0, kcred);
7166                 }
7167 
7168                 /*
7169                  * Credit now even if i/o is in progress.
7170                  */
7171                 pgcnt++;
7172         }
7173         SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
7174 
7175         /*
7176          * Wakeup pageout to initiate i/o on all queued requests.
7177          */
7178         cv_signal_pageout();
7179         return (ptob(pgcnt));
7180 }
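
As the block comment above notes, the translation-unloading half of this protocol lives in the caller, as_swapout(). Its side of the contract looked roughly like this (a hedged sketch reconstructed from the description here; as_swapout_sketch is an illustrative name, not the verbatim kernel routine):

/*
 * Sketch of the caller's half: unload every user translation for
 * the address space, then ask each segment driver how many bytes
 * it pushed to the swap device, summing the results.
 */
size_t
as_swapout_sketch(struct as *as)
{
	struct seg *seg;
	size_t swpcnt = 0;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	hat_swapout(as->a_hat);		/* unload translations */
	for (seg = AS_SEGFIRST(as); seg != NULL;
	    seg = AS_SEGNEXT(as, seg)) {
		if (seg->s_ops->swapout != NULL)
			swpcnt += SEGOP_SWAPOUT(seg);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (swpcnt);
}
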
7181 
7182 /*
7183  * Synchronize primary storage cache with real object in virtual memory.
7184  *
7185  * XXX - Anonymous pages should not be sync'ed out at all.
7186  */
7187 static int
7188 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7189 {
7190         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7191         struct vpage *vpp;
7192         page_t *pp;
7193         u_offset_t offset;
7194         struct vnode *vp;
7195         u_offset_t off;
7196         caddr_t eaddr;
7197         int bflags;
7198         int err = 0;
7199         int segtype;
7200         int pageprot;
7201         int prot;
7202         ulong_t anon_index;


--- new (segvn_swapout removed) ---

  71 #include <sys/proc.h>
  72 #include <sys/task.h>
  73 #include <sys/project.h>
  74 #include <sys/zone.h>
  75 #include <sys/shm_impl.h>
  76 /*
  77  * Private seg op routines.
  78  */
  79 static int      segvn_dup(struct seg *seg, struct seg *newseg);
  80 static int      segvn_unmap(struct seg *seg, caddr_t addr, size_t len);
  81 static void     segvn_free(struct seg *seg);
  82 static faultcode_t segvn_fault(struct hat *hat, struct seg *seg,
  83                     caddr_t addr, size_t len, enum fault_type type,
  84                     enum seg_rw rw);
  85 static faultcode_t segvn_faulta(struct seg *seg, caddr_t addr);
  86 static int      segvn_setprot(struct seg *seg, caddr_t addr,
  87                     size_t len, uint_t prot);
  88 static int      segvn_checkprot(struct seg *seg, caddr_t addr,
  89                     size_t len, uint_t prot);
  90 static int      segvn_kluster(struct seg *seg, caddr_t addr, ssize_t delta);

  91 static int      segvn_sync(struct seg *seg, caddr_t addr, size_t len,
  92                     int attr, uint_t flags);
  93 static size_t   segvn_incore(struct seg *seg, caddr_t addr, size_t len,
  94                     char *vec);
  95 static int      segvn_lockop(struct seg *seg, caddr_t addr, size_t len,
  96                     int attr, int op, ulong_t *lockmap, size_t pos);
  97 static int      segvn_getprot(struct seg *seg, caddr_t addr, size_t len,
  98                     uint_t *protv);
  99 static u_offset_t       segvn_getoffset(struct seg *seg, caddr_t addr);
 100 static int      segvn_gettype(struct seg *seg, caddr_t addr);
 101 static int      segvn_getvp(struct seg *seg, caddr_t addr,
 102                     struct vnode **vpp);
 103 static int      segvn_advise(struct seg *seg, caddr_t addr, size_t len,
 104                     uint_t behav);
 105 static void     segvn_dump(struct seg *seg);
 106 static int      segvn_pagelock(struct seg *seg, caddr_t addr, size_t len,
 107                     struct page ***ppp, enum lock_type type, enum seg_rw rw);
 108 static int      segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 109                     uint_t szc);
 110 static int      segvn_getmemid(struct seg *seg, caddr_t addr,
 111                     memid_t *memidp);
 112 static lgrp_mem_policy_info_t   *segvn_getpolicy(struct seg *, caddr_t);
 113 static int      segvn_capable(struct seg *seg, segcapability_t capable);
 114 
 115 struct  seg_ops segvn_ops = {
 116         segvn_dup,
 117         segvn_unmap,
 118         segvn_free,
 119         segvn_fault,
 120         segvn_faulta,
 121         segvn_setprot,
 122         segvn_checkprot,
 123         segvn_kluster,

 124         segvn_sync,
 125         segvn_incore,
 126         segvn_lockop,
 127         segvn_getprot,
 128         segvn_getoffset,
 129         segvn_gettype,
 130         segvn_getvp,
 131         segvn_advise,
 132         segvn_dump,
 133         segvn_pagelock,
 134         segvn_setpagesize,
 135         segvn_getmemid,
 136         segvn_getpolicy,
 137         segvn_capable,
 138 };
 139 
 140 /*
 141  * Common zfod structures, provided as a shorthand for others to use.
 142  */
 143 static segvn_crargs_t zfod_segvn_crargs =


6978          * see if they happen to be properly allocated.
6979          */
6980 
6981         /*
6982          * XXX We cheat here and don't lock the anon slots. We can't because
6983          * we may have been called from the anon layer which might already
6984          * have locked them. We are holding a refcnt on the slots so they
6985          * can't disappear. The worst that will happen is we'll get the wrong
6986          * names (vp, off) for the slots and make a poor klustering decision.
6987          */
6988         swap_xlate(ap, &vp1, &off1);
6989         swap_xlate(oap, &vp2, &off2);
6990 
6991 
6992         if (!VOP_CMP(vp1, vp2, NULL) || off1 - off2 != delta)
6993                 return (-1);
6994         return (0);
6995 }
6996 
6997 /*
6998  * Synchronize primary storage cache with real object in virtual memory.
6999  *
7000  * XXX - Anonymous pages should not be sync'ed out at all.
7001  */
7002 static int
7003 segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
7004 {
7005         struct segvn_data *svd = (struct segvn_data *)seg->s_data;
7006         struct vpage *vpp;
7007         page_t *pp;
7008         u_offset_t offset;
7009         struct vnode *vp;
7010         u_offset_t off;
7011         caddr_t eaddr;
7012         int bflags;
7013         int err = 0;
7014         int segtype;
7015         int pageprot;
7016         int prot;
7017         ulong_t anon_index;
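
Although the listing is cut off here, segvn_sync() is the segment-driver backend reached from msync(2) via as_ctl() and SEGOP_SYNC(). A minimal user-level sketch of the operation it services (standard POSIX calls; the file name is hypothetical and assumed to exist and be at least one page long):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/example", O_RDWR);	/* hypothetical file */
	char *p;

	if (fd == -1)
		return (1);
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return (1);
	p[0] = 'x';				/* dirty the page */
	/* push the dirty page back to the file; serviced by segvn_sync */
	(void) msync(p, 4096, MS_SYNC);
	(void) munmap(p, 4096);
	(void) close(fd);
	return (0);
}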