/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>

/*
 * Define the routines within this file.
 */
static int      swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int      swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void     swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void     swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int      swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int     swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int      swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

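/*
 * Vnode operations template for swapfs vnodes.  Only the paging entry
 * points are implemented; operations that make no sense for anonymous
 * swap vnodes are explicitly mapped to fs_error().
 */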
const fs_operation_def_t swap_vnodeops_template[] = {
        VOPNAME_INACTIVE,       { .vop_inactive = swap_inactive },
        VOPNAME_GETPAGE,        { .vop_getpage = swap_getpage },
        VOPNAME_PUTPAGE,        { .vop_putpage = swap_putpage },
        VOPNAME_DISPOSE,        { .vop_dispose = swap_dispose },
        VOPNAME_SETFL,          { .error = fs_error },
        VOPNAME_POLL,           { .error = fs_error },
        VOPNAME_PATHCONF,       { .error = fs_error },
        VOPNAME_GETSECATTR,     { .error = fs_error },
        VOPNAME_SHRLOCK,        { .error = fs_error },
        NULL,                   NULL
};

vnodeops_t *swap_vnodeops;

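/*
 * There is nothing to tear down when the last reference to a swapfs
 * vnode goes away, so inactivation only emits a debug trace.
 */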
/* ARGSUSED */
static void
swap_inactive(
        struct vnode *vp,
        struct cred *cr,
        caller_context_t *ct)
{
        SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %p\n",
            (void *)vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in the given file.
 */
/*ARGSUSED*/
static int
swap_getpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr,
        caller_context_t *ct)
{
        int err;

        SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
            (void *)vp, off, len, 0, 0);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
            "swapfs getpage:vp %p off %llx len %ld",
            (void *)vp, off, len);

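        /*
         * A single-page request is handled directly; anything larger
         * goes through pvn_getpages(), which calls swap_getapage()
         * once per page.
         */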
        if (len <= PAGESIZE) {
                err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
                    seg, addr, rw, cr);
        } else {
                err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
                    protp, pl, plsz, seg, addr, rw, cr);
        }

        return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page *pp, *rpp;
        int flags;
        int err = 0;
        struct vnode *pvp = NULL;
        u_offset_t poff;
        int flag_noreloc;
        se_t lock;
        extern int kcage_on;
        int upgrade = 0;

        SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * Until there is a call-back mechanism to cause SEGKP
         * pages to be unlocked, make them non-relocatable.
         */
        if (SEG_IS_SEGKP(seg))
                flag_noreloc = PG_NORELOC;
        else
                flag_noreloc = 0;

        if (protp != NULL)
                *protp = PROT_ALL;

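        /*
         * An S_CREATE caller will initialize the page itself, so take
         * the page lock exclusively up front; all other accesses can
         * start out shared.
         */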
        lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
        if ((pp = page_lookup(vp, off, lock)) != NULL) {
                /*
                 * In very rare instances, a segkp page may have been
                 * relocated outside of the kernel by the kernel cage
                 * due to the window between page_unlock() and
                 * VOP_PUTPAGE() in segkp_unlock().  Due to the
                 * rareness of these occurrences, the solution is to
                 * relocate the page to a P_NORELOC page.
                 */
                if (flag_noreloc != 0) {
                        if (!PP_ISNORELOC(pp) && kcage_on) {
                                if (lock != SE_EXCL) {
                                        upgrade = 1;
                                        if (!page_tryupgrade(pp)) {
                                                page_unlock(pp);
                                                lock = SE_EXCL;
                                                goto again;
                                        }
                                }

                                if (page_relocate_cage(&pp, &rpp) != 0)
                                        panic("swap_getapage: "
                                            "page_relocate_cage failed");

                                pp = rpp;
                        }
                }

                if (pl) {
                        if (upgrade)
                                page_downgrade(pp);

                        pl[0] = pp;
                        pl[1] = NULL;
                } else {
                        page_unlock(pp);
                }
        } else {
                pp = page_create_va(vp, off, PAGESIZE,
                    PG_WAIT | PG_EXCL | flag_noreloc,
                    seg, addr);
                /*
                 * Someone raced in and created the page after we did the
                 * lookup but before we did the create, so go back and
                 * try to look it up again.
                 */
                if (pp == NULL)
                        goto again;
                if (rw != S_CREATE) {
                        err = swap_getphysname(vp, off, &pvp, &poff);
                        if (pvp) {
                                struct anon *ap;
                                kmutex_t *ahm;

                                flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
                                err = VOP_PAGEIO(pvp, pp, poff,
                                    PAGESIZE, flags, cr, NULL);

                                if (!err) {
                                        ahm = AH_MUTEX(vp, off);
                                        mutex_enter(ahm);

                                        ap = swap_anon(vp, off);
                                        if (ap == NULL) {
                                                panic("swap_getapage:"
                                                    " null anon");
                                        }

                                        if (ap->an_pvp == pvp &&
                                            ap->an_poff == poff) {
                                                swap_phys_free(pvp, poff,
                                                    PAGESIZE);
                                                ap->an_pvp = NULL;
                                                ap->an_poff = 0;
                                                hat_setmod(pp);
                                        }

                                        mutex_exit(ahm);
                                }
                        } else {
                                if (!err)
                                        pagezero(pp, 0, PAGESIZE);

                                /*
                                 * If this is a fault-ahead (pl == NULL),
                                 * release the page_io_lock and the
                                 * SE_EXCL lock we grabbed in
                                 * page_create_va().
                                 *
                                 * If we are here, we haven't called
                                 * VOP_PAGEIO, so calling
                                 * pvn_read_done(pp, B_READ) below would
                                 * misleadingly suggest that we tried
                                 * i/o.  Besides, in the async case
                                 * pvn_read_done() should not be called
                                 * by *getpage().
                                 */
                                if (pl == NULL) {
                                        /*
                                         * swap_getphysname() can return
                                         * an error only when we are
                                         * called from swapslot_free(),
                                         * which passes a non-NULL pl to
                                         * VOP_GETPAGE.
                                         */
                                        ASSERT(err == 0);
                                        page_io_unlock(pp);
                                        page_unlock(pp);
                                }
                        }
                }

                ASSERT(pp != NULL);

                if (err && pl)
                        pvn_read_done(pp, B_ERROR);

                if (!err && pl)
                        pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
        }
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
        return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change the VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across the
 * VOP in PAGESIZE increments to fill in or initialize a large page as
 * is done today. I think the latter is better since it avoids a change to
 * the VOP interface that could later be avoided.
 */
int
swap_getconpage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        page_t  *conpp,
        uint_t  *pszc,
        spgcnt_t *nreloc,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page     *pp;
        int             err = 0;
        struct vnode    *pvp = NULL;
        u_offset_t      poff;

        ASSERT(len == PAGESIZE);
        ASSERT(pl != NULL);
        ASSERT(plsz == PAGESIZE);
        ASSERT(protp == NULL);
        ASSERT(nreloc != NULL);
        ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
        SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * If we are not using a preallocated page then we know one already
         * exists. So just let the old code handle it.
         */
        if (conpp == NULL) {
                err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
                    seg, addr, rw, cr);
                return (err);
        }
        ASSERT(conpp->p_szc != 0);
        ASSERT(PAGE_EXCL(conpp));

        ASSERT(conpp->p_next == conpp);
        ASSERT(conpp->p_prev == conpp);
        ASSERT(!PP_ISAGED(conpp));
        ASSERT(!PP_ISFREE(conpp));

        *nreloc = 0;
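        /*
         * page_lookup_create() returns either an existing page for
         * (vp, off) or, failing that, enters conpp into the page hash.
         * A nonzero *nreloc indicates that an existing page was
         * relocated into conpp, in which case its contents are already
         * valid.
         */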
        pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

        /*
         * If an existing page is found, we may need to relocate it.
         */
        if (pp != conpp) {
                ASSERT(rw != S_CREATE);
                ASSERT(pszc != NULL);
                ASSERT(PAGE_SHARED(pp));
                if (pp->p_szc < conpp->p_szc) {
                        *pszc = pp->p_szc;
                        page_unlock(pp);
                        err = -1;
                } else if (pp->p_szc > conpp->p_szc &&
                    seg->s_szc > conpp->p_szc) {
                        *pszc = MIN(pp->p_szc, seg->s_szc);
                        page_unlock(pp);
                        err = -2;
                } else {
                        pl[0] = pp;
                        pl[1] = NULL;
                        if (page_pptonum(pp) &
                            (page_get_pagecnt(conpp->p_szc) - 1))
                                cmn_err(CE_PANIC, "swap_getconpage: no root");
                }
                return (err);
        }

        ASSERT(PAGE_EXCL(pp));

        if (*nreloc != 0) {
                ASSERT(rw != S_CREATE);
                pl[0] = pp;
                pl[1] = NULL;
                return (0);
        }

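        /*
         * conpp was newly entered into the page hash; report to the
         * caller that the preallocated page has been consumed, then
         * fill in its contents below.
         */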
        *nreloc = 1;

        /*
         * If necessary do the page io.
         */
        if (rw != S_CREATE) {
                /*
                 * Since we are only called now on behalf of an
                 * address space operation, it's impossible for
                 * us to fail, unlike swap_getapage() which
                 * also gets called from swapslot_free().
                 */
                if (swap_getphysname(vp, off, &pvp, &poff)) {
                        cmn_err(CE_PANIC,
                            "swap_getconpage: swap_getphysname failed!");
                }

                if (pvp != NULL) {
                        err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
                            cr, NULL);
                        if (err == 0) {
                                struct anon *ap;
                                kmutex_t *ahm;

                                ahm = AH_MUTEX(vp, off);
                                mutex_enter(ahm);
                                ap = swap_anon(vp, off);
                                if (ap == NULL)
                                        panic("swap_getconpage: null anon");
                                if (ap->an_pvp != pvp || ap->an_poff != poff)
                                        panic("swap_getconpage: bad anon");

                                swap_phys_free(pvp, poff, PAGESIZE);
                                ap->an_pvp = NULL;
                                ap->an_poff = 0;
                                hat_setmod(pp);
                                mutex_exit(ahm);
                        }
                } else {
                        pagezero(pp, 0, PAGESIZE);
                }
        }

        /*
         * Normally we would let pvn_read_done() destroy
         * the page on IO error. But since this is a preallocated
         * page we'll let the anon layer handle it.
         */
        page_io_unlock(pp);
        if (err != 0)
                page_hashout(pp, NULL);
        ASSERT(pp->p_next == pp);
        ASSERT(pp->p_prev == pp);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

        pl[0] = pp;
        pl[1] = NULL;
        return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

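/* Statistics on putpage klustering successes and failures. */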
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;     /* Don't do swap_putpage()s if set */

/* ARGSUSED */
static int
swap_putpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        int flags,
        struct cred *cr,
        caller_context_t *ct)
{
        page_t *pp;
        u_offset_t io_off;
        size_t io_len = 0;
        int err = 0;
        int nowait;
        struct async_reqs *arg;

        if (swap_nopage)
                return (0);

        ASSERT(vp->v_count != 0);

        nowait = flags & B_PAGE_NOWAIT;

        /*
         * B_PAGE_NOWAIT was saved in nowait above; clear it along with
         * the force flag so that p_lckcnt pages are not invalidated.
         */
        flags &= ~(B_FORCE | B_PAGE_NOWAIT);

        SWAPFS_PRINT(SWAP_VOPS,
            "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
            (void *)vp, off, len, flags, 0);
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
            "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

        if (vp->v_flag & VNOMAP)
                return (ENOSYS);

        if (!vn_has_cached_data(vp))
                return (0);

        if (len == 0) {
                if (curproc == proc_pageout)
                        cmn_err(CE_PANIC, "swapfs: pageout can't block");

                /* Search the entire vp list for pages >= off. */
                err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
                    flags, cr);
        } else {
                u_offset_t eoff;

                /*
                 * Loop over all offsets in the range [off...off + len]
                 * looking for pages to deal with.
                 */
                eoff = off + len;
                for (io_off = (u_offset_t)off; io_off < eoff;
                    io_off += io_len) {
                        /*
                         * Queue async requests while free request slots
                         * remain; if we run out of slots, fall through
                         * and push the page now instead of queuing.
                         */
                        if (flags == (B_ASYNC | B_FREE) &&
                            sw_pending_size < klustsize &&
                            (arg = sw_getfree())) {
                                /*
                                 * If we are clustering, we should allow
                                 * pageout to feed us more pages because # of
                                 * pushes is limited by # of I/Os, and one
                                 * cluster is considered to be one I/O.
                                 */
                                if (pushes)
                                        pushes--;

                                arg->a_vp = vp;
                                arg->a_off = io_off;
                                arg->a_len = PAGESIZE;
                                arg->a_flags = B_ASYNC | B_FREE;
                                arg->a_cred = kcred;
                                sw_putreq(arg);
                                io_len = PAGESIZE;
                                continue;
                        }
                        /*
                         * When invalidating or synchronously freeing
                         * pages (and B_PAGE_NOWAIT was not requested),
                         * wait for them with page_lookup(); otherwise
                         * use page_lookup_nowait() to avoid reclaiming
                         * pages from the free list.
                         */
                        if (!nowait && ((flags & B_INVAL) ||
                            (flags & (B_ASYNC | B_FREE)) == B_FREE))
                                pp = page_lookup(vp, io_off, SE_EXCL);
                        else
                                pp = page_lookup_nowait(vp, io_off,
                                    (flags & (B_FREE | B_INVAL)) ?
                                    SE_EXCL : SE_SHARED);

                        if (pp == NULL || pvn_getdirty(pp, flags) == 0)
                                io_len = PAGESIZE;
                        else {
                                err = swap_putapage(vp, pp, &io_off, &io_len,
                                    flags, cr);
                                if (err != 0)
                                        break;
                        }
                }
        }
        /* If invalidating, verify all pages on vnode list are gone. */
        if (err == 0 && off == 0 && len == 0 &&
            (flags & B_INVAL) && vn_has_cached_data(vp)) {
                cmn_err(CE_WARN,
                    "swap_putpage: B_INVAL, pages not gone");
        }
        return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
        struct vnode *vp,
        page_t *pp,
        u_offset_t *offp,
        size_t *lenp,
        int flags,
        struct cred *cr)
{
        int err;
        struct vnode *pvp;
        u_offset_t poff, off;
        u_offset_t doff;
        size_t dlen;
        size_t klsz = 0;
        u_offset_t klstart = 0;
        struct vnode *klvp = NULL;
        page_t *pplist;
        se_t se;
        struct async_reqs *arg;
        size_t swap_klustsize;

        /*
         * Handle callers that enter swap_putpage() with len == 0: in
         * that case swap_putpage() calls us page-by-page via
         * pvn_vplist_dirty(), so the same async queuing done there
         * must be done here when B_ASYNC|B_FREE is set.
         */
        if (flags == (B_ASYNC | B_FREE) &&
            sw_pending_size < klustsize && (arg = sw_getfree())) {

                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);

                arg->a_vp = vp;
                arg->a_off = pp->p_offset;
                arg->a_len = PAGESIZE;
                arg->a_flags = B_ASYNC | B_FREE;
                arg->a_cred = kcred;
                sw_putreq(arg);

                return (0);
        }

        SWAPFS_PRINT(SWAP_PUTP,
            "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
            pp, vp, pp->p_offset, flags, 0);

        ASSERT(PAGE_LOCKED(pp));

        off = pp->p_offset;

        doff = off;
        dlen = PAGESIZE;

        err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff);
        if (err != 0) {
                err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);
                goto out;
        }

        klvp = pvp;
        klstart = poff;
        pplist = pp;
        /*
         * If this is ASYNC | FREE and we've accumulated a bunch of such
         * pending requests, kluster.
         */
        if (flags == (B_ASYNC | B_FREE))
                swap_klustsize = klustsize;
        else
                swap_klustsize = PAGESIZE;
        se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
        klsz = PAGESIZE;
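        /*
         * Pull queued async requests and append their pages to the
         * kluster as long as each newly assigned swap slot remains
         * physically contiguous with the kluster built so far.
         */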
        while (klsz < swap_klustsize) {
                if ((arg = sw_getreq()) == NULL) {
                        swap_getiofail++;
                        swap_getiopages += btop(klsz);
                        break;
                }
                ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
                vp = arg->a_vp;
                off = arg->a_off;

                if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        sw_putfree(arg);
                        break;
                }
                if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
                        sw_putfree(arg);
                        continue;
                }
                /* Get new physical backing store for the page */
                doff = off;
                dlen = PAGESIZE;
                err = swap_newphysname(vp, off, &doff, &dlen,
                    &pvp, &poff);
                if (err != 0) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
                /* Try to cluster new physical name with previous ones */
                if (klvp == pvp && poff == klstart + klsz) {
                        klsz += PAGESIZE;
                        page_add(&pplist, pp);
                        pplist = pplist->p_next;
                        sw_putfree(arg);
                } else if (klvp == pvp && poff == klstart - PAGESIZE) {
                        klsz += PAGESIZE;
                        klstart -= PAGESIZE;
                        page_add(&pplist, pp);
                        sw_putfree(arg);
                } else {
                        swap_klustfail++;
                        swap_klustpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
        }

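        /* Push the accumulated kluster in a single physical i/o. */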
        err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
            B_WRITE | flags, cr, NULL);

        if ((flags & B_ASYNC) == 0)
                pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

        /* Statistics */
        if (!err) {
                swap_putpagecnt++;
                swap_pagespushed += btop(klsz);
        }
out:
        TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
            "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
            vp, klvp, klstart, klsz);
        if (err && err != ENOMEM)
                cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
        if (lenp)
                *lenp = PAGESIZE;
        return (err);
}

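/*
 * Dispose of a page: if physical backing store has been assigned, hand
 * the page to the swap device's dispose routine; otherwise use the
 * generic fs_dispose().
 */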
static void
swap_dispose(
        vnode_t *vp,
        page_t *pp,
        int fl,
        int dn,
        cred_t *cr,
        caller_context_t *ct)
{
        int err;
        u_offset_t off = pp->p_offset;
        vnode_t *pvp;
        u_offset_t poff;

        ASSERT(PAGE_EXCL(pp));

        /*
         * The caller will free/invalidate a large page in one shot
         * instead of one small page at a time.
         */
        if (pp->p_szc != 0) {
                page_unlock(pp);
                return;
        }

        err = swap_getphysname(vp, off, &pvp, &poff);
        if (!err && pvp != NULL)
                VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
        else
                fs_dispose(vp, pp, fl, dn, cr, ct);
}