/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>

/*
 * Define the routines within this file.
 */
static int      swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int      swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void     swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void     swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int      swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int     swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int      swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

const fs_operation_def_t swap_vnodeops_template[] = {
        VOPNAME_INACTIVE,       { .vop_inactive = swap_inactive },
        VOPNAME_GETPAGE,        { .vop_getpage = swap_getpage },
        VOPNAME_PUTPAGE,        { .vop_putpage = swap_putpage },
        VOPNAME_DISPOSE,        { .vop_dispose = swap_dispose },
        VOPNAME_SETFL,          { .error = fs_error },
        VOPNAME_POLL,           { .error = fs_error },
        VOPNAME_PATHCONF,       { .error = fs_error },
        VOPNAME_GETSECATTR,     { .error = fs_error },
        VOPNAME_SHRLOCK,        { .error = fs_error },
        NULL,                   NULL
};

vnodeops_t *swap_vnodeops;

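/*
 * VOP_INACTIVE() for swapfs: called when the vnode's hold count drops.
 * swapfs keeps no per-vnode state that needs tearing down here, so this
 * is a no-op apart from the debug trace.
 */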
/* ARGSUSED */
static void
swap_inactive(
        struct vnode *vp,
        struct cred *cr,
        caller_context_t *ct)
{
        SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %p\n",
            (void *)vp, 0, 0, 0, 0);
}

/*
 * Return all the pages in the range [off..off+len] of the given file.
 */
/*ARGSUSED*/
static int
swap_getpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr,
        caller_context_t *ct)
{
        SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
            (void *)vp, off, len, 0, 0);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
            "swapfs getpage:vp %p off %llx len %ld",
            (void *)vp, off, len);

        return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
            pl, plsz, seg, addr, rw, cr));
}

/*
 * Called from pvn_getpages to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page *pp, *rpp;
        int flags;
        int err = 0;
        struct vnode *pvp = NULL;
        u_offset_t poff;
        int flag_noreloc;
        se_t lock;
        extern int kcage_on;
        int upgrade = 0;

        SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * Until there is a call-back mechanism to cause SEGKP
         * pages to be unlocked, make them non-relocatable.
         */
        if (SEG_IS_SEGKP(seg))
                flag_noreloc = PG_NORELOC;
        else
                flag_noreloc = 0;

        if (protp != NULL)
                *protp = PROT_ALL;

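        /*
         * An S_CREATE fault supplies the page contents itself, so the
         * page must be held SE_EXCL; for plain reads a shared lock is
         * sufficient.
         */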
        lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
        if (pp = page_lookup(vp, off, lock)) {
                /*
                 * In very rare instances, a segkp page may have been
                 * relocated outside of the kernel by the kernel cage
                 * due to the window between page_unlock() and
                 * VOP_PUTPAGE() in segkp_unlock().  These occurrences
                 * are rare enough that the solution is simply to
                 * relocate the page to a P_NORELOC page.
                 */
                if (flag_noreloc != 0) {
                        if (!PP_ISNORELOC(pp) && kcage_on) {
                                if (lock != SE_EXCL) {
                                        upgrade = 1;
                                        if (!page_tryupgrade(pp)) {
                                                page_unlock(pp);
                                                lock = SE_EXCL;
                                                goto again;
                                        }
                                }

                                if (page_relocate_cage(&pp, &rpp) != 0)
                                        panic("swap_getapage: "
                                            "page_relocate_cage failed");

                                pp = rpp;
                        }
                }

                if (pl) {
                        if (upgrade)
                                page_downgrade(pp);

                        pl[0] = pp;
                        pl[1] = NULL;
                } else {
                        page_unlock(pp);
                }
        } else {
                pp = page_create_va(vp, off, PAGESIZE,
                    PG_WAIT | PG_EXCL | flag_noreloc,
                    seg, addr);
                /*
                 * Someone raced in and created the page after we did the
                 * lookup but before we did the create, so go back and
                 * try to look it up again.
                 */
                if (pp == NULL)
                        goto again;
                if (rw != S_CREATE) {
                        err = swap_getphysname(vp, off, &pvp, &poff);
                        if (pvp) {
                                struct anon *ap;
                                kmutex_t *ahm;
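                                /*
                                 * With no page list to fill in, this is an
                                 * asynchronous read-ahead: start the i/o
                                 * but don't wait for it to complete.
                                 */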
                                flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
                                err = VOP_PAGEIO(pvp, pp, poff,
                                    PAGESIZE, flags, cr, NULL);

                                if (!err) {
                                        ahm = AH_MUTEX(vp, off);
                                        mutex_enter(ahm);

                                        ap = swap_anon(vp, off);
                                        if (ap == NULL) {
                                                panic("swap_getapage:"
                                                    " null anon");
                                        }

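                                        /*
                                         * Free the physical swap slot only
                                         * if the anon slot still names it,
                                         * and mark the page modified so
                                         * pageout will allocate a new slot
                                         * before it is written out again.
                                         */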
                                        if (ap->an_pvp == pvp &&
                                            ap->an_poff == poff) {
                                                swap_phys_free(pvp, poff,
                                                    PAGESIZE);
                                                ap->an_pvp = NULL;
                                                ap->an_poff = NULL;
                                                hat_setmod(pp);
                                        }

                                        mutex_exit(ahm);
                                }
                        } else {
                                if (!err)
                                        pagezero(pp, 0, PAGESIZE);

                                /*
                                 * If this is a fault-ahead, release the
                                 * page_io_lock and the SE_EXCL lock we
                                 * grabbed in page_create_va().
                                 *
                                 * Since we never called VOP_PAGEIO here,
                                 * calling pvn_read_done(pp, B_READ) below
                                 * would wrongly suggest that i/o was
                                 * attempted.  Besides, in the async case,
                                 * pvn_read_done() must not be called by
                                 * *getpage().
                                 */
                                if (pl == NULL) {
                                        /*
                                         * swap_getphysname can return error
                                         * only when we are getting called from
                                         * swapslot_free which passes non-NULL
                                         * pl to VOP_GETPAGE.
                                         */
                                        ASSERT(err == 0);
                                        page_io_unlock(pp);
                                        page_unlock(pp);
                                }
                        }
                }

                ASSERT(pp != NULL);

                if (err && pl)
                        pvn_read_done(pp, B_ERROR);

                if (!err && pl)
                        pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
        }
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
        return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another approach would have been to change the VOP and add an extra arg
 * for the preallocated large page. This can all be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across the
 * VOP in PAGESIZE increments to fill in or initialize a large page, as is
 * done today. The current approach seems better since it avoids a change
 * to the VOP interface that might later prove unnecessary.
 */
int
swap_getconpage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        page_t  *conpp,
        uint_t  *pszc,
        spgcnt_t *nreloc,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page     *pp;
        int             err = 0;
        struct vnode    *pvp = NULL;
        u_offset_t      poff;

        ASSERT(len == PAGESIZE);
        ASSERT(pl != NULL);
        ASSERT(plsz == PAGESIZE);
        ASSERT(protp == NULL);
        ASSERT(nreloc != NULL);
        ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
        SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * If we are not using a preallocated page then we know one already
         * exists. So just let the old code handle it.
         */
        if (conpp == NULL) {
                err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
                    seg, addr, rw, cr);
                return (err);
        }
        ASSERT(conpp->p_szc != 0);
        ASSERT(PAGE_EXCL(conpp));

        ASSERT(conpp->p_next == conpp);
        ASSERT(conpp->p_prev == conpp);
        ASSERT(!PP_ISAGED(conpp));
        ASSERT(!PP_ISFREE(conpp));

        *nreloc = 0;
        pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

        /*
         * If existing page is found we may need to relocate.
         */
        if (pp != conpp) {
                ASSERT(rw != S_CREATE);
                ASSERT(pszc != NULL);
                ASSERT(PAGE_SHARED(pp));
                if (pp->p_szc < conpp->p_szc) {
                        *pszc = pp->p_szc;
                        page_unlock(pp);
                        err = -1;
                } else if (pp->p_szc > conpp->p_szc &&
                    seg->s_szc > conpp->p_szc) {
                        *pszc = MIN(pp->p_szc, seg->s_szc);
                        page_unlock(pp);
                        err = -2;
                } else {
                        pl[0] = pp;
                        pl[1] = NULL;
                        if (page_pptonum(pp) &
                            (page_get_pagecnt(conpp->p_szc) - 1))
                                cmn_err(CE_PANIC, "swap_getconpage: no root");
                }
                return (err);
        }

        ASSERT(PAGE_EXCL(pp));

        if (*nreloc != 0) {
                ASSERT(rw != S_CREATE);
                pl[0] = pp;
                pl[1] = NULL;
                return (0);
        }

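        /*
         * No existing page was found and page_lookup_create() performed
         * no relocations, so conpp itself now backs this offset; tell the
         * caller the preallocated page was consumed.
         */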
        *nreloc = 1;

        /*
         * If necessary do the page io.
         */
        if (rw != S_CREATE) {
                /*
                 * Since we are only called now on behalf of an
                 * address space operation, it's impossible for
                 * us to fail, unlike swap_getapage() which
                 * also gets called from swapslot_free().
                 */
                if (swap_getphysname(vp, off, &pvp, &poff)) {
                        cmn_err(CE_PANIC,
                            "swap_getconpage: swap_getphysname failed!");
                }

                if (pvp != NULL) {
                        err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
                            cr, NULL);
                        if (err == 0) {
                                struct anon *ap;
                                kmutex_t *ahm;

                                ahm = AH_MUTEX(vp, off);
                                mutex_enter(ahm);
                                ap = swap_anon(vp, off);
                                if (ap == NULL)
                                        panic("swap_getconpage: null anon");
                                if (ap->an_pvp != pvp || ap->an_poff != poff)
                                        panic("swap_getconpage: bad anon");

                                swap_phys_free(pvp, poff, PAGESIZE);
                                ap->an_pvp = NULL;
                                ap->an_poff = NULL;
                                hat_setmod(pp);
                                mutex_exit(ahm);
                        }
                } else {
                        pagezero(pp, 0, PAGESIZE);
                }
        }

        /*
         * Normally we would let pvn_read_done() destroy
         * the page on IO error. But since this is a preallocated
         * page we'll let the anon layer handle it.
         */
        page_io_unlock(pp);
        if (err != 0)
                page_hashout(pp, NULL);
        ASSERT(pp->p_next == pp);
        ASSERT(pp->p_prev == pp);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

        pl[0] = pp;
        pl[1] = NULL;
        return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

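/*
 * Pageout statistics: successful putpage calls and pages pushed, plus
 * counts of why klustering stopped (request queue empty, page lookup or
 * physical name allocation failure, non-contiguous physical slots).
 */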
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;     /* Disable swap_putpage() when set */

/* ARGSUSED */
static int
swap_putpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        int flags,
        struct cred *cr,
        caller_context_t *ct)
{
        page_t *pp;
        u_offset_t io_off;
        size_t io_len = 0;
        int err = 0;
        int nowait;
        struct async_reqs *arg;

        if (swap_nopage)
                return (0);

        ASSERT(vp->v_count != 0);

        nowait = flags & B_PAGE_NOWAIT;

        /*
         * Clear the force flag so that p_lckcnt pages are not invalidated;
         * B_PAGE_NOWAIT was consumed above and is cleared here as well.
         */
        flags &= ~(B_FORCE | B_PAGE_NOWAIT);

        SWAPFS_PRINT(SWAP_VOPS,
            "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
            (void *)vp, off, len, flags, 0);
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
            "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

        if (vp->v_flag & VNOMAP)
                return (ENOSYS);

        if (!vn_has_cached_data(vp))
                return (0);

        if (len == 0) {
                if (curproc == proc_pageout)
                        cmn_err(CE_PANIC, "swapfs: pageout can't block");

                /* Search the entire vp list for pages >= off. */
                err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
                    flags, cr);
        } else {
                u_offset_t eoff;

                /*
                 * Loop over all offsets in the range [off...off + len]
                 * looking for pages to deal with.
                 */
                eoff = off + len;
                for (io_off = (u_offset_t)off; io_off < eoff;
                    io_off += io_len) {
                        /*
                         * For B_ASYNC | B_FREE requests, queue the page on
                         * the async request list (to be klustered into one
                         * large write later) while free request slots
                         * remain; once we run out of slots, fall through
                         * and put the page now instead of queuing.
                         */
                        if (flags == (B_ASYNC | B_FREE) &&
                            sw_pending_size < klustsize &&
                            (arg = sw_getfree())) {
                                /*
                                 * If we are clustering, we should allow
                                 * pageout to feed us more pages because # of
                                 * pushes is limited by # of I/Os, and one
                                 * cluster is considered to be one I/O.
                                 */
                                if (pushes)
                                        pushes--;

                                arg->a_vp = vp;
                                arg->a_off = io_off;
                                arg->a_len = PAGESIZE;
                                arg->a_flags = B_ASYNC | B_FREE;
                                arg->a_cred = kcred;
                                sw_putreq(arg);
                                io_len = PAGESIZE;
                                continue;
                        }
                        /*
                         * If we are not invalidating pages, use the
                         * routine page_lookup_nowait() to prevent
                         * reclaiming them from the free list.
                         */
                        if (!nowait && ((flags & B_INVAL) ||
                            (flags & (B_ASYNC | B_FREE)) == B_FREE))
                                pp = page_lookup(vp, io_off, SE_EXCL);
                        else
                                pp = page_lookup_nowait(vp, io_off,
                                    (flags & (B_FREE | B_INVAL)) ?
                                    SE_EXCL : SE_SHARED);

                        if (pp == NULL || pvn_getdirty(pp, flags) == 0)
                                io_len = PAGESIZE;
                        else {
                                err = swap_putapage(vp, pp, &io_off, &io_len,
                                    flags, cr);
                                if (err != 0)
                                        break;
                        }
                }
        }
        /* If invalidating, verify all pages on vnode list are gone. */
        if (err == 0 && off == 0 && len == 0 &&
            (flags & B_INVAL) && vn_has_cached_data(vp)) {
                cmn_err(CE_WARN,
                    "swap_putpage: B_INVAL, pages not gone");
        }
        return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
        struct vnode *vp,
        page_t *pp,
        u_offset_t *offp,
        size_t *lenp,
        int flags,
        struct cred *cr)
{
        int err;
        struct vnode *pvp;
        u_offset_t poff, off;
        u_offset_t doff;
        size_t dlen;
        size_t klsz = 0;
        u_offset_t klstart = 0;
        struct vnode *klvp = NULL;
        page_t *pplist;
        se_t se;
        struct async_reqs *arg;
        size_t swap_klustsize;

        /*
         * This check handles callers who enter swap_putpage() with len == 0:
         * swap_putpage() then calls swap_putapage() page-by-page via
         * pvn_vplist_dirty(), and such pages must be queued the same way
         * as in swap_putpage() when the B_ASYNC|B_FREE flags are set.
         */
        if (flags == (B_ASYNC | B_FREE) &&
            sw_pending_size < klustsize && (arg = sw_getfree())) {

                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);

                arg->a_vp = vp;
                arg->a_off = pp->p_offset;
                arg->a_len = PAGESIZE;
                arg->a_flags = B_ASYNC | B_FREE;
                arg->a_cred = kcred;
                sw_putreq(arg);

                return (0);
        }

        SWAPFS_PRINT(SWAP_PUTP,
            "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
            pp, vp, pp->p_offset, flags, 0);

        ASSERT(PAGE_LOCKED(pp));

        off = pp->p_offset;

        doff = off;
        dlen = PAGESIZE;

        if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
                err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);
                goto out;
        }

        klvp = pvp;
        klstart = poff;
        pplist = pp;
        /*
         * If this is ASYNC | FREE and we've accumulated a bunch of such
         * pending requests, kluster.
         */
        if (flags == (B_ASYNC | B_FREE))
                swap_klustsize = klustsize;
        else
                swap_klustsize = PAGESIZE;
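        /* Pages that will be freed after the write must be held SE_EXCL. */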
        se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
        klsz = PAGESIZE;
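        /*
         * Kluster: consume queued async requests, extending the cluster
         * for as long as each new page's physical slot is adjacent
         * (forward or backward) on the same physical swap vnode.
         */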
        while (klsz < swap_klustsize) {
                if ((arg = sw_getreq()) == NULL) {
                        swap_getiofail++;
                        swap_getiopages += btop(klsz);
                        break;
                }
                ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
                vp = arg->a_vp;
                off = arg->a_off;

                if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        sw_putfree(arg);
                        break;
                }
                if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
                        sw_putfree(arg);
                        continue;
                }
                /* Get new physical backing store for the page */
                doff = off;
                dlen = PAGESIZE;
                if (err = swap_newphysname(vp, off, &doff, &dlen,
                    &pvp, &poff)) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
                /* Try to cluster new physical name with previous ones */
                if (klvp == pvp && poff == klstart + klsz) {
                        klsz += PAGESIZE;
                        page_add(&pplist, pp);
                        pplist = pplist->p_next;
                        sw_putfree(arg);
                } else if (klvp == pvp && poff == klstart - PAGESIZE) {
                        klsz += PAGESIZE;
                        klstart -= PAGESIZE;
                        page_add(&pplist, pp);
                        sw_putfree(arg);
                } else {
                        swap_klustfail++;
                        swap_klustpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
        }

        err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
            B_WRITE | flags, cr, NULL);

        if ((flags & B_ASYNC) == 0)
                pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

        /* Statistics */
        if (!err) {
                swap_putpagecnt++;
                swap_pagespushed += btop(klsz);
        }
out:
        TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
            "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
            vp, klvp, klstart, klsz);
        if (err && err != ENOMEM)
                cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
        if (lenp)
                *lenp = PAGESIZE;
        return (err);
}

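/*
 * Dispose of a swapfs page.  If the page still has a physical swap
 * name, let the physical swap vnode's dispose routine handle it;
 * otherwise fall back to the generic fs_dispose().
 */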
static void
swap_dispose(
        vnode_t *vp,
        page_t *pp,
        int fl,
        int dn,
        cred_t *cr,
        caller_context_t *ct)
{
        int err;
        u_offset_t off = pp->p_offset;
        vnode_t *pvp;
        u_offset_t poff;

        ASSERT(PAGE_EXCL(pp));

        /*
         * The caller will free/invalidate a large page in one shot
         * instead of one small page at a time.
         */
        if (pp->p_szc != 0) {
                page_unlock(pp);
                return;
        }

        err = swap_getphysname(vp, off, &pvp, &poff);
        if (!err && pvp != NULL)
                VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
        else
                fs_dispose(vp, pp, fl, dn, cr, ct);