patch as-lock-macro-simplification
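
The listing below is the pre-patch source, where every caller passes both the
address-space pointer and the lock itself, e.g.
AS_LOCK_ENTER(as, &as->a_lock, RW_READER). A minimal sketch of what the
"lock macro simplification" presumably amounts to, assuming the AS_* macros
are reworked to take only the struct as pointer and reach a_lock internally
(sketch only, not the actual diff):

        /* Hypothetical simplified forms; not taken from the patch itself. */
        #define AS_LOCK_ENTER(as, type)  rw_enter(&(as)->a_lock, (type))
        #define AS_LOCK_EXIT(as)         rw_exit(&(as)->a_lock)
        #define AS_LOCK_HELD(as)         RW_LOCK_HELD(&(as)->a_lock)
        #define AS_READ_HELD(as)         RW_READ_HELD(&(as)->a_lock)
        #define AS_WRITE_HELD(as)        RW_WRITE_HELD(&(as)->a_lock)

        /* so a call such as */
        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
        /* would then become */
        AS_LOCK_ENTER(as, RW_READER);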


 343         return (0);
 344 }
 345 
 346 /*
 347  * Search for the segment containing addr. If a segment containing addr
 348  * exists, that segment is returned.  If no such segment exists, and
 349  * the list spans addresses greater than addr, then the first segment
 350  * whose base is greater than addr is returned; otherwise, NULL is
 351  * returned unless tail is true, in which case the last element of the
 352  * list is returned.
 353  *
 354  * a_seglast is used to cache the last found segment for repeated
 355  * searches to the same addr (which happens frequently).
 356  */
 357 struct seg *
 358 as_findseg(struct as *as, caddr_t addr, int tail)
 359 {
 360         struct seg *seg = as->a_seglast;
 361         avl_index_t where;
 362 
 363         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
 364 
 365         if (seg != NULL &&
 366             seg->s_base <= addr &&
 367             addr < seg->s_base + seg->s_size)
 368                 return (seg);
 369 
 370         seg = avl_find(&as->a_segtree, &addr, &where);
 371         if (seg != NULL)
 372                 return (as->a_seglast = seg);
 373 
 374         seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 375         if (seg == NULL && tail)
 376                 seg = avl_last(&as->a_segtree);
 377         return (as->a_seglast = seg);
 378 }
 379 
 380 #ifdef VERIFY_SEGLIST
 381 /*
 382  * verify that the linked list is coherent
 383  */


 405                 nsegs++;
 406         }
 407         ASSERT(seglast == NULL);
 408         ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
 409 }
 410 #endif /* VERIFY_SEGLIST */
 411 
 412 /*
 413  * Add a new segment to the address space. The avl_find()
 414  * may be expensive so we attempt to use last segment accessed
 415  * in as_gap() as an insertion point.
 416  */
 417 int
 418 as_addseg(struct as *as, struct seg *newseg)
 419 {
 420         struct seg *seg;
 421         caddr_t addr;
 422         caddr_t eaddr;
 423         avl_index_t where;
 424 
 425         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
 426 
 427         as->a_updatedir = 1; /* inform /proc */
 428         gethrestime(&as->a_updatetime);
 429 
 430         if (as->a_lastgaphl != NULL) {
 431                 struct seg *hseg = NULL;
 432                 struct seg *lseg = NULL;
 433 
 434                 if (as->a_lastgaphl->s_base > newseg->s_base) {
 435                         hseg = as->a_lastgaphl;
 436                         lseg = AVL_PREV(&as->a_segtree, hseg);
 437                 } else {
 438                         lseg = as->a_lastgaphl;
 439                         hseg = AVL_NEXT(&as->a_segtree, lseg);
 440                 }
 441 
 442                 if (hseg && lseg && lseg->s_base < newseg->s_base &&
 443                     hseg->s_base > newseg->s_base) {
 444                         avl_insert_here(&as->a_segtree, newseg, lseg,
 445                             AVL_AFTER);


 487                                 }
 488 #endif
 489                                 return (-1);    /* overlapping segment */
 490                         }
 491                 }
 492         }
 493         as->a_seglast = newseg;
 494         avl_insert(&as->a_segtree, newseg, where);
 495 
 496 #ifdef VERIFY_SEGLIST
 497         as_verify(as);
 498 #endif
 499         return (0);
 500 }
 501 
 502 struct seg *
 503 as_removeseg(struct as *as, struct seg *seg)
 504 {
 505         avl_tree_t *t;
 506 
 507         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
 508 
 509         as->a_updatedir = 1; /* inform /proc */
 510         gethrestime(&as->a_updatetime);
 511 
 512         if (seg == NULL)
 513                 return (NULL);
 514 
 515         t = &as->a_segtree;
 516         if (as->a_seglast == seg)
 517                 as->a_seglast = NULL;
 518         as->a_lastgaphl = NULL;
 519 
 520         /*
 521          * if this segment is at an address higher than
 522          * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
 523          */
 524         if (as->a_lastgap &&
 525             (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
 526                 as->a_lastgap = AVL_NEXT(t, seg);
 527 
 528         /*
 529          * remove the segment from the seg tree
 530          */
 531         avl_remove(t, seg);
 532 
 533 #ifdef VERIFY_SEGLIST
 534         as_verify(as);
 535 #endif
 536         return (seg);
 537 }
 538 
 539 /*
 540  * Find a segment containing addr.
 541  */
 542 struct seg *
 543 as_segat(struct as *as, caddr_t addr)
 544 {
 545         struct seg *seg = as->a_seglast;
 546 
 547         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
 548 
 549         if (seg != NULL && seg->s_base <= addr &&
 550             addr < seg->s_base + seg->s_size)
 551                 return (seg);
 552 
 553         seg = avl_find(&as->a_segtree, &addr, NULL);
 554         return (seg);
 555 }
 556 
 557 /*
 558  * Serialize all searches for holes in an address space to
 559  * prevent two or more threads from allocating the same virtual
 560  * address range.  The address space must not be "read/write"
 561  * locked by the caller since we may block.
 562  */
 563 void
 564 as_rangelock(struct as *as)
 565 {
 566         mutex_enter(&as->a_contents);
 567         while (AS_ISCLAIMGAP(as))


 650 {
 651         struct as *as;
 652 
 653         as = kmem_cache_alloc(as_cache, KM_SLEEP);
 654 
 655         as->a_flags          = 0;
 656         as->a_vbits          = 0;
 657         as->a_hrm            = NULL;
 658         as->a_seglast        = NULL;
 659         as->a_size           = 0;
 660         as->a_resvsize       = 0;
 661         as->a_updatedir      = 0;
 662         gethrestime(&as->a_updatetime);
 663         as->a_objectdir      = NULL;
 664         as->a_sizedir        = 0;
 665         as->a_userlimit      = (caddr_t)USERLIMIT;
 666         as->a_lastgap        = NULL;
 667         as->a_lastgaphl      = NULL;
 668         as->a_callbacks      = NULL;
 669 
 670         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 671         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 672         AS_LOCK_EXIT(as, &as->a_lock);
 673 
 674         as->a_xhat = NULL;
 675 
 676         return (as);
 677 }
 678 
 679 /*
 680  * Free an address space data structure.
 681  * Need to free the hat first and then
 682  * all the segments on this as and finally
 683  * the space for the as struct itself.
 684  */
 685 void
 686 as_free(struct as *as)
 687 {
 688         struct hat *hat = as->a_hat;
 689         struct seg *seg, *next;
 690         int called = 0;
 691 
 692 top:
 693         /*
 694          * Invoke ALL callbacks. as_do_callbacks will do one callback
 695          * per call, and not return (-1) until the callback has completed.
 696          * When as_do_callbacks returns zero, all callbacks have completed.
 697          */
 698         mutex_enter(&as->a_contents);
 699         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 700                 ;
 701 
 702         /* This will prevent new XHATs from attaching to as */
 703         if (!called)
 704                 AS_SETBUSY(as);
 705         mutex_exit(&as->a_contents);
 706         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 707 
 708         if (!called) {
 709                 called = 1;
 710                 hat_free_start(hat);
 711                 if (as->a_xhat != NULL)
 712                         xhat_free_start_all(as);
 713         }
 714         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 715                 int err;
 716 
 717                 next = AS_SEGNEXT(as, seg);
 718 retry:
 719                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 720                 if (err == EAGAIN) {
 721                         mutex_enter(&as->a_contents);
 722                         if (as->a_callbacks) {
 723                                 AS_LOCK_EXIT(as, &as->a_lock);
 724                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 725                                 /*
 726                                  * Memory is currently locked. Wait for a
 727                                  * cv_signal that it has been unlocked, then
 728                                  * try the operation again.
 729                                  */
 730                                 if (AS_ISUNMAPWAIT(as) == 0)
 731                                         cv_broadcast(&as->a_cv);
 732                                 AS_SETUNMAPWAIT(as);
 733                                 AS_LOCK_EXIT(as, &as->a_lock);
 734                                 while (AS_ISUNMAPWAIT(as))
 735                                         cv_wait(&as->a_cv, &as->a_contents);
 736                         } else {
 737                                 /*
 738                                  * We may have raced with
 739                                  * segvn_reclaim()/segspt_reclaim(). In this
 740                                  * case clean nounmapwait flag and retry since
 741                                  * softlockcnt in this segment may be already
 742                                  * 0.  We don't drop as writer lock so our
 743                                  * number of retries without sleeping should
 744                                  * be very small. See segvn_reclaim() for
 745                                  * more comments.
 746                                  */
 747                                 AS_CLRNOUNMAPWAIT(as);
 748                                 mutex_exit(&as->a_contents);
 749                                 goto retry;
 750                         }
 751                         mutex_exit(&as->a_contents);
 752                         goto top;
 753                 } else {
 754                         /*
 755                          * We do not expect any other error return at this
 756                          * time. This is similar to an ASSERT in seg_unmap()
 757                          */
 758                         ASSERT(err == 0);
 759                 }
 760         }
 761         hat_free_end(hat);
 762         if (as->a_xhat != NULL)
 763                 xhat_free_end_all(as);
 764         AS_LOCK_EXIT(as, &as->a_lock);
 765 
 766         /* /proc stuff */
 767         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 768         if (as->a_objectdir) {
 769                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 770                 as->a_objectdir = NULL;
 771                 as->a_sizedir = 0;
 772         }
 773 
 774         /*
 775          * Free the struct as back to kmem.  Assert it has no segments.
 776          */
 777         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 778         kmem_cache_free(as_cache, as);
 779 }
 780 
 781 int
 782 as_dup(struct as *as, struct proc *forkedproc)
 783 {
 784         struct as *newas;
 785         struct seg *seg, *newseg;
 786         size_t  purgesize = 0;
 787         int error;
 788 
 789         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 790         as_clearwatch(as);
 791         newas = as_alloc();
 792         newas->a_userlimit = as->a_userlimit;
 793         newas->a_proc = forkedproc;
 794 
 795         AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
 796 
 797         /* This will prevent new XHATs from attaching */
 798         mutex_enter(&as->a_contents);
 799         AS_SETBUSY(as);
 800         mutex_exit(&as->a_contents);
 801         mutex_enter(&newas->a_contents);
 802         AS_SETBUSY(newas);
 803         mutex_exit(&newas->a_contents);
 804 
 805         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 806 
 807         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 808 
 809                 if (seg->s_flags & S_PURGE) {
 810                         purgesize += seg->s_size;
 811                         continue;
 812                 }
 813 
 814                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 815                 if (newseg == NULL) {
 816                         AS_LOCK_EXIT(newas, &newas->a_lock);
 817                         as_setwatch(as);
 818                         mutex_enter(&as->a_contents);
 819                         AS_CLRBUSY(as);
 820                         mutex_exit(&as->a_contents);
 821                         AS_LOCK_EXIT(as, &as->a_lock);
 822                         as_free(newas);
 823                         return (-1);
 824                 }
 825                 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
 826                         /*
 827                          * We call seg_free() on the new seg
 828                          * because the segment is not set up
 829                          * completely; i.e. it has no ops.
 830                          */
 831                         as_setwatch(as);
 832                         mutex_enter(&as->a_contents);
 833                         AS_CLRBUSY(as);
 834                         mutex_exit(&as->a_contents);
 835                         AS_LOCK_EXIT(as, &as->a_lock);
 836                         seg_free(newseg);
 837                         AS_LOCK_EXIT(newas, &newas->a_lock);
 838                         as_free(newas);
 839                         return (error);
 840                 }
 841                 newas->a_size += seg->s_size;
 842         }
 843         newas->a_resvsize = as->a_resvsize - purgesize;
 844 
 845         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 846         if (as->a_xhat != NULL)
 847                 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
 848 
 849         mutex_enter(&newas->a_contents);
 850         AS_CLRBUSY(newas);
 851         mutex_exit(&newas->a_contents);
 852         AS_LOCK_EXIT(newas, &newas->a_lock);
 853 
 854         as_setwatch(as);
 855         mutex_enter(&as->a_contents);
 856         AS_CLRBUSY(as);
 857         mutex_exit(&as->a_contents);
 858         AS_LOCK_EXIT(as, &as->a_lock);
 859         if (error != 0) {
 860                 as_free(newas);
 861                 return (error);
 862         }
 863         forkedproc->p_as = newas;
 864         return (0);
 865 }
 866 
 867 /*
 868  * Handle a ``fault'' at addr for size bytes.
 869  */
 870 faultcode_t
 871 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 872         enum fault_type type, enum seg_rw rw)
 873 {
 874         struct seg *seg;
 875         caddr_t raddr;                  /* rounded down addr */
 876         size_t rsize;                   /* rounded up size */
 877         size_t ssize;
 878         faultcode_t res = 0;


 942         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 943         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 944             (size_t)raddr;
 945 
 946         /*
 947          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 948          * correctness, but then we could be stuck holding this lock for
 949          * a LONG time if the fault needs to be resolved on a slow
 950          * filesystem, and then no-one will be able to exec new commands,
 951          * as exec'ing requires the write lock on the as.
 952          */
 953         if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 954             raddr + size < segkmap->s_base + segkmap->s_size) {
 955                 /*
 956                  * if (as==&kas), this can't be XHAT: we've already returned
 957                  * FC_NOSUPPORT.
 958                  */
 959                 seg = segkmap;
 960                 as_lock_held = 0;
 961         } else {
 962                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 963                 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
 964                         /*
 965                          * Grab and hold the writers' lock on the as
 966                          * if the fault is to a watched page.
 967                          * This will keep CPUs from "peeking" at the
 968                          * address range while we're temporarily boosting
 969                          * the permissions for the XHAT device to
 970                          * resolve the fault in the segment layer.
 971                          *
 972                          * We could check whether faulted address
 973                          * is within a watched page and only then grab
 974                          * the writer lock, but this is simpler.
 975                          */
 976                         AS_LOCK_EXIT(as, &as->a_lock);
 977                         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 978                 }
 979 
 980                 seg = as_segat(as, raddr);
 981                 if (seg == NULL) {
 982                         AS_LOCK_EXIT(as, &as->a_lock);
 983                         if ((lwp != NULL) && (!is_xhat))
 984                                 lwp->lwp_nostop--;
 985                         return (FC_NOMAP);
 986                 }
 987 
 988                 as_lock_held = 1;
 989         }
 990 
 991         addrsav = raddr;
 992         segsav = seg;
 993 
 994         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 995                 if (raddr >= seg->s_base + seg->s_size) {
 996                         seg = AS_SEGNEXT(as, seg);
 997                         if (seg == NULL || raddr != seg->s_base) {
 998                                 res = FC_NOMAP;
 999                                 break;
1000                         }
1001                 }
1002                 if (raddr + rsize > seg->s_base + seg->s_size)


1043          */
1044         if (res != 0 && type == F_SOFTLOCK) {
1045                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046                         if (addrsav >= seg->s_base + seg->s_size)
1047                                 seg = AS_SEGNEXT(as, seg);
1048                         ASSERT(seg != NULL);
1049                         /*
1050                          * Now call the fault routine again to perform the
1051                          * unlock using S_OTHER instead of the rw variable
1052                          * since we never got a chance to touch the pages.
1053                          */
1054                         if (raddr > seg->s_base + seg->s_size)
1055                                 ssize = seg->s_base + seg->s_size - addrsav;
1056                         else
1057                                 ssize = raddr - addrsav;
1058                         (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
1059                             F_SOFTUNLOCK, S_OTHER);
1060                 }
1061         }
1062         if (as_lock_held)
1063                 AS_LOCK_EXIT(as, &as->a_lock);
1064         if ((lwp != NULL) && (!is_xhat))
1065                 lwp->lwp_nostop--;
1066 
1067         /*
1068          * If the lower levels returned EDEADLK for a fault,
 1069          * it means that we should retry the fault.  Let's wait
1070          * a bit also to let the deadlock causing condition clear.
1071          * This is part of a gross hack to work around a design flaw
1072          * in the ufs/sds logging code and should go away when the
1073          * logging code is re-designed to fix the problem. See bug
1074          * 4125102 for details of the problem.
1075          */
1076         if (FC_ERRNO(res) == EDEADLK) {
1077                 delay(deadlk_wait);
1078                 res = 0;
1079                 goto retry;
1080         }
1081         return (res);
1082 }
1083 


1091 {
1092         struct seg *seg;
1093         caddr_t raddr;                  /* rounded down addr */
1094         size_t rsize;                   /* rounded up size */
1095         faultcode_t res = 0;
1096         klwp_t *lwp = ttolwp(curthread);
1097 
1098 retry:
1099         /*
1100          * Indicate that the lwp is not to be stopped while waiting
1101          * for a pagefault.  This is to avoid deadlock while debugging
1102          * a process via /proc over NFS (in particular).
1103          */
1104         if (lwp != NULL)
1105                 lwp->lwp_nostop++;
1106 
1107         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1108         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109             (size_t)raddr;
1110 
1111         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1112         seg = as_segat(as, raddr);
1113         if (seg == NULL) {
1114                 AS_LOCK_EXIT(as, &as->a_lock);
1115                 if (lwp != NULL)
1116                         lwp->lwp_nostop--;
1117                 return (FC_NOMAP);
1118         }
1119 
1120         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121                 if (raddr >= seg->s_base + seg->s_size) {
1122                         seg = AS_SEGNEXT(as, seg);
1123                         if (seg == NULL || raddr != seg->s_base) {
1124                                 res = FC_NOMAP;
1125                                 break;
1126                         }
1127                 }
1128                 res = SEGOP_FAULTA(seg, raddr);
1129                 if (res != 0)
1130                         break;
1131         }
1132         AS_LOCK_EXIT(as, &as->a_lock);
1133         if (lwp != NULL)
1134                 lwp->lwp_nostop--;
1135         /*
1136          * If the lower levels returned EDEADLK for a fault,
 1137          * it means that we should retry the fault.  Let's wait
1138          * a bit also to let the deadlock causing condition clear.
1139          * This is part of a gross hack to work around a design flaw
1140          * in the ufs/sds logging code and should go away when the
1141          * logging code is re-designed to fix the problem. See bug
1142          * 4125102 for details of the problem.
1143          */
1144         if (FC_ERRNO(res) == EDEADLK) {
1145                 delay(deadlk_wait);
1146                 res = 0;
1147                 goto retry;
1148         }
1149         return (res);
1150 }
1151 
1152 /*


1172         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1173             (size_t)raddr;
1174 
1175         if (raddr + rsize < raddr)           /* check for wraparound */
1176                 return (ENOMEM);
1177 
1178         saveraddr = raddr;
1179         saversize = rsize;
1180 
1181         /*
1182          * Normally we only lock the as as a reader. But
1183          * if due to setprot the segment driver needs to split
1184          * a segment it will return IE_RETRY. Therefore we re-acquire
1185          * the as lock as a writer so the segment driver can change
1186          * the seg list. Also the segment driver will return IE_RETRY
1187          * after it has changed the segment list so we therefore keep
 1188          * locking as a writer. Since these operations should be rare
 1189          * we want to only lock as a writer when necessary.
1190          */
1191         if (writer || avl_numnodes(&as->a_wpage) != 0) {
1192                 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1193         } else {
1194                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1195         }
1196 
1197         as_clearwatchprot(as, raddr, rsize);
1198         seg = as_segat(as, raddr);
1199         if (seg == NULL) {
1200                 as_setwatch(as);
1201                 AS_LOCK_EXIT(as, &as->a_lock);
1202                 return (ENOMEM);
1203         }
1204 
1205         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206                 if (raddr >= seg->s_base + seg->s_size) {
1207                         seg = AS_SEGNEXT(as, seg);
1208                         if (seg == NULL || raddr != seg->s_base) {
1209                                 error = ENOMEM;
1210                                 break;
1211                         }
1212                 }
1213                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214                         ssize = seg->s_base + seg->s_size - raddr;
1215                 else
1216                         ssize = rsize;
1217 retry:
1218                 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1219 
1220                 if (error == IE_NOMEM) {
1221                         error = EAGAIN;
1222                         break;
1223                 }
1224 
1225                 if (error == IE_RETRY) {
1226                         AS_LOCK_EXIT(as, &as->a_lock);
1227                         writer = 1;
1228                         goto setprot_top;
1229                 }
1230 
1231                 if (error == EAGAIN) {
1232                         /*
1233                          * Make sure we have a_lock as writer.
1234                          */
1235                         if (writer == 0) {
1236                                 AS_LOCK_EXIT(as, &as->a_lock);
1237                                 writer = 1;
1238                                 goto setprot_top;
1239                         }
1240 
1241                         /*
1242                          * Memory is currently locked.  It must be unlocked
1243                          * before this operation can succeed through a retry.
1244                          * The possible reasons for locked memory and
1245                          * corresponding strategies for unlocking are:
1246                          * (1) Normal I/O
1247                          *      wait for a signal that the I/O operation
1248                          *      has completed and the memory is unlocked.
1249                          * (2) Asynchronous I/O
1250                          *      The aio subsystem does not unlock pages when
1251                          *      the I/O is completed. Those pages are unlocked
1252                          *      when the application calls aiowait/aioerror.
1253                          *      So, to prevent blocking forever, cv_broadcast()
1254                          *      is done to wake up aio_cleanup_thread.
1255                          *      Subsequently, segvn_reclaim will be called, and
1256                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1257                          * (3) Long term page locking:
1258                          *      Drivers intending to have pages locked for a
1259                          *      period considerably longer than for normal I/O
1260                          *      (essentially forever) may have registered for a
1261                          *      callback so they may unlock these pages on
1262                          *      request. This is needed to allow this operation
1263                          *      to succeed. Each entry on the callback list is
 1264                          *      examined. If the event or address range pertains,
1265                          *      the callback is invoked (unless it already is in
1266                          *      progress). The a_contents lock must be dropped
1267                          *      before the callback, so only one callback can
1268                          *      be done at a time. Go to the top and do more
1269                          *      until zero is returned. If zero is returned,
1270                          *      either there were no callbacks for this event
1271                          *      or they were already in progress.
1272                          */
1273                         mutex_enter(&as->a_contents);
1274                         if (as->a_callbacks &&
1275                             (cb = as_find_callback(as, AS_SETPROT_EVENT,
1276                             seg->s_base, seg->s_size))) {
1277                                 AS_LOCK_EXIT(as, &as->a_lock);
1278                                 as_execute_callback(as, cb, AS_SETPROT_EVENT);
1279                         } else if (!AS_ISNOUNMAPWAIT(as)) {
1280                                 if (AS_ISUNMAPWAIT(as) == 0)
1281                                         cv_broadcast(&as->a_cv);
1282                                 AS_SETUNMAPWAIT(as);
1283                                 AS_LOCK_EXIT(as, &as->a_lock);
1284                                 while (AS_ISUNMAPWAIT(as))
1285                                         cv_wait(&as->a_cv, &as->a_contents);
1286                         } else {
1287                                 /*
1288                                  * We may have raced with
1289                                  * segvn_reclaim()/segspt_reclaim(). In this
1290                                  * case clean nounmapwait flag and retry since
1291                                  * softlockcnt in this segment may be already
1292                                  * 0.  We don't drop as writer lock so our
1293                                  * number of retries without sleeping should
1294                                  * be very small. See segvn_reclaim() for
1295                                  * more comments.
1296                                  */
1297                                 AS_CLRNOUNMAPWAIT(as);
1298                                 mutex_exit(&as->a_contents);
1299                                 goto retry;
1300                         }
1301                         mutex_exit(&as->a_contents);
1302                         goto setprot_top;
1303                 } else if (error != 0)
1304                         break;
1305         }
1306         if (error != 0) {
1307                 as_setwatch(as);
1308         } else {
1309                 as_setwatchprot(as, saveraddr, saversize, prot);
1310         }
1311         AS_LOCK_EXIT(as, &as->a_lock);
1312         return (error);
1313 }
1314 
1315 /*
1316  * Check to make sure that the interval [addr, addr + size)
1317  * in address space `as' has at least the specified protection.
1318  * It is ok for the range to cross over several segments, as long
1319  * as they are contiguous.
1320  */
1321 int
1322 as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1323 {
1324         struct seg *seg;
1325         size_t ssize;
1326         caddr_t raddr;                  /* rounded down addr */
1327         size_t rsize;                   /* rounded up size */
1328         int error = 0;
1329 
1330         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1331         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1332             (size_t)raddr;
1333 
1334         if (raddr + rsize < raddr)           /* check for wraparound */
1335                 return (ENOMEM);
1336 
1337         /*
1338          * This is ugly as sin...
1339          * Normally, we only acquire the address space readers lock.
1340          * However, if the address space has watchpoints present,
1341          * we must acquire the writer lock on the address space for
1342          * the benefit of as_clearwatchprot() and as_setwatchprot().
1343          */
1344         if (avl_numnodes(&as->a_wpage) != 0)
1345                 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1346         else
1347                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1348         as_clearwatchprot(as, raddr, rsize);
1349         seg = as_segat(as, raddr);
1350         if (seg == NULL) {
1351                 as_setwatch(as);
1352                 AS_LOCK_EXIT(as, &as->a_lock);
1353                 return (ENOMEM);
1354         }
1355 
1356         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357                 if (raddr >= seg->s_base + seg->s_size) {
1358                         seg = AS_SEGNEXT(as, seg);
1359                         if (seg == NULL || raddr != seg->s_base) {
1360                                 error = ENOMEM;
1361                                 break;
1362                         }
1363                 }
1364                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365                         ssize = seg->s_base + seg->s_size - raddr;
1366                 else
1367                         ssize = rsize;
1368 
1369                 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1370                 if (error != 0)
1371                         break;
1372         }
1373         as_setwatch(as);
1374         AS_LOCK_EXIT(as, &as->a_lock);
1375         return (error);
1376 }
1377 
1378 int
1379 as_unmap(struct as *as, caddr_t addr, size_t size)
1380 {
1381         struct seg *seg, *seg_next;
1382         struct as_callback *cb;
1383         caddr_t raddr, eaddr;
1384         size_t ssize, rsize = 0;
1385         int err;
1386 
1387 top:
1388         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1390             (uintptr_t)PAGEMASK);
1391 
1392         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1393 
1394         as->a_updatedir = 1; /* inform /proc */
1395         gethrestime(&as->a_updatetime);
1396 
1397         /*
1398          * Use as_findseg to find the first segment in the range, then
1399          * step through the segments in order, following s_next.
1400          */
1401         as_clearwatchprot(as, raddr, eaddr - raddr);
1402 
1403         for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1404                 if (eaddr <= seg->s_base)
1405                         break;          /* eaddr was in a gap; all done */
1406 
1407                 /* this is implied by the test above */
1408                 ASSERT(raddr < eaddr);
1409 
1410                 if (raddr < seg->s_base)
1411                         raddr = seg->s_base;         /* raddr was in a gap */
1412 


1453                          * (3) Long term page locking:
1454                          *      Drivers intending to have pages locked for a
1455                          *      period considerably longer than for normal I/O
1456                          *      (essentially forever) may have registered for a
1457                          *      callback so they may unlock these pages on
1458                          *      request. This is needed to allow this operation
1459                          *      to succeed. Each entry on the callback list is
 1460                          *      examined. If the event or address range pertains,
1461                          *      the callback is invoked (unless it already is in
1462                          *      progress). The a_contents lock must be dropped
1463                          *      before the callback, so only one callback can
1464                          *      be done at a time. Go to the top and do more
1465                          *      until zero is returned. If zero is returned,
1466                          *      either there were no callbacks for this event
1467                          *      or they were already in progress.
1468                          */
1469                         mutex_enter(&as->a_contents);
1470                         if (as->a_callbacks &&
1471                             (cb = as_find_callback(as, AS_UNMAP_EVENT,
1472                             seg->s_base, seg->s_size))) {
1473                                 AS_LOCK_EXIT(as, &as->a_lock);
1474                                 as_execute_callback(as, cb, AS_UNMAP_EVENT);
1475                         } else if (!AS_ISNOUNMAPWAIT(as)) {
1476                                 if (AS_ISUNMAPWAIT(as) == 0)
1477                                         cv_broadcast(&as->a_cv);
1478                                 AS_SETUNMAPWAIT(as);
1479                                 AS_LOCK_EXIT(as, &as->a_lock);
1480                                 while (AS_ISUNMAPWAIT(as))
1481                                         cv_wait(&as->a_cv, &as->a_contents);
1482                         } else {
1483                                 /*
1484                                  * We may have raced with
1485                                  * segvn_reclaim()/segspt_reclaim(). In this
1486                                  * case clean nounmapwait flag and retry since
1487                                  * softlockcnt in this segment may be already
1488                                  * 0.  We don't drop as writer lock so our
1489                                  * number of retries without sleeping should
1490                                  * be very small. See segvn_reclaim() for
1491                                  * more comments.
1492                                  */
1493                                 AS_CLRNOUNMAPWAIT(as);
1494                                 mutex_exit(&as->a_contents);
1495                                 goto retry;
1496                         }
1497                         mutex_exit(&as->a_contents);
1498                         goto top;
1499                 } else if (err == IE_RETRY) {
1500                         AS_LOCK_EXIT(as, &as->a_lock);
1501                         goto top;
1502                 } else if (err) {
1503                         as_setwatch(as);
1504                         AS_LOCK_EXIT(as, &as->a_lock);
1505                         return (-1);
1506                 }
1507 
1508                 as->a_size -= ssize;
1509                 if (rsize)
1510                         as->a_resvsize -= rsize;
1511                 raddr += ssize;
1512         }
1513         AS_LOCK_EXIT(as, &as->a_lock);
1514         return (0);
1515 }
1516 
1517 static int
1518 as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1519     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1520 {
1521         uint_t szc;
1522         uint_t nszc;
1523         int error;
1524         caddr_t a;
1525         caddr_t eaddr;
1526         size_t segsize;
1527         struct seg *seg;
1528         size_t pgsz;
1529         int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1530         uint_t save_szcvec;
1531 
1532         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1533         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1534         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1535         ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1536         if (!do_off) {
1537                 vn_a->offset = 0;
1538         }
1539 
1540         if (szcvec <= 1) {
1541                 seg = seg_alloc(as, addr, size);
1542                 if (seg == NULL) {
1543                         return (ENOMEM);
1544                 }
1545                 vn_a->szc = 0;
1546                 error = (*crfp)(seg, vn_a);
1547                 if (error != 0) {
1548                         seg_free(seg);
1549                 } else {
1550                         as->a_size += size;
1551                         as->a_resvsize += size;
1552                 }


1626         ASSERT(addr == eaddr);
1627 
1628         return (0);
1629 }
1630 
1631 static int
1632 as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1633     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1634 {
1635         uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1636         int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1637         uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1638             type, 0);
1639         int error;
1640         struct seg *seg;
1641         struct vattr va;
1642         u_offset_t eoff;
1643         size_t save_size = 0;
1644         extern size_t textrepl_size_thresh;
1645 
1646         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1647         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1648         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1649         ASSERT(vn_a->vp != NULL);
1650         ASSERT(vn_a->amp == NULL);
1651 
1652 again:
1653         if (szcvec <= 1) {
1654                 seg = seg_alloc(as, addr, size);
1655                 if (seg == NULL) {
1656                         return (ENOMEM);
1657                 }
1658                 vn_a->szc = 0;
1659                 error = (*crfp)(seg, vn_a);
1660                 if (error != 0) {
1661                         seg_free(seg);
1662                 } else {
1663                         as->a_size += size;
1664                         as->a_resvsize += size;
1665                 }
1666                 return (error);


1715     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1716 {
1717         uint_t szcvec;
1718         uchar_t type;
1719 
1720         ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1721         if (vn_a->type == MAP_SHARED) {
1722                 type = MAPPGSZC_SHM;
1723         } else if (vn_a->type == MAP_PRIVATE) {
1724                 if (vn_a->szc == AS_MAP_HEAP) {
1725                         type = MAPPGSZC_HEAP;
1726                 } else if (vn_a->szc == AS_MAP_STACK) {
1727                         type = MAPPGSZC_STACK;
1728                 } else {
1729                         type = MAPPGSZC_PRIVM;
1730                 }
1731         }
1732         szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1733             (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1734             (vn_a->flags & MAP_TEXT), type, 0);
1735         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1736         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1737         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1738         ASSERT(vn_a->vp == NULL);
1739 
1740         return (as_map_segvn_segs(as, addr, size, szcvec,
1741             crfp, vn_a, segcreated));
1742 }
1743 
1744 int
1745 as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1746 {
1747         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1748         return (as_map_locked(as, addr, size, crfp, argsp));
1749 }
1750 
1751 int
1752 as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1753                 void *argsp)
1754 {
1755         struct seg *seg = NULL;
1756         caddr_t raddr;                  /* rounded down addr */
1757         size_t rsize;                   /* rounded up size */
1758         int error;
1759         int unmap = 0;
1760         struct proc *p = curproc;
1761         struct segvn_crargs crargs;
1762 
1763         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1764         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1765             (size_t)raddr;
1766 
1767         /*
1768          * check for wrap around
1769          */
1770         if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1771                 AS_LOCK_EXIT(as, &as->a_lock);
1772                 return (ENOMEM);
1773         }
1774 
1775         as->a_updatedir = 1; /* inform /proc */
1776         gethrestime(&as->a_updatetime);
1777 
1778         if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1779                 AS_LOCK_EXIT(as, &as->a_lock);
1780 
1781                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1782                     RCA_UNSAFE_ALL);
1783 
1784                 return (ENOMEM);
1785         }
1786 
1787         if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1788                 crargs = *(struct segvn_crargs *)argsp;
1789                 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1790                 if (error != 0) {
1791                         AS_LOCK_EXIT(as, &as->a_lock);
1792                         if (unmap) {
1793                                 (void) as_unmap(as, addr, size);
1794                         }
1795                         return (error);
1796                 }
1797         } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1798                 crargs = *(struct segvn_crargs *)argsp;
1799                 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1800                 if (error != 0) {
1801                         AS_LOCK_EXIT(as, &as->a_lock);
1802                         if (unmap) {
1803                                 (void) as_unmap(as, addr, size);
1804                         }
1805                         return (error);
1806                 }
1807         } else {
1808                 seg = seg_alloc(as, addr, size);
1809                 if (seg == NULL) {
1810                         AS_LOCK_EXIT(as, &as->a_lock);
1811                         return (ENOMEM);
1812                 }
1813 
1814                 error = (*crfp)(seg, argsp);
1815                 if (error != 0) {
1816                         seg_free(seg);
1817                         AS_LOCK_EXIT(as, &as->a_lock);
1818                         return (error);
1819                 }
1820                 /*
1821                  * Add size now so as_unmap will work if as_ctl fails.
1822                  */
1823                 as->a_size += rsize;
1824                 as->a_resvsize += rsize;
1825         }
1826 
1827         as_setwatch(as);
1828 
1829         /*
1830          * If the address space is locked,
1831          * establish memory locks for the new segment.
1832          */
1833         mutex_enter(&as->a_contents);
1834         if (AS_ISPGLCK(as)) {
1835                 mutex_exit(&as->a_contents);
1836                 AS_LOCK_EXIT(as, &as->a_lock);
1837                 error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1838                 if (error != 0)
1839                         (void) as_unmap(as, addr, size);
1840         } else {
1841                 mutex_exit(&as->a_contents);
1842                 AS_LOCK_EXIT(as, &as->a_lock);
1843         }
1844         return (error);
1845 }
1846 
1847 
1848 /*
1849  * Delete all segments in the address space marked with S_PURGE.
1850  * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1851  * These segments are deleted as a first step before calls to as_gap(), so
1852  * that they don't affect mmap() or shmat().
1853  */
1854 void
1855 as_purge(struct as *as)
1856 {
1857         struct seg *seg;
1858         struct seg *next_seg;
1859 
1860         /*
 1861          * the setting of NEEDSPURGE is protected by as_rangelock(), so
1862          * no need to grab a_contents mutex for this check
1863          */
1864         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865                 return;
1866 
1867         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1868         next_seg = NULL;
1869         seg = AS_SEGFIRST(as);
1870         while (seg != NULL) {
1871                 next_seg = AS_SEGNEXT(as, seg);
1872                 if (seg->s_flags & S_PURGE)
1873                         SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1874                 seg = next_seg;
1875         }
1876         AS_LOCK_EXIT(as, &as->a_lock);
1877 
1878         mutex_enter(&as->a_contents);
1879         as->a_flags &= ~AS_NEEDSPURGE;
1880         mutex_exit(&as->a_contents);
1881 }
1882 
1883 /*
1884  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885  * range of addresses at least "minlen" long, where the base of the range is
1886  * at "off" phase from an "align" boundary and there is space for a
 1887  * "redzone"-sized redzone on either side of the range.  Thus,
1888  * if align was 4M and off was 16k, the user wants a hole which will start
1889  * 16k into a 4M page.
1890  *
1891  * If flags specifies AH_HI, the hole will have the highest possible address
1892  * in the range.  We use the as->a_lastgap field to figure out where to
1893  * start looking for a gap.
1894  *
1895  * Otherwise, the gap will have the lowest possible address.
1896  *


1919         save_base = *basep;
1920         save_len = *lenp;
1921         save_minlen = minlen;
1922         save_redzone = redzone;
1923 
1924         /*
1925          * For the first pass/fast_path, just add align and redzone into
1926          * minlen since if we get an allocation, we can guarantee that it
1927          * will fit the alignment and redzone requested.
1928          * This increases the chance that hibound will be adjusted to
1929          * a_lastgap->s_base which will likely allow us to find an
1930          * acceptable hole in the address space quicker.
1931          * If we can't find a hole with this fast_path, then we look for
1932          * smaller holes in which the alignment and offset may allow
1933          * the allocation to fit.
1934          */
1935         minlen += align;
1936         minlen += 2 * redzone;
1937         redzone = 0;
1938 
1939         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1940         if (AS_SEGFIRST(as) == NULL) {
1941                 if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1942                     align, redzone, off)) {
1943                         AS_LOCK_EXIT(as, &as->a_lock);
1944                         return (0);
1945                 } else {
1946                         AS_LOCK_EXIT(as, &as->a_lock);
1947                         *basep = save_base;
1948                         *lenp = save_len;
1949                         return (-1);
1950                 }
1951         }
1952 
1953 retry:
1954         /*
1955          * Set up to iterate over all the inter-segment holes in the given
1956          * direction.  lseg is NULL for the lowest-addressed hole and hseg is
1957          * NULL for the highest-addressed hole.  If moving backwards, we reset
1958          * sseg to denote the highest-addressed segment.
1959          */
1960         forward = (flags & AH_DIR) == AH_LO;
1961         if (forward) {
1962                 hseg = as_findseg(as, lobound, 1);
1963                 lseg = AS_SEGPREV(as, hseg);
1964         } else {
1965 
1966                 /*


2007                         lo = lobound;
2008                 if (hi > hibound)
2009                         hi = hibound;
2010                 /*
2011                  * Verify that the candidate hole is big enough and meets
2012                  * hardware constraints.  If the hole is too small, no need
2013                  * to do the further checks since they will fail.
2014                  */
2015                 *basep = lo;
2016                 *lenp = hi - lo;
2017                 if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
2018                     minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
2019                     ((flags & AH_CONTAIN) == 0 ||
2020                     (*basep <= addr && *basep + *lenp > addr))) {
2021                         if (!forward)
2022                                 as->a_lastgap = hseg;
2023                         if (hseg != NULL)
2024                                 as->a_lastgaphl = hseg;
2025                         else
2026                                 as->a_lastgaphl = lseg;
2027                         AS_LOCK_EXIT(as, &as->a_lock);
2028                         return (0);
2029                 }
2030         cont:
2031                 /*
2032                  * Move to the next hole.
2033                  */
2034                 if (forward) {
2035                         lseg = hseg;
2036                         if (lseg == NULL)
2037                                 break;
2038                         hseg = AS_SEGNEXT(as, hseg);
2039                 } else {
2040                         hseg = lseg;
2041                         if (hseg == NULL)
2042                                 break;
2043                         lseg = AS_SEGPREV(as, lseg);
2044                 }
2045         }
2046         if (fast_path && (align != 0 || save_redzone != 0)) {
2047                 fast_path = 0;
2048                 minlen = save_minlen;
2049                 redzone = save_redzone;
2050                 goto retry;
2051         }
2052         *basep = save_base;
2053         *lenp = save_len;
2054         AS_LOCK_EXIT(as, &as->a_lock);
2055         return (-1);
2056 }
2057 
2058 /*
2059  * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2060  *
2061  * If flags specifies AH_HI, the hole will have the highest possible address
2062  * in the range.  We use the as->a_lastgap field to figure out where to
2063  * start looking for a gap.
2064  *
2065  * Otherwise, the gap will have the lowest possible address.
2066  *
2067  * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2068  *
2069  * If an adequate hole is found, base and len are set to reflect the part of
2070  * the hole that is within range, and 0 is returned, otherwise,
2071  * -1 is returned.
2072  *
2073  * NOTE: This routine is not correct when base+len overflows caddr_t.
2074  */


2076 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077     caddr_t addr)
2078 {
2079 
2080         return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2081 }
2082 
2083 /*
2084  * Return the next range within [base, base + len) that is backed
2085  * with "real memory".  Skip holes and non-seg_vn segments.
2086  * We're lazy and only return one segment at a time.
2087  */
2088 int
2089 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 {
2091         extern struct seg_ops segspt_shmops;    /* needs a header file */
2092         struct seg *seg;
2093         caddr_t addr, eaddr;
2094         caddr_t segend;
2095 
2096         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2097 
2098         addr = *basep;
2099         eaddr = addr + *lenp;
2100 
2101         seg = as_findseg(as, addr, 0);
2102         if (seg != NULL)
2103                 addr = MAX(seg->s_base, addr);
2104 
2105         for (;;) {
2106                 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107                         AS_LOCK_EXIT(as, &as->a_lock);
2108                         return (EINVAL);
2109                 }
2110 
2111                 if (seg->s_ops == &segvn_ops) {
2112                         segend = seg->s_base + seg->s_size;
2113                         break;
2114                 }
2115 
2116                 /*
2117                  * We do ISM by looking into the private data
2118                  * to determine the real size of the segment.
2119                  */
2120                 if (seg->s_ops == &segspt_shmops) {
2121                         segend = seg->s_base + spt_realsize(seg);
2122                         if (addr < segend)
2123                                 break;
2124                 }
2125 
2126                 seg = AS_SEGNEXT(as, seg);
2127 
2128                 if (seg != NULL)
2129                         addr = seg->s_base;
2130         }
2131 
2132         *basep = addr;
2133 
2134         if (segend > eaddr)
2135                 *lenp = eaddr - addr;
2136         else
2137                 *lenp = segend - addr;
2138 
2139         AS_LOCK_EXIT(as, &as->a_lock);
2140         return (0);
2141 }
2142 
2143 /*
2144  * Swap the pages associated with the address space as out to
2145  * secondary storage, returning the number of bytes actually
2146  * swapped.
2147  *
2148  * The value returned is intended to correlate well with the process's
2149  * memory requirements.  Its usefulness for this purpose depends on
2150  * how well the segment-level routines do at returning accurate
2151  * information.
2152  */
2153 size_t
2154 as_swapout(struct as *as)
2155 {
2156         struct seg *seg;
2157         size_t swpcnt = 0;
2158 
2159         /*
2160          * Kernel-only processes have given up their address
2161          * spaces.  Of course, we shouldn't be attempting to
2162          * swap out such processes in the first place...
2163          */
2164         if (as == NULL)
2165                 return (0);
2166 
2167         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168 
2169         /* Prevent XHATs from attaching */
2170         mutex_enter(&as->a_contents);
2171         AS_SETBUSY(as);
2172         mutex_exit(&as->a_contents);
2173 
2174 
2175         /*
2176          * Free all mapping resources associated with the address
2177          * space.  The segment-level swapout routines capitalize
 2178          * on this unmapping by scavenging pages that have become
2179          * unmapped here.
2180          */
2181         hat_swapout(as->a_hat);
2182         if (as->a_xhat != NULL)
2183                 xhat_swapout_all(as);
2184 
2185         mutex_enter(&as->a_contents);
2186         AS_CLRBUSY(as);
2187         mutex_exit(&as->a_contents);
2188 
2189         /*
2190          * Call the swapout routines of all segments in the address
2191          * space to do the actual work, accumulating the amount of
2192          * space reclaimed.
2193          */
2194         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195                 struct seg_ops *ov = seg->s_ops;
2196 
2197                 /*
2198                  * We have to check to see if the seg has
2199                  * an ops vector because the seg may have
2200                  * been in the middle of being set up when
2201                  * the process was picked for swapout.
2202                  */
2203                 if ((ov != NULL) && (ov->swapout != NULL))
2204                         swpcnt += SEGOP_SWAPOUT(seg);
2205         }
2206         AS_LOCK_EXIT(as, &as->a_lock);
2207         return (swpcnt);
2208 }
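
The loop above only dispatches SEGOP_SWAPOUT() when both the segment's ops vector and its swapout entry exist, because a segment can be caught mid-construction. A minimal standalone sketch of that optional-entry dispatch pattern, using hypothetical obj/obj_ops types rather than the real seg/seg_ops definitions:

#include <stddef.h>
#include <stdio.h>

struct obj;

struct obj_ops {
	size_t	(*swapout)(struct obj *);	/* optional entry, may be NULL */
};

struct obj {
	struct obj_ops	*o_ops;		/* may still be NULL while the */
					/* object is being constructed */
};

static size_t
obj_swapout(struct obj *op)
{
	struct obj_ops *ov = op->o_ops;

	/* Dispatch only when both the vector and the entry exist. */
	if ((ov != NULL) && (ov->swapout != NULL))
		return (ov->swapout(op));
	return (0);
}

int
main(void)
{
	struct obj partial = { NULL };	/* caught mid-construction */

	(void) printf("%lu\n", (unsigned long)obj_swapout(&partial));
	return (0);
}
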
2209 
2210 /*
2211  * Determine whether data from the mappings in interval [addr, addr + size)
2212  * are in the primary memory (core) cache.
2213  */
2214 int
2215 as_incore(struct as *as, caddr_t addr,
2216     size_t size, char *vec, size_t *sizep)
2217 {
2218         struct seg *seg;
2219         size_t ssize;
2220         caddr_t raddr;          /* rounded down addr */
2221         size_t rsize;           /* rounded up size */
2222         size_t isize;                   /* iteration size */
2223         int error = 0;          /* result, assume success */
2224 
2225         *sizep = 0;
2226         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227         rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228             (size_t)raddr;
2229 
2230         if (raddr + rsize < raddr)           /* check for wraparound */
2231                 return (ENOMEM);
2232 
2233         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2234         seg = as_segat(as, raddr);
2235         if (seg == NULL) {
2236                 AS_LOCK_EXIT(as, &as->a_lock);
2237                 return (-1);
2238         }
2239 
2240         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241                 if (raddr >= seg->s_base + seg->s_size) {
2242                         seg = AS_SEGNEXT(as, seg);
2243                         if (seg == NULL || raddr != seg->s_base) {
2244                                 error = -1;
2245                                 break;
2246                         }
2247                 }
2248                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249                         ssize = seg->s_base + seg->s_size - raddr;
2250                 else
2251                         ssize = rsize;
2252                 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2253                 if (isize != ssize) {
2254                         error = -1;
2255                         break;
2256                 }
2257                 vec += btopr(ssize);
2258         }
2259         AS_LOCK_EXIT(as, &as->a_lock);
2260         return (error);
2261 }
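
as_incore() is, by all appearances, the kernel half of the residency query that user code reaches through mincore(2); that mapping is an assumption here, not something this file states. A hedged userland sketch, with one vec byte per page as in the loop above:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = 4 * pgsz;
	char *addr, vec[4];
	size_t i;

	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return (1);

	addr[0] = 1;			/* touch only the first page */

	if (mincore(addr, len, vec) != 0)
		return (1);

	for (i = 0; i < 4; i++)
		(void) printf("page %lu: %s\n", (unsigned long)i,
		    (vec[i] & 1) ? "in core" : "not in core");

	(void) munmap(addr, len);
	return (0);
}
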
2262 
2263 static void
2264 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265         ulong_t *bitmap, size_t position, size_t npages)
2266 {
2267         caddr_t range_start;
2268         size_t  pos1 = position;
2269         size_t  pos2;
2270         size_t  size;
2271         size_t  end_pos = npages + position;
2272 
2273         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274                 size = ptob((pos2 - pos1));
2275                 range_start = (caddr_t)((uintptr_t)addr +
2276                     ptob(pos1 - position));
2277 
2278                 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2279                     (ulong_t *)NULL, (size_t)NULL);


2309  * address space "as".
2310  */
2311 /*ARGSUSED*/
2312 int
2313 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2314     uintptr_t arg, ulong_t *lock_map, size_t pos)
2315 {
2316         struct seg *seg;        /* working segment */
2317         caddr_t raddr;          /* rounded down addr */
2318         caddr_t initraddr;      /* saved initial rounded down addr */
2319         size_t rsize;           /* rounded up size */
2320         size_t initrsize;       /* saved initial rounded up size */
2321         size_t ssize;           /* size of seg */
2322         int error = 0;                  /* result */
2323         size_t mlock_size;      /* size of bitmap */
2324         ulong_t *mlock_map;     /* pointer to bitmap used */
2325                                 /* to represent the locked */
2326                                 /* pages. */
2327 retry:
2328         if (error == IE_RETRY)
2329                 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2330         else
2331                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2332 
2333         /*
2334          * If these are address space lock/unlock operations, loop over
2335          * all segments in the address space, as appropriate.
2336          */
2337         if (func == MC_LOCKAS) {
2338                 size_t npages, idx;
2339                 size_t rlen = 0;        /* rounded as length */
2340 
2341                 idx = pos;
2342 
2343                 if (arg & MCL_FUTURE) {
2344                         mutex_enter(&as->a_contents);
2345                         AS_SETPGLCK(as);
2346                         mutex_exit(&as->a_contents);
2347                 }
2348                 if ((arg & MCL_CURRENT) == 0) {
2349                         AS_LOCK_EXIT(as, &as->a_lock);
2350                         return (0);
2351                 }
2352 
2353                 seg = AS_SEGFIRST(as);
2354                 if (seg == NULL) {
2355                         AS_LOCK_EXIT(as, &as->a_lock);
2356                         return (0);
2357                 }
2358 
2359                 do {
2360                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2361                             (uintptr_t)PAGEMASK);
2362                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2364                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 
2366                 mlock_size = BT_BITOUL(btopr(rlen));
2367                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2369                                 AS_LOCK_EXIT(as, &as->a_lock);
2370                                 return (EAGAIN);
2371                 }
2372 
2373                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374                         error = SEGOP_LOCKOP(seg, seg->s_base,
2375                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376                         if (error != 0)
2377                                 break;
2378                         pos += seg_pages(seg);
2379                 }
2380 
2381                 if (error) {
2382                         for (seg = AS_SEGFIRST(as); seg != NULL;
2383                             seg = AS_SEGNEXT(as, seg)) {
2384 
2385                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2386                                     (uintptr_t)PAGEMASK);
2387                                 npages = seg_pages(seg);
2388                                 as_segunlock(seg, raddr, attr, mlock_map,
2389                                     idx, npages);
2390                                 idx += npages;
2391                         }
2392                 }
2393 
2394                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395                 AS_LOCK_EXIT(as, &as->a_lock);
2396                 goto lockerr;
2397         } else if (func == MC_UNLOCKAS) {
2398                 mutex_enter(&as->a_contents);
2399                 AS_CLRPGLCK(as);
2400                 mutex_exit(&as->a_contents);
2401 
2402                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403                         error = SEGOP_LOCKOP(seg, seg->s_base,
2404                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405                         if (error != 0)
2406                                 break;
2407                 }
2408 
2409                 AS_LOCK_EXIT(as, &as->a_lock);
2410                 goto lockerr;
2411         }
2412 
2413         /*
2414          * Normalize addresses and sizes.
2415          */
2416         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418             (size_t)raddr;
2419 
2420         if (raddr + rsize < raddr) {         /* check for wraparound */
2421                 AS_LOCK_EXIT(as, &as->a_lock);
2422                 return (ENOMEM);
2423         }
2424 
2425         /*
2426          * Get initial segment.
2427          */
2428         if ((seg = as_segat(as, raddr)) == NULL) {
2429                 AS_LOCK_EXIT(as, &as->a_lock);
2430                 return (ENOMEM);
2431         }
2432 
2433         if (func == MC_LOCK) {
2434                 mlock_size = BT_BITOUL(btopr(rsize));
2435                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2436                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2437                                 AS_LOCK_EXIT(as, &as->a_lock);
2438                                 return (EAGAIN);
2439                 }
2440         }
2441 
2442         /*
2443          * Loop over all segments.  If a hole in the address range is
2444          * discovered, then fail.  For each segment, perform the appropriate
2445          * control operation.
2446          */
2447         while (rsize != 0) {
2448 
2449                 /*
2450                  * Make sure there's no hole, calculate the portion
2451                  * of the next segment to be operated over.
2452                  */
2453                 if (raddr >= seg->s_base + seg->s_size) {
2454                         seg = AS_SEGNEXT(as, seg);
2455                         if (seg == NULL || raddr != seg->s_base) {
2456                                 if (func == MC_LOCK) {
2457                                         as_unlockerr(as, attr, mlock_map,
2458                                             initraddr, initrsize - rsize);
2459                                         kmem_free(mlock_map,
2460                                             mlock_size * sizeof (ulong_t));
2461                                 }
2462                                 AS_LOCK_EXIT(as, &as->a_lock);
2463                                 return (ENOMEM);
2464                         }
2465                 }
2466                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467                         ssize = seg->s_base + seg->s_size - raddr;
2468                 else
2469                         ssize = rsize;
2470 
2471                 /*
2472                  * Dispatch on specific function.
2473                  */
2474                 switch (func) {
2475 
2476                 /*
2477                  * Synchronize cached data from mappings with backing
2478                  * objects.
2479                  */
2480                 case MC_SYNC:
2481                         if (error = SEGOP_SYNC(seg, raddr, ssize,
2482                             attr, (uint_t)arg)) {
2483                                 AS_LOCK_EXIT(as, &as->a_lock);
2484                                 return (error);
2485                         }
2486                         break;
2487 
2488                 /*
2489                  * Lock pages in memory.
2490                  */
2491                 case MC_LOCK:
2492                         if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2493                             attr, func, mlock_map, pos)) {
2494                                 as_unlockerr(as, attr, mlock_map, initraddr,
2495                                     initrsize - rsize + ssize);
2496                                 kmem_free(mlock_map, mlock_size *
2497                                     sizeof (ulong_t));
2498                                 AS_LOCK_EXIT(as, &as->a_lock);
2499                                 goto lockerr;
2500                         }
2501                         break;
2502 
2503                 /*
2504                  * Unlock mapped pages.
2505                  */
2506                 case MC_UNLOCK:
2507                         (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2508                             (ulong_t *)NULL, (size_t)NULL);
2509                         break;
2510 
2511                 /*
2512                  * Store VM advise for mapped pages in segment layer.
2513                  */
2514                 case MC_ADVISE:
2515                         error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2516 
2517                         /*
2518                          * Check for regular errors and special retry error
2519                          */
2520                         if (error) {
2521                                 if (error == IE_RETRY) {
2522                                         /*
2523                                          * Need to acquire writers lock, so
2524                                          * have to drop readers lock and start
2525                                          * all over again
2526                                          */
2527                                         AS_LOCK_EXIT(as, &as->a_lock);
2528                                         goto retry;
2529                                 } else if (error == IE_REATTACH) {
2530                                         /*
2531                                          * Find segment for current address
2532                                          * because current segment just got
2533                                          * split or concatenated
2534                                          */
2535                                         seg = as_segat(as, raddr);
2536                                         if (seg == NULL) {
2537                                                 AS_LOCK_EXIT(as, &as->a_lock);
2538                                                 return (ENOMEM);
2539                                         }
2540                                 } else {
2541                                         /*
2542                                          * Regular error
2543                                          */
2544                                         AS_LOCK_EXIT(as, &as->a_lock);
2545                                         return (error);
2546                                 }
2547                         }
2548                         break;
2549 
2550                 case MC_INHERIT_ZERO:
2551                         if (seg->s_ops->inherit == NULL) {
2552                                 error = ENOTSUP;
2553                         } else {
2554                                 error = SEGOP_INHERIT(seg, raddr, ssize,
2555                                     SEGP_INH_ZERO);
2556                         }
2557                         if (error != 0) {
2558                                 AS_LOCK_EXIT(as, &as->a_lock);
2559                                 return (error);
2560                         }
2561                         break;
2562 
2563                 /*
2564                  * Can't happen.
2565                  */
2566                 default:
2567                         panic("as_ctl: bad operation %d", func);
2568                         /*NOTREACHED*/
2569                 }
2570 
2571                 rsize -= ssize;
2572                 raddr += ssize;
2573         }
2574 
2575         if (func == MC_LOCK)
2576                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2577         AS_LOCK_EXIT(as, &as->a_lock);
2578         return (0);
2579 lockerr:
2580 
2581         /*
2582          * If the lower levels returned EDEADLK for a segment lockop,
2583          * it means that we should retry the operation.  Let's wait
2584          * a bit also to let the deadlock causing condition clear.
2585          * This is part of a gross hack to work around a design flaw
2586          * in the ufs/sds logging code and should go away when the
2587          * logging code is re-designed to fix the problem. See bug
2588          * 4125102 for details of the problem.
2589          */
2590         if (error == EDEADLK) {
2591                 delay(deadlk_wait);
2592                 error = 0;
2593                 goto retry;
2594         }
2595         return (error);
2596 }
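
The MC_LOCKAS branch above tests MCL_CURRENT and MCL_FUTURE, which are the bits mlockall(3C) passes; assuming mlockall()/munlockall() reach the MC_LOCKAS/MC_UNLOCKAS cases here, a minimal userland sketch looks like this:

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/* Lock current mappings and ask for future mappings to be locked. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
		perror("mlockall");	/* commonly EPERM or EAGAIN */
		return (1);
	}

	/* ... latency-sensitive work with no major page faults ... */

	(void) munlockall();		/* assumed to reach MC_UNLOCKAS */
	return (0);
}
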
2597 


2622  */
2623 static int
2624 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2625     caddr_t addr, size_t size, enum seg_rw rw)
2626 {
2627         caddr_t sv_addr = addr;
2628         size_t sv_size = size;
2629         struct seg *sv_seg = seg;
2630         ulong_t segcnt = 1;
2631         ulong_t cnt;
2632         size_t ssize;
2633         pgcnt_t npages = btop(size);
2634         page_t **plist;
2635         page_t **pl;
2636         int error;
2637         caddr_t eaddr;
2638         faultcode_t fault_err = 0;
2639         pgcnt_t pl_off;
2640         extern struct seg_ops segspt_shmops;
2641 
2642         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2643         ASSERT(seg != NULL);
2644         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2645         ASSERT(addr + size > seg->s_base + seg->s_size);
2646         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2647         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2648 
2649         /*
2650          * Count the number of segments covered by the range we are about to
2651          * lock. The segment count is used to size the shadow list we return
2652          * back to the caller.
2653          */
2654         for (; size != 0; size -= ssize, addr += ssize) {
2655                 if (addr >= seg->s_base + seg->s_size) {
2656 
2657                         seg = AS_SEGNEXT(as, seg);
2658                         if (seg == NULL || addr != seg->s_base) {
2659                                 AS_LOCK_EXIT(as, &as->a_lock);
2660                                 return (EFAULT);
2661                         }
2662                         /*
2663                          * Do a quick check if subsequent segments
2664                          * will most likely support pagelock.
2665                          */
2666                         if (seg->s_ops == &segvn_ops) {
2667                                 vnode_t *vp;
2668 
2669                                 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2670                                     vp != NULL) {
2671                                         AS_LOCK_EXIT(as, &as->a_lock);
2672                                         goto slow;
2673                                 }
2674                         } else if (seg->s_ops != &segspt_shmops) {
2675                                 AS_LOCK_EXIT(as, &as->a_lock);
2676                                 goto slow;
2677                         }
2678                         segcnt++;
2679                 }
2680                 if (addr + size > seg->s_base + seg->s_size) {
2681                         ssize = seg->s_base + seg->s_size - addr;
2682                 } else {
2683                         ssize = size;
2684                 }
2685         }
2686         ASSERT(segcnt > 1);
2687 
2688         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2689 
2690         addr = sv_addr;
2691         size = sv_size;
2692         seg = sv_seg;
2693 
2694         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2695                 if (addr >= seg->s_base + seg->s_size) {


2700                 }
2701                 if (addr + size > seg->s_base + seg->s_size) {
2702                         ssize = seg->s_base + seg->s_size - addr;
2703                 } else {
2704                         ssize = size;
2705                 }
2706                 pl = &plist[npages + cnt];
2707                 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2708                     L_PAGELOCK, rw);
2709                 if (error) {
2710                         break;
2711                 }
2712                 ASSERT(plist[npages + cnt] != NULL);
2713                 ASSERT(pl_off + btop(ssize) <= npages);
2714                 bcopy(plist[npages + cnt], &plist[pl_off],
2715                     btop(ssize) * sizeof (page_t *));
2716                 pl_off += btop(ssize);
2717         }
2718 
2719         if (size == 0) {
2720                 AS_LOCK_EXIT(as, &as->a_lock);
2721                 ASSERT(cnt == segcnt - 1);
2722                 *ppp = plist;
2723                 return (0);
2724         }
2725 
2726         /*
2727          * One of the pagelock calls failed.  The error type is in the error
2728          * variable.  Unlock what we've locked so far and retry with F_SOFTLOCK
2729          * if the error type is either EFAULT or ENOTSUP.  Otherwise just return
2730          * the error back to the caller.
2731          */
2732 
2733         eaddr = addr;
2734         seg = sv_seg;
2735 
2736         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2737                 if (addr >= seg->s_base + seg->s_size) {
2738                         seg = AS_SEGNEXT(as, seg);
2739                         ASSERT(seg != NULL && addr == seg->s_base);
2740                         cnt++;
2741                         ASSERT(cnt < segcnt);
2742                 }
2743                 if (eaddr > seg->s_base + seg->s_size) {
2744                         ssize = seg->s_base + seg->s_size - addr;
2745                 } else {
2746                         ssize = eaddr - addr;
2747                 }
2748                 pl = &plist[npages + cnt];
2749                 ASSERT(*pl != NULL);
2750                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2751                     L_PAGEUNLOCK, rw);
2752         }
2753 
2754         AS_LOCK_EXIT(as, &as->a_lock);
2755 
2756         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2757 
2758         if (error != ENOTSUP && error != EFAULT) {
2759                 return (error);
2760         }
2761 
2762 slow:
2763         /*
2764          * If we are here because pagelock failed due to the need to cow-fault
2765          * in the pages we want to lock, F_SOFTLOCK will do this job and in the
2766          * next as_pagelock() call for this address range pagelock will
2767          * hopefully succeed.
2768          */
2769         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2770         if (fault_err != 0) {
2771                 return (fc_decode(fault_err));
2772         }
2773         *ppp = NULL;
2774 


2783 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2784     size_t size, enum seg_rw rw)
2785 {
2786         size_t rsize;
2787         caddr_t raddr;
2788         faultcode_t fault_err;
2789         struct seg *seg;
2790         int err;
2791 
2792         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2793             "as_pagelock_start: addr %p size %ld", addr, size);
2794 
2795         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2796         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2797             (size_t)raddr;
2798 
2799         /*
2800          * if the request crosses two segments let
2801          * as_fault handle it.
2802          */
2803         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2804 
2805         seg = as_segat(as, raddr);
2806         if (seg == NULL) {
2807                 AS_LOCK_EXIT(as, &as->a_lock);
2808                 return (EFAULT);
2809         }
2810         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2811         if (raddr + rsize > seg->s_base + seg->s_size) {
2812                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2813         }
2814         if (raddr + rsize <= raddr) {
2815                 AS_LOCK_EXIT(as, &as->a_lock);
2816                 return (EFAULT);
2817         }
2818 
2819         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2820             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2821 
2822         /*
2823          * try to lock pages and pass back shadow list
2824          */
2825         err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2826 
2827         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2828 
2829         AS_LOCK_EXIT(as, &as->a_lock);
2830 
2831         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2832                 return (err);
2833         }
2834 
2835         /*
2836          * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2837          * to no pagelock support for this segment or because the pages need to
2838          * be cow-faulted in.  If a fault is needed, F_SOFTLOCK will do this job
2839          * for this as_pagelock() call, and in the next as_pagelock() call for
2840          * the same address range the pagelock call will hopefully succeed.
2841          */
2842         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2843         if (fault_err != 0) {
2844                 return (fc_decode(fault_err));
2845         }
2846         *ppp = NULL;
2847 
2848         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2849         return (0);
2850 }
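
A non-compilable editorial sketch of the expected pairing, based only on the signatures above; do_transfer() is a hypothetical I/O routine, and a NULL shadow list corresponds to the F_SOFTLOCK fallback:

	struct as *as = curproc->p_as;		/* hypothetical caller context */
	struct page **pplist;
	int err;

	err = as_pagelock(as, &pplist, addr, len, S_WRITE);
	if (err != 0)
		return (err);			/* e.g. EFAULT */

	err = do_transfer(addr, len, pplist);	/* hypothetical I/O routine;   */
						/* pplist may be NULL when the */
						/* F_SOFTLOCK fallback was used */

	as_pageunlock(as, pplist, addr, len, S_WRITE);
	return (err);
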
2851 
2852 /*
2853  * unlock pages locked by as_pagelock_segs().  Retrieve per segment shadow
2854  * lists from the end of plist and call pageunlock interface for each segment.
2855  * Drop as lock and free plist.
2856  */
2857 static void
2858 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2859     struct page **plist, enum seg_rw rw)
2860 {
2861         ulong_t cnt;
2862         caddr_t eaddr = addr + size;
2863         pgcnt_t npages = btop(size);
2864         size_t ssize;
2865         page_t **pl;
2866 
2867         ASSERT(AS_LOCK_HELD(as, &as->a_lock));
2868         ASSERT(seg != NULL);
2869         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2870         ASSERT(addr + size > seg->s_base + seg->s_size);
2871         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2872         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2873         ASSERT(plist != NULL);
2874 
2875         for (cnt = 0; addr < eaddr; addr += ssize) {
2876                 if (addr >= seg->s_base + seg->s_size) {
2877                         seg = AS_SEGNEXT(as, seg);
2878                         ASSERT(seg != NULL && addr == seg->s_base);
2879                         cnt++;
2880                 }
2881                 if (eaddr > seg->s_base + seg->s_size) {
2882                         ssize = seg->s_base + seg->s_size - addr;
2883                 } else {
2884                         ssize = eaddr - addr;
2885                 }
2886                 pl = &plist[npages + cnt];
2887                 ASSERT(*pl != NULL);
2888                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2889                     L_PAGEUNLOCK, rw);
2890         }
2891         ASSERT(cnt > 0);
2892         AS_LOCK_EXIT(as, &as->a_lock);
2893 
2894         cnt++;
2895         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2896 }
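
Both routines share one allocation of (npages + segcnt) pointers: the flat page list occupies the first npages slots and the per-segment shadow-list pointers occupy the last segcnt slots. A small standalone sketch of that layout (illustrative values only):

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	size_t npages = 8, segcnt = 3;		/* illustrative values */
	void **plist = calloc(npages + segcnt, sizeof (void *));
	size_t cnt;

	if (plist == NULL)
		return (1);

	/*
	 * plist[0 .. npages-1]               flat page list returned via *ppp
	 * plist[npages .. npages+segcnt-1]   per-segment shadow list pointers
	 */
	for (cnt = 0; cnt < segcnt; cnt++)
		(void) printf("segment %lu: shadow slot index %lu\n",
		    (unsigned long)cnt, (unsigned long)(npages + cnt));

	free(plist);
	return (0);
}
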
2897 
2898 /*
2899  * unlock pages in a given address range
2900  */
2901 void
2902 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2903     enum seg_rw rw)
2904 {
2905         struct seg *seg;
2906         size_t rsize;
2907         caddr_t raddr;
2908 
2909         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2910             "as_pageunlock_start: addr %p size %ld", addr, size);
2911 
2912         /*
2913          * if the shadow list is NULL, as_pagelock fell
2914          * back to as_fault, so undo with F_SOFTUNLOCK
2915          */
2916         if (pp == NULL) {
2917                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2918                 return;
2919         }
2920 
2921         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2922         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2923             (size_t)raddr;
2924 
2925         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2926         seg = as_segat(as, raddr);
2927         ASSERT(seg != NULL);
2928 
2929         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2930             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2931 
2932         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2933         if (raddr + rsize <= seg->s_base + seg->s_size) {
2934                 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2935         } else {
2936                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2937                 return;
2938         }
2939         AS_LOCK_EXIT(as, &as->a_lock);
2940         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2941 }
2942 
2943 int
2944 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2945     boolean_t wait)
2946 {
2947         struct seg *seg;
2948         size_t ssize;
2949         caddr_t raddr;                  /* rounded down addr */
2950         size_t rsize;                   /* rounded up size */
2951         int error = 0;
2952         size_t pgsz = page_get_pagesize(szc);
2953 
2954 setpgsz_top:
2955         if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2956                 return (EINVAL);
2957         }
2958 
2959         raddr = addr;
2960         rsize = size;
2961 
2962         if (raddr + rsize < raddr)           /* check for wraparound */
2963                 return (ENOMEM);
2964 
2965         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2966         as_clearwatchprot(as, raddr, rsize);
2967         seg = as_segat(as, raddr);
2968         if (seg == NULL) {
2969                 as_setwatch(as);
2970                 AS_LOCK_EXIT(as, &as->a_lock);
2971                 return (ENOMEM);
2972         }
2973 
2974         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2975                 if (raddr >= seg->s_base + seg->s_size) {
2976                         seg = AS_SEGNEXT(as, seg);
2977                         if (seg == NULL || raddr != seg->s_base) {
2978                                 error = ENOMEM;
2979                                 break;
2980                         }
2981                 }
2982                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2983                         ssize = seg->s_base + seg->s_size - raddr;
2984                 } else {
2985                         ssize = rsize;
2986                 }
2987 
2988 retry:
2989                 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2990 
2991                 if (error == IE_NOMEM) {
2992                         error = EAGAIN;
2993                         break;
2994                 }
2995 
2996                 if (error == IE_RETRY) {
2997                         AS_LOCK_EXIT(as, &as->a_lock);
2998                         goto setpgsz_top;
2999                 }
3000 
3001                 if (error == ENOTSUP) {
3002                         error = EINVAL;
3003                         break;
3004                 }
3005 
3006                 if (wait && (error == EAGAIN)) {
3007                         /*
3008                          * Memory is currently locked.  It must be unlocked
3009                          * before this operation can succeed through a retry.
3010                          * The possible reasons for locked memory and
3011                          * corresponding strategies for unlocking are:
3012                          * (1) Normal I/O
3013                          *      wait for a signal that the I/O operation
3014                          *      has completed and the memory is unlocked.
3015                          * (2) Asynchronous I/O
3016                          *      The aio subsystem does not unlock pages when
3017                          *      the I/O is completed. Those pages are unlocked
3018                          *      when the application calls aiowait/aioerror.
3019                          *      So, to prevent blocking forever, cv_broadcast()
3020                          *      is done to wake up aio_cleanup_thread.
3021                          *      Subsequently, segvn_reclaim will be called, and
3022                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
3023                          * (3) Long term page locking:
3024                          *      This is not relevant for as_setpagesize()
3025                          *      because we cannot change the page size for
3026                          *      driver memory. The attempt to do so will
3027                          *      fail with a different error than EAGAIN so
3028                          *      there's no need to trigger as callbacks like
3029                          *      as_unmap, as_setprot or as_free would do.
3030                          */
3031                         mutex_enter(&as->a_contents);
3032                         if (!AS_ISNOUNMAPWAIT(as)) {
3033                                 if (AS_ISUNMAPWAIT(as) == 0) {
3034                                         cv_broadcast(&as->a_cv);
3035                                 }
3036                                 AS_SETUNMAPWAIT(as);
3037                                 AS_LOCK_EXIT(as, &as->a_lock);
3038                                 while (AS_ISUNMAPWAIT(as)) {
3039                                         cv_wait(&as->a_cv, &as->a_contents);
3040                                 }
3041                         } else {
3042                                 /*
3043                                  * We may have raced with
3044                                  * segvn_reclaim()/segspt_reclaim(). In this
3045                                  * case clean nounmapwait flag and retry since
3046                                  * softlockcnt in this segment may be already
3047                                  * 0.  We don't drop as writer lock so our
3048                                  * number of retries without sleeping should
3049                                  * be very small. See segvn_reclaim() for
3050                                  * more comments.
3051                                  */
3052                                 AS_CLRNOUNMAPWAIT(as);
3053                                 mutex_exit(&as->a_contents);
3054                                 goto retry;
3055                         }
3056                         mutex_exit(&as->a_contents);
3057                         goto setpgsz_top;
3058                 } else if (error != 0) {
3059                         break;
3060                 }
3061         }
3062         as_setwatch(as);
3063         AS_LOCK_EXIT(as, &as->a_lock);
3064         return (error);
3065 }
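
as_setpagesize() requires the range to be aligned to the requested page size and appears to be reachable from userland through memcntl(2) MC_HAT_ADVISE with MHA_MAPSIZE_VA; that route, MAP_ALIGN for alignment, and the 2MB size below are assumptions, not statements from this file. A hedged sketch:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	size_t lpsz = 2 * 1024 * 1024;	/* assumed supported large page size */
	struct memcntl_mha mha;
	caddr_t buf;

	/* MAP_ALIGN: first argument is an alignment request, not an address. */
	buf = mmap((caddr_t)lpsz, 2 * lpsz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0);
	if (buf == MAP_FAILED)
		return (1);

	mha.mha_cmd = MHA_MAPSIZE_VA;
	mha.mha_flags = 0;
	mha.mha_pagesize = lpsz;

	if (memcntl(buf, 2 * lpsz, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != 0)
		perror("memcntl(MC_HAT_ADVISE)");	/* EINVAL if unsupported */
	return (0);
}
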
3066 
3067 /*
3068  * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
3069  * in its chunk where s_szc is less than the szc we want to set.
3070  */
3071 static int
3072 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3073     int *retry)
3074 {
3075         struct seg *seg;
3076         size_t ssize;
3077         int error;
3078 
3079         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3080 
3081         seg = as_segat(as, raddr);
3082         if (seg == NULL) {
3083                 panic("as_iset3_default_lpsize: no seg");
3084         }
3085 
3086         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3087                 if (raddr >= seg->s_base + seg->s_size) {
3088                         seg = AS_SEGNEXT(as, seg);
3089                         if (seg == NULL || raddr != seg->s_base) {
3090                                 panic("as_iset3_default_lpsize: as changed");
3091                         }
3092                 }
3093                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3094                         ssize = seg->s_base + seg->s_size - raddr;
3095                 } else {
3096                         ssize = rsize;
3097                 }
3098 
3099                 if (szc > seg->s_szc) {


3116         }
3117         return (0);
3118 }
3119 
3120 /*
3121  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3122  * pagesize on each segment in its range, but if any fails with EINVAL,
3123  * then it reduces the pagesizes to the next size in the bitmap and
3124  * retries as_iset3_default_lpsize(). The reason why the code retries
3125  * smaller allowed sizes on EINVAL is because (a) the anon offset may not
3126  * match the bigger sizes, and (b) it's hard to get this offset (to begin
3127  * with) to pass to map_pgszcvec().
3128  */
3129 static int
3130 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3131     uint_t szcvec)
3132 {
3133         int error;
3134         int retry;
3135 
3136         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3137 
3138         for (;;) {
3139                 error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3140                 if (error == EINVAL && retry) {
3141                         szcvec &= ~(1 << szc);
3142                         if (szcvec <= 1) {
3143                                 return (EINVAL);
3144                         }
3145                         szc = highbit(szcvec) - 1;
3146                 } else {
3147                         return (error);
3148                 }
3149         }
3150 }
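
A standalone sketch of the retry loop above: drop the failing size code from the bitmap and fall back to the next largest until only the base page size remains. highbit() below is a local stand-in for the kernel routine of the same name (index of the highest set bit, counting from 1):

#include <stdio.h>

static int
highbit(unsigned long v)
{
	int b = 0;

	while (v != 0) {
		b++;
		v >>= 1;
	}
	return (b);
}

int
main(void)
{
	unsigned long szcvec = 0x16;		/* example: size codes 1, 2, 4 set */
	unsigned int szc = highbit(szcvec) - 1;

	while (szcvec > 1) {
		(void) printf("trying size code %u\n", szc);
		/* ... pretend the attempt failed with EINVAL ... */
		szcvec &= ~(1UL << szc);	/* discard the failing size */
		if (szcvec <= 1)
			break;			/* only the base page size left */
		szc = highbit(szcvec) - 1;	/* next largest remaining size */
	}
	return (0);
}
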
3151 
3152 /*
3153  * as_iset1_default_lpsize() breaks its chunk into areas where existing
3154  * segments have a smaller szc than we want to set. For each such area,
3155  * it calls as_iset2_default_lpsize().
3156  */
3157 static int
3158 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3159     uint_t szcvec)
3160 {
3161         struct seg *seg;
3162         size_t ssize;
3163         caddr_t setaddr = raddr;
3164         size_t setsize = 0;
3165         int set;
3166         int error;
3167 
3168         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3169 
3170         seg = as_segat(as, raddr);
3171         if (seg == NULL) {
3172                 panic("as_iset1_default_lpsize: no seg");
3173         }
3174         if (seg->s_szc < szc) {
3175                 set = 1;
3176         } else {
3177                 set = 0;
3178         }
3179 
3180         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3181                 if (raddr >= seg->s_base + seg->s_size) {
3182                         seg = AS_SEGNEXT(as, seg);
3183                         if (seg == NULL || raddr != seg->s_base) {
3184                                 panic("as_iset1_default_lpsize: as changed");
3185                         }
3186                         if (seg->s_szc >= szc && set) {
3187                                 ASSERT(setsize != 0);
3188                                 error = as_iset2_default_lpsize(as,


3216  * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3217  * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3218  * chunk to as_iset1_default_lpsize().
3219  */
3220 static int
3221 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3222     int type)
3223 {
3224         int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3225         uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3226             flags, rtype, 1);
3227         uint_t szc;
3228         uint_t nszc;
3229         int error;
3230         caddr_t a;
3231         caddr_t eaddr;
3232         size_t segsize;
3233         size_t pgsz;
3234         uint_t save_szcvec;
3235 
3236         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3237         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3238         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3239 
3240         szcvec &= ~1;
3241         if (szcvec <= 1) {   /* skip if base page size */
3242                 return (0);
3243         }
3244 
3245         /* Get the pagesize of the first larger page size. */
3246         szc = lowbit(szcvec) - 1;
3247         pgsz = page_get_pagesize(szc);
3248         eaddr = addr + size;
3249         addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3250         eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3251 
3252         save_szcvec = szcvec;
3253         szcvec >>= (szc + 1);
3254         nszc = szc;
3255         while (szcvec) {
3256                 if ((szcvec & 0x1) == 0) {


3308  * chunks with the same type/flags, ignores non-segvn segments, and passes
3309  * each chunk to as_iset_default_lpsize().
3310  */
3311 int
3312 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3313 {
3314         struct seg *seg;
3315         caddr_t raddr;
3316         size_t rsize;
3317         size_t ssize;
3318         int rtype, rflags;
3319         int stype, sflags;
3320         int error;
3321         caddr_t setaddr;
3322         size_t setsize;
3323         int segvn;
3324 
3325         if (size == 0)
3326                 return (0);
3327 
3328         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3329 again:
3330         error = 0;
3331 
3332         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3333         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3334             (size_t)raddr;
3335 
3336         if (raddr + rsize < raddr) {         /* check for wraparound */
3337                 AS_LOCK_EXIT(as, &as->a_lock);
3338                 return (ENOMEM);
3339         }
3340         as_clearwatchprot(as, raddr, rsize);
3341         seg = as_segat(as, raddr);
3342         if (seg == NULL) {
3343                 as_setwatch(as);
3344                 AS_LOCK_EXIT(as, &as->a_lock);
3345                 return (ENOMEM);
3346         }
3347         if (seg->s_ops == &segvn_ops) {
3348                 rtype = SEGOP_GETTYPE(seg, addr);
3349                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3350                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3351                 segvn = 1;
3352         } else {
3353                 segvn = 0;
3354         }
3355         setaddr = raddr;
3356         setsize = 0;
3357 
3358         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3359                 if (raddr >= (seg->s_base + seg->s_size)) {
3360                         seg = AS_SEGNEXT(as, seg);
3361                         if (seg == NULL || raddr != seg->s_base) {
3362                                 error = ENOMEM;
3363                                 break;
3364                         }


3409         if (error == 0 && segvn) {
3410                 /* The last chunk when rsize == 0. */
3411                 ASSERT(setsize != 0);
3412                 error = as_iset_default_lpsize(as, setaddr, setsize,
3413                     rflags, rtype);
3414         }
3415 
3416         if (error == IE_RETRY) {
3417                 goto again;
3418         } else if (error == IE_NOMEM) {
3419                 error = EAGAIN;
3420         } else if (error == ENOTSUP) {
3421                 error = EINVAL;
3422         } else if (error == EAGAIN) {
3423                 mutex_enter(&as->a_contents);
3424                 if (!AS_ISNOUNMAPWAIT(as)) {
3425                         if (AS_ISUNMAPWAIT(as) == 0) {
3426                                 cv_broadcast(&as->a_cv);
3427                         }
3428                         AS_SETUNMAPWAIT(as);
3429                         AS_LOCK_EXIT(as, &as->a_lock);
3430                         while (AS_ISUNMAPWAIT(as)) {
3431                                 cv_wait(&as->a_cv, &as->a_contents);
3432                         }
3433                         mutex_exit(&as->a_contents);
3434                         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3435                 } else {
3436                         /*
3437                          * We may have raced with
3438                          * segvn_reclaim()/segspt_reclaim(). In this case
3439                          * clean nounmapwait flag and retry since softlockcnt
3440                          * in this segment may be already 0.  We don't drop as
3441                          * writer lock so our number of retries without
3442                          * sleeping should be very small. See segvn_reclaim()
3443                          * for more comments.
3444                          */
3445                         AS_CLRNOUNMAPWAIT(as);
3446                         mutex_exit(&as->a_contents);
3447                 }
3448                 goto again;
3449         }
3450 
3451         as_setwatch(as);
3452         AS_LOCK_EXIT(as, &as->a_lock);
3453         return (error);
3454 }
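
The EAGAIN handling above broadcasts on a_cv to nudge the reclaim thread and then sleeps until the unmap-wait state clears before retrying. A hedged userland analogue of that broadcast-then-wait-until-the-flag-clears pattern, using POSIX threads rather than the kernel cv_* primitives:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int busy = 1;			/* analogue of the unmap-wait state */

static void *
reclaimer(void *arg)
{
	(void) arg;
	(void) pthread_mutex_lock(&lock);
	busy = 0;			/* resources have been released */
	(void) pthread_cond_broadcast(&cv);
	(void) pthread_mutex_unlock(&lock);
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	(void) pthread_create(&t, NULL, reclaimer, NULL);

	(void) pthread_mutex_lock(&lock);
	(void) pthread_cond_broadcast(&cv);	/* nudge any sleeper first */
	while (busy)				/* re-check after every wakeup */
		(void) pthread_cond_wait(&cv, &lock);
	(void) pthread_mutex_unlock(&lock);

	(void) pthread_join(t, NULL);
	(void) printf("retry now\n");
	return (0);
}
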
3455 
3456 /*
3457  * Set up all of the uninitialized watched pages that we can.
3458  */
3459 void
3460 as_setwatch(struct as *as)
3461 {
3462         struct watched_page *pwp;
3463         struct seg *seg;
3464         caddr_t vaddr;
3465         uint_t prot;
3466         int  err, retrycnt;
3467 
3468         if (avl_numnodes(&as->a_wpage) == 0)
3469                 return;
3470 
3471         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3472 
3473         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3474             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3475                 retrycnt = 0;
3476         retry:
3477                 vaddr = pwp->wp_vaddr;
3478                 if (pwp->wp_oprot != 0 ||    /* already set up */
3479                     (seg = as_segat(as, vaddr)) == NULL ||
3480                     SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3481                         continue;
3482 
3483                 pwp->wp_oprot = prot;
3484                 if (pwp->wp_read)
3485                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3486                 if (pwp->wp_write)
3487                         prot &= ~PROT_WRITE;
3488                 if (pwp->wp_exec)
3489                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3490                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3491                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);


3498                 }
3499                 pwp->wp_prot = prot;
3500         }
3501 }
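
The watched-page machinery that as_setwatch()/as_clearwatch() maintain backs /proc watchpoints; assuming the proc(4) control interface, a debugger would establish one by writing a PCWATCH message to the target's /proc/<pid>/ctl file, roughly as sketched below:

#include <sys/types.h>
#include <procfs.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	struct {
		long		cmd;
		prwatch_t	wp;
	} ctl;
	char ctlfile[64];
	int fd;

	if (argc != 3) {
		(void) fprintf(stderr, "usage: %s pid addr\n", argv[0]);
		return (1);
	}
	(void) snprintf(ctlfile, sizeof (ctlfile), "/proc/%s/ctl", argv[1]);
	if ((fd = open(ctlfile, O_WRONLY)) < 0)
		return (1);

	(void) memset(&ctl, 0, sizeof (ctl));
	ctl.cmd = PCWATCH;
	ctl.wp.pr_vaddr = strtoul(argv[2], NULL, 0);
	ctl.wp.pr_size = 1;		/* watch a single byte */
	ctl.wp.pr_wflags = WA_WRITE;	/* trap on write access */

	if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl))
		perror("PCWATCH");
	(void) close(fd);
	return (0);
}
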
3502 
3503 /*
3504  * Clear all of the watched pages in the address space.
3505  */
3506 void
3507 as_clearwatch(struct as *as)
3508 {
3509         struct watched_page *pwp;
3510         struct seg *seg;
3511         caddr_t vaddr;
3512         uint_t prot;
3513         int err, retrycnt;
3514 
3515         if (avl_numnodes(&as->a_wpage) == 0)
3516                 return;
3517 
3518         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3519 
3520         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3521             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3522                 retrycnt = 0;
3523         retry:
3524                 vaddr = pwp->wp_vaddr;
3525                 if (pwp->wp_oprot == 0 ||    /* not set up */
3526                     (seg = as_segat(as, vaddr)) == NULL)
3527                         continue;
3528 
3529                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3530                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3531                         if (err == IE_RETRY) {
3532                                 ASSERT(retrycnt == 0);
3533                                 retrycnt++;
3534                                 goto retry;
3535                         }
3536                 }
3537                 pwp->wp_oprot = 0;
3538                 pwp->wp_prot = 0;


3540 }
3541 
3542 /*
3543  * Force a new setup for all the watched pages in the range.
3544  */
3545 static void
3546 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3547 {
3548         struct watched_page *pwp;
3549         struct watched_page tpw;
3550         caddr_t eaddr = addr + size;
3551         caddr_t vaddr;
3552         struct seg *seg;
3553         int err, retrycnt;
3554         uint_t  wprot;
3555         avl_index_t where;
3556 
3557         if (avl_numnodes(&as->a_wpage) == 0)
3558                 return;
3559 
3560         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3561 
3562         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3563         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3564                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3565 
3566         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3567                 retrycnt = 0;
3568                 vaddr = pwp->wp_vaddr;
3569 
3570                 wprot = prot;
3571                 if (pwp->wp_read)
3572                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3573                 if (pwp->wp_write)
3574                         wprot &= ~PROT_WRITE;
3575                 if (pwp->wp_exec)
3576                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3577                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3578                 retry:
3579                         seg = as_segat(as, vaddr);
3580                         if (seg == NULL) {


3599  * Clear all of the watched pages in the range.
3600  */
3601 static void
3602 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3603 {
3604         caddr_t eaddr = addr + size;
3605         struct watched_page *pwp;
3606         struct watched_page tpw;
3607         uint_t prot;
3608         struct seg *seg;
3609         int err, retrycnt;
3610         avl_index_t where;
3611 
3612         if (avl_numnodes(&as->a_wpage) == 0)
3613                 return;
3614 
3615         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3616         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3617                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3618 
3619         ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3620 
3621         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3622 
3623                 if ((prot = pwp->wp_oprot) != 0) {
3624                         retrycnt = 0;
3625 
3626                         if (prot != pwp->wp_prot) {
3627                         retry:
3628                                 seg = as_segat(as, pwp->wp_vaddr);
3629                                 if (seg == NULL)
3630                                         continue;
3631                                 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3632                                     PAGESIZE, prot);
3633                                 if (err == IE_RETRY) {
3634                                         ASSERT(retrycnt == 0);
3635                                         retrycnt++;
3636                                         goto retry;
3637 
3638                                 }
3639                         }


3654         for (p = practive; p; p = p->p_next) {
3655                 if (p->p_as == as) {
3656                         mutex_enter(&p->p_lock);
3657                         if (p->p_as == as)
3658                                 sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3659                         mutex_exit(&p->p_lock);
3660                 }
3661         }
3662         mutex_exit(&pidlock);
3663 }
3664 
3665 /*
3666  * return memory object ID
3667  */
3668 int
3669 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3670 {
3671         struct seg      *seg;
3672         int             sts;
3673 
3674         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3675         seg = as_segat(as, addr);
3676         if (seg == NULL) {
3677                 AS_LOCK_EXIT(as, &as->a_lock);
3678                 return (EFAULT);
3679         }
3680         /*
3681          * catch old drivers which may not support getmemid
3682          */
3683         if (seg->s_ops->getmemid == NULL) {
3684                 AS_LOCK_EXIT(as, &as->a_lock);
3685                 return (ENODEV);
3686         }
3687 
3688         sts = SEGOP_GETMEMID(seg, addr, memidp);
3689 
3690         AS_LOCK_EXIT(as, &as->a_lock);
3691         return (sts);
3692 }


 343         return (0);
 344 }
 345 
 346 /*
 347  * Search for the segment containing addr. If a segment containing addr
 348  * exists, that segment is returned.  If no such segment exists, and
 349  * the list spans addresses greater than addr, then the first segment
 350  * whose base is greater than addr is returned; otherwise, NULL is
 351  * returned unless tail is true, in which case the last element of the
 352  * list is returned.
 353  *
 354  * a_seglast is used to cache the last found segment for repeated
 355  * searches to the same addr (which happens frequently).
 356  */
 357 struct seg *
 358 as_findseg(struct as *as, caddr_t addr, int tail)
 359 {
 360         struct seg *seg = as->a_seglast;
 361         avl_index_t where;
 362 
 363         ASSERT(AS_LOCK_HELD(as));
 364 
 365         if (seg != NULL &&
 366             seg->s_base <= addr &&
 367             addr < seg->s_base + seg->s_size)
 368                 return (seg);
 369 
 370         seg = avl_find(&as->a_segtree, &addr, &where);
 371         if (seg != NULL)
 372                 return (as->a_seglast = seg);
 373 
 374         seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
 375         if (seg == NULL && tail)
 376                 seg = avl_last(&as->a_segtree);
 377         return (as->a_seglast = seg);
 378 }
 379 
 380 #ifdef VERIFY_SEGLIST
 381 /*
 382  * verify that the linked list is coherent
 383  */


 405                 nsegs++;
 406         }
 407         ASSERT(seglast == NULL);
 408         ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
 409 }
 410 #endif /* VERIFY_SEGLIST */
 411 
 412 /*
 413  * Add a new segment to the address space. The avl_find()
 414  * may be expensive so we attempt to use last segment accessed
 415  * in as_gap() as an insertion point.
 416  */
 417 int
 418 as_addseg(struct as  *as, struct seg *newseg)
 419 {
 420         struct seg *seg;
 421         caddr_t addr;
 422         caddr_t eaddr;
 423         avl_index_t where;
 424 
 425         ASSERT(AS_WRITE_HELD(as));
 426 
 427         as->a_updatedir = 1; /* inform /proc */
 428         gethrestime(&as->a_updatetime);
 429 
 430         if (as->a_lastgaphl != NULL) {
 431                 struct seg *hseg = NULL;
 432                 struct seg *lseg = NULL;
 433 
 434                 if (as->a_lastgaphl->s_base > newseg->s_base) {
 435                         hseg = as->a_lastgaphl;
 436                         lseg = AVL_PREV(&as->a_segtree, hseg);
 437                 } else {
 438                         lseg = as->a_lastgaphl;
 439                         hseg = AVL_NEXT(&as->a_segtree, lseg);
 440                 }
 441 
 442                 if (hseg && lseg && lseg->s_base < newseg->s_base &&
 443                     hseg->s_base > newseg->s_base) {
 444                         avl_insert_here(&as->a_segtree, newseg, lseg,
 445                             AVL_AFTER);


 487                                 }
 488 #endif
 489                                 return (-1);    /* overlapping segment */
 490                         }
 491                 }
 492         }
 493         as->a_seglast = newseg;
 494         avl_insert(&as->a_segtree, newseg, where);
 495 
 496 #ifdef VERIFY_SEGLIST
 497         as_verify(as);
 498 #endif
 499         return (0);
 500 }
 501 
 502 struct seg *
 503 as_removeseg(struct as *as, struct seg *seg)
 504 {
 505         avl_tree_t *t;
 506 
 507         ASSERT(AS_WRITE_HELD(as));
 508 
 509         as->a_updatedir = 1; /* inform /proc */
 510         gethrestime(&as->a_updatetime);
 511 
 512         if (seg == NULL)
 513                 return (NULL);
 514 
 515         t = &as->a_segtree;
 516         if (as->a_seglast == seg)
 517                 as->a_seglast = NULL;
 518         as->a_lastgaphl = NULL;
 519 
 520         /*
 521          * if this segment is at an address higher than
 522          * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
 523          */
 524         if (as->a_lastgap &&
 525             (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
 526                 as->a_lastgap = AVL_NEXT(t, seg);
 527 
 528         /*
 529          * remove the segment from the seg tree
 530          */
 531         avl_remove(t, seg);
 532 
 533 #ifdef VERIFY_SEGLIST
 534         as_verify(as);
 535 #endif
 536         return (seg);
 537 }
 538 
 539 /*
 540  * Find a segment containing addr.
 541  */
 542 struct seg *
 543 as_segat(struct as *as, caddr_t addr)
 544 {
 545         struct seg *seg = as->a_seglast;
 546 
 547         ASSERT(AS_LOCK_HELD(as));
 548 
 549         if (seg != NULL && seg->s_base <= addr &&
 550             addr < seg->s_base + seg->s_size)
 551                 return (seg);
 552 
 553         seg = avl_find(&as->a_segtree, &addr, NULL);
 554         return (seg);
 555 }
 556 
 557 /*
 558  * Serialize all searches for holes in an address space to
 559  * prevent two or more threads from allocating the same virtual
 560  * address range.  The address space must not be "read/write"
 561  * locked by the caller since we may block.
 562  */
 563 void
 564 as_rangelock(struct as *as)
 565 {
 566         mutex_enter(&as->a_contents);
 567         while (AS_ISCLAIMGAP(as))


 650 {
 651         struct as *as;
 652 
 653         as = kmem_cache_alloc(as_cache, KM_SLEEP);
 654 
 655         as->a_flags      = 0;
 656         as->a_vbits      = 0;
 657         as->a_hrm        = NULL;
 658         as->a_seglast    = NULL;
 659         as->a_size       = 0;
 660         as->a_resvsize   = 0;
 661         as->a_updatedir  = 0;
 662         gethrestime(&as->a_updatetime);
 663         as->a_objectdir  = NULL;
 664         as->a_sizedir    = 0;
 665         as->a_userlimit  = (caddr_t)USERLIMIT;
 666         as->a_lastgap    = NULL;
 667         as->a_lastgaphl  = NULL;
 668         as->a_callbacks  = NULL;
 669 
 670         AS_LOCK_ENTER(as, RW_WRITER);
 671         as->a_hat = hat_alloc(as);   /* create hat for default system mmu */
 672         AS_LOCK_EXIT(as);
 673 
 674         as->a_xhat = NULL;
 675 
 676         return (as);
 677 }
 678 
 679 /*
 680  * Free an address space data structure.
 681  * Need to free the hat first and then
 682  * all the segments on this as and finally
 683  * the space for the as struct itself.
 684  */
 685 void
 686 as_free(struct as *as)
 687 {
 688         struct hat *hat = as->a_hat;
 689         struct seg *seg, *next;
 690         int called = 0;
 691 
 692 top:
 693         /*
 694          * Invoke ALL callbacks. as_do_callbacks will do one callback
 695          * per call, and not return (-1) until the callback has completed.
 696          * When as_do_callbacks returns zero, all callbacks have completed.
 697          */
 698         mutex_enter(&as->a_contents);
 699         while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
 700                 ;
 701 
 702         /* This will prevent new XHATs from attaching to as */
 703         if (!called)
 704                 AS_SETBUSY(as);
 705         mutex_exit(&as->a_contents);
 706         AS_LOCK_ENTER(as, RW_WRITER);
 707 
 708         if (!called) {
 709                 called = 1;
 710                 hat_free_start(hat);
 711                 if (as->a_xhat != NULL)
 712                         xhat_free_start_all(as);
 713         }
 714         for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
 715                 int err;
 716 
 717                 next = AS_SEGNEXT(as, seg);
 718 retry:
 719                 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
 720                 if (err == EAGAIN) {
 721                         mutex_enter(&as->a_contents);
 722                         if (as->a_callbacks) {
 723                                 AS_LOCK_EXIT(as);
 724                         } else if (!AS_ISNOUNMAPWAIT(as)) {
 725                                 /*
 726                                  * Memory is currently locked. Wait for a
 727                                  * cv_signal that it has been unlocked, then
 728                                  * try the operation again.
 729                                  */
 730                                 if (AS_ISUNMAPWAIT(as) == 0)
 731                                         cv_broadcast(&as->a_cv);
 732                                 AS_SETUNMAPWAIT(as);
 733                                 AS_LOCK_EXIT(as);
 734                                 while (AS_ISUNMAPWAIT(as))
 735                                         cv_wait(&as->a_cv, &as->a_contents);
 736                         } else {
 737                                 /*
 738                                  * We may have raced with
 739                                  * segvn_reclaim()/segspt_reclaim(). In this
 740                                  * case clear the nounmapwait flag and retry since
 741                                  * softlockcnt in this segment may already be
 742                                  * 0.  We don't drop the as writer lock, so our
 743                                  * number of retries without sleeping should
 744                                  * be very small. See segvn_reclaim() for
 745                                  * more comments.
 746                                  */
 747                                 AS_CLRNOUNMAPWAIT(as);
 748                                 mutex_exit(&as->a_contents);
 749                                 goto retry;
 750                         }
 751                         mutex_exit(&as->a_contents);
 752                         goto top;
 753                 } else {
 754                         /*
 755                          * We do not expect any other error return at this
 756                          * time. This is similar to an ASSERT in seg_unmap()
 757                          */
 758                         ASSERT(err == 0);
 759                 }
 760         }
 761         hat_free_end(hat);
 762         if (as->a_xhat != NULL)
 763                 xhat_free_end_all(as);
 764         AS_LOCK_EXIT(as);
 765 
 766         /* /proc stuff */
 767         ASSERT(avl_numnodes(&as->a_wpage) == 0);
 768         if (as->a_objectdir) {
 769                 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
 770                 as->a_objectdir = NULL;
 771                 as->a_sizedir = 0;
 772         }
 773 
 774         /*
 775          * Free the struct as back to kmem.  Assert it has no segments.
 776          */
 777         ASSERT(avl_numnodes(&as->a_segtree) == 0);
 778         kmem_cache_free(as_cache, as);
 779 }
 780 
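The EAGAIN path in as_free() above is a small handshake: broadcast on a_cv to nudge the thread that will release the locked pages, set the unmapwait flag, and sleep on a_contents until that flag is cleared. Here is a rough POSIX-threads analog of the handshake, with purely illustrative names; it deliberately ignores the nounmapwait fast path.

#include <pthread.h>

static pthread_mutex_t contents = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv       = PTHREAD_COND_INITIALIZER;
static int unmapwait;			/* analog of AS_ISUNMAPWAIT() */

/* Called by the thread that got EAGAIN: sleep until the pages are released. */
void
wait_for_unlock(void)
{
	pthread_mutex_lock(&contents);
	pthread_cond_broadcast(&cv);	/* wake any cleanup thread */
	unmapwait = 1;
	while (unmapwait)
		pthread_cond_wait(&cv, &contents);
	pthread_mutex_unlock(&contents);
}

/* Called by whoever releases the locked pages (cf. segvn_reclaim()). */
void
release_waiters(void)
{
	pthread_mutex_lock(&contents);
	unmapwait = 0;
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&contents);
}

The unconditional broadcast here is a simplification; the kernel code only broadcasts when no waiter has done so already.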
 781 int
 782 as_dup(struct as *as, struct proc *forkedproc)
 783 {
 784         struct as *newas;
 785         struct seg *seg, *newseg;
 786         size_t  purgesize = 0;
 787         int error;
 788 
 789         AS_LOCK_ENTER(as, RW_WRITER);
 790         as_clearwatch(as);
 791         newas = as_alloc();
 792         newas->a_userlimit = as->a_userlimit;
 793         newas->a_proc = forkedproc;
 794 
 795         AS_LOCK_ENTER(newas, RW_WRITER);
 796 
 797         /* This will prevent new XHATs from attaching */
 798         mutex_enter(&as->a_contents);
 799         AS_SETBUSY(as);
 800         mutex_exit(&as->a_contents);
 801         mutex_enter(&newas->a_contents);
 802         AS_SETBUSY(newas);
 803         mutex_exit(&newas->a_contents);
 804 
 805         (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
 806 
 807         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 808 
 809                 if (seg->s_flags & S_PURGE) {
 810                         purgesize += seg->s_size;
 811                         continue;
 812                 }
 813 
 814                 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
 815                 if (newseg == NULL) {
 816                         AS_LOCK_EXIT(newas);
 817                         as_setwatch(as);
 818                         mutex_enter(&as->a_contents);
 819                         AS_CLRBUSY(as);
 820                         mutex_exit(&as->a_contents);
 821                         AS_LOCK_EXIT(as);
 822                         as_free(newas);
 823                         return (-1);
 824                 }
 825                 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
 826                         /*
 827                          * We call seg_free() on the new seg
 828                          * because the segment is not set up
 829                          * completely; i.e. it has no ops.
 830                          */
 831                         as_setwatch(as);
 832                         mutex_enter(&as->a_contents);
 833                         AS_CLRBUSY(as);
 834                         mutex_exit(&as->a_contents);
 835                         AS_LOCK_EXIT(as);
 836                         seg_free(newseg);
 837                         AS_LOCK_EXIT(newas);
 838                         as_free(newas);
 839                         return (error);
 840                 }
 841                 newas->a_size += seg->s_size;
 842         }
 843         newas->a_resvsize = as->a_resvsize - purgesize;
 844 
 845         error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
 846         if (as->a_xhat != NULL)
 847                 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
 848 
 849         mutex_enter(&newas->a_contents);
 850         AS_CLRBUSY(newas);
 851         mutex_exit(&newas->a_contents);
 852         AS_LOCK_EXIT(newas);
 853 
 854         as_setwatch(as);
 855         mutex_enter(&as->a_contents);
 856         AS_CLRBUSY(as);
 857         mutex_exit(&as->a_contents);
 858         AS_LOCK_EXIT(as);
 859         if (error != 0) {
 860                 as_free(newas);
 861                 return (error);
 862         }
 863         forkedproc->p_as = newas;
 864         return (0);
 865 }
 866 
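as_dup() builds the child address space piece by piece and, on the first failure, tears down everything built so far (via as_free(newas)) before propagating the error. The sketch below shows that build-or-unwind shape over an ordinary linked list; the types are made up for illustration.

#include <stdlib.h>
#include <string.h>

struct node {
	struct node *next;
	char         data[64];
};

/* Duplicate a list; on any allocation failure, free the partial copy. */
struct node *
dup_list(const struct node *src)
{
	struct node *head = NULL, **tail = &head;

	for (; src != NULL; src = src->next) {
		struct node *n = malloc(sizeof (*n));

		if (n == NULL) {	/* unwind, as as_dup() does with as_free(newas) */
			while (head != NULL) {
				struct node *t = head->next;
				free(head);
				head = t;
			}
			return (NULL);
		}
		memcpy(n->data, src->data, sizeof (n->data));
		n->next = NULL;
		*tail = n;
		tail = &n->next;
	}
	return (head);
}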
 867 /*
 868  * Handle a ``fault'' at addr for size bytes.
 869  */
 870 faultcode_t
 871 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
 872         enum fault_type type, enum seg_rw rw)
 873 {
 874         struct seg *seg;
 875         caddr_t raddr;                  /* rounded down addr */
 876         size_t rsize;                   /* rounded up size */
 877         size_t ssize;
 878         faultcode_t res = 0;


 942         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 943         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 944             (size_t)raddr;
 945 
 946         /*
 947          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 948          * correctness, but then we could be stuck holding this lock for
 949          * a LONG time if the fault needs to be resolved on a slow
 950          * filesystem, and then no-one will be able to exec new commands,
 951          * as exec'ing requires the write lock on the as.
 952          */
 953         if (as == &kas && segkmap && segkmap->s_base <= raddr &&
 954             raddr + size < segkmap->s_base + segkmap->s_size) {
 955                 /*
 956                  * if (as==&kas), this can't be XHAT: we've already returned
 957                  * FC_NOSUPPORT.
 958                  */
 959                 seg = segkmap;
 960                 as_lock_held = 0;
 961         } else {
 962                 AS_LOCK_ENTER(as, RW_READER);
 963                 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
 964                         /*
 965                          * Grab and hold the writers' lock on the as
 966                          * if the fault is to a watched page.
 967                          * This will keep CPUs from "peeking" at the
 968                          * address range while we're temporarily boosting
 969                          * the permissions for the XHAT device to
 970                          * resolve the fault in the segment layer.
 971                          *
 972                          * We could check whether faulted address
 973                          * is within a watched page and only then grab
 974                          * the writer lock, but this is simpler.
 975                          */
 976                         AS_LOCK_EXIT(as);
 977                         AS_LOCK_ENTER(as, RW_WRITER);
 978                 }
 979 
 980                 seg = as_segat(as, raddr);
 981                 if (seg == NULL) {
 982                         AS_LOCK_EXIT(as);
 983                         if ((lwp != NULL) && (!is_xhat))
 984                                 lwp->lwp_nostop--;
 985                         return (FC_NOMAP);
 986                 }
 987 
 988                 as_lock_held = 1;
 989         }
 990 
 991         addrsav = raddr;
 992         segsav = seg;
 993 
 994         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
 995                 if (raddr >= seg->s_base + seg->s_size) {
 996                         seg = AS_SEGNEXT(as, seg);
 997                         if (seg == NULL || raddr != seg->s_base) {
 998                                 res = FC_NOMAP;
 999                                 break;
1000                         }
1001                 }
1002                 if (raddr + rsize > seg->s_base + seg->s_size)


1043          */
1044         if (res != 0 && type == F_SOFTLOCK) {
1045                 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046                         if (addrsav >= seg->s_base + seg->s_size)
1047                                 seg = AS_SEGNEXT(as, seg);
1048                         ASSERT(seg != NULL);
1049                         /*
1050                          * Now call the fault routine again to perform the
1051                          * unlock using S_OTHER instead of the rw variable
1052                          * since we never got a chance to touch the pages.
1053                          */
1054                         if (raddr > seg->s_base + seg->s_size)
1055                                 ssize = seg->s_base + seg->s_size - addrsav;
1056                         else
1057                                 ssize = raddr - addrsav;
1058                         (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
1059                             F_SOFTUNLOCK, S_OTHER);
1060                 }
1061         }
1062         if (as_lock_held)
1063                 AS_LOCK_EXIT(as);
1064         if ((lwp != NULL) && (!is_xhat))
1065                 lwp->lwp_nostop--;
1066 
1067         /*
1068          * If the lower levels returned EDEADLK for a fault,
 1069          * it means that we should retry the fault.  Let's also wait
 1070          * a bit to let the deadlock-causing condition clear.
1071          * This is part of a gross hack to work around a design flaw
1072          * in the ufs/sds logging code and should go away when the
1073          * logging code is re-designed to fix the problem. See bug
1074          * 4125102 for details of the problem.
1075          */
1076         if (FC_ERRNO(res) == EDEADLK) {
1077                 delay(deadlk_wait);
1078                 res = 0;
1079                 goto retry;
1080         }
1081         return (res);
1082 }
1083 
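The raddr/rsize computation in as_fault() (and repeated in most entry points below) rounds the caller's [addr, addr + size) window out to page boundaries and then rejects ranges that wrap. A standalone version of the same arithmetic, assuming a 4K page purely for illustration:

#include <stdint.h>
#include <stddef.h>

#define	PAGESIZE	4096UL		/* illustration only; the kernel value is per-platform */
#define	PAGEOFFSET	(PAGESIZE - 1)
#define	PAGEMASK	(~PAGEOFFSET)

/* Round [addr, addr + size) out to page boundaries; return 0 on wraparound. */
int
page_round(uintptr_t addr, size_t size, uintptr_t *raddr, size_t *rsize)
{
	*raddr = addr & PAGEMASK;				/* round base down */
	*rsize = ((addr + size + PAGEOFFSET) & PAGEMASK) - *raddr; /* round end up */

	if (*raddr + *rsize < *raddr)				/* wraparound check */
		return (0);
	return (1);
}

For example, addr = 0x1234 and size = 0x100 yield raddr = 0x1000 and rsize = 0x1000: one full page covering the request.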


1091 {
1092         struct seg *seg;
1093         caddr_t raddr;                  /* rounded down addr */
1094         size_t rsize;                   /* rounded up size */
1095         faultcode_t res = 0;
1096         klwp_t *lwp = ttolwp(curthread);
1097 
1098 retry:
1099         /*
1100          * Indicate that the lwp is not to be stopped while waiting
1101          * for a pagefault.  This is to avoid deadlock while debugging
1102          * a process via /proc over NFS (in particular).
1103          */
1104         if (lwp != NULL)
1105                 lwp->lwp_nostop++;
1106 
1107         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1108         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1109             (size_t)raddr;
1110 
1111         AS_LOCK_ENTER(as, RW_READER);
1112         seg = as_segat(as, raddr);
1113         if (seg == NULL) {
1114                 AS_LOCK_EXIT(as);
1115                 if (lwp != NULL)
1116                         lwp->lwp_nostop--;
1117                 return (FC_NOMAP);
1118         }
1119 
1120         for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
1121                 if (raddr >= seg->s_base + seg->s_size) {
1122                         seg = AS_SEGNEXT(as, seg);
1123                         if (seg == NULL || raddr != seg->s_base) {
1124                                 res = FC_NOMAP;
1125                                 break;
1126                         }
1127                 }
1128                 res = SEGOP_FAULTA(seg, raddr);
1129                 if (res != 0)
1130                         break;
1131         }
1132         AS_LOCK_EXIT(as);
1133         if (lwp != NULL)
1134                 lwp->lwp_nostop--;
1135         /*
1136          * If the lower levels returned EDEADLK for a fault,
 1137          * it means that we should retry the fault.  Let's also wait
 1138          * a bit to let the deadlock-causing condition clear.
1139          * This is part of a gross hack to work around a design flaw
1140          * in the ufs/sds logging code and should go away when the
1141          * logging code is re-designed to fix the problem. See bug
1142          * 4125102 for details of the problem.
1143          */
1144         if (FC_ERRNO(res) == EDEADLK) {
1145                 delay(deadlk_wait);
1146                 res = 0;
1147                 goto retry;
1148         }
1149         return (res);
1150 }
1151 
1152 /*


1172         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1173             (size_t)raddr;
1174 
1175         if (raddr + rsize < raddr)           /* check for wraparound */
1176                 return (ENOMEM);
1177 
1178         saveraddr = raddr;
1179         saversize = rsize;
1180 
1181         /*
1182          * Normally we only lock the as as a reader. But
1183          * if due to setprot the segment driver needs to split
1184          * a segment it will return IE_RETRY. Therefore we re-acquire
1185          * the as lock as a writer so the segment driver can change
1186          * the seg list. Also the segment driver will return IE_RETRY
 1187          * after it has changed the segment list, so we keep
 1188          * locking as a writer. Since these operations should be rare,
 1189          * we want to lock as a writer only when necessary.
1190          */
1191         if (writer || avl_numnodes(&as->a_wpage) != 0) {
1192                 AS_LOCK_ENTER(as, RW_WRITER);
1193         } else {
1194                 AS_LOCK_ENTER(as, RW_READER);
1195         }
1196 
1197         as_clearwatchprot(as, raddr, rsize);
1198         seg = as_segat(as, raddr);
1199         if (seg == NULL) {
1200                 as_setwatch(as);
1201                 AS_LOCK_EXIT(as);
1202                 return (ENOMEM);
1203         }
1204 
1205         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1206                 if (raddr >= seg->s_base + seg->s_size) {
1207                         seg = AS_SEGNEXT(as, seg);
1208                         if (seg == NULL || raddr != seg->s_base) {
1209                                 error = ENOMEM;
1210                                 break;
1211                         }
1212                 }
1213                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1214                         ssize = seg->s_base + seg->s_size - raddr;
1215                 else
1216                         ssize = rsize;
1217 retry:
1218                 error = SEGOP_SETPROT(seg, raddr, ssize, prot);
1219 
1220                 if (error == IE_NOMEM) {
1221                         error = EAGAIN;
1222                         break;
1223                 }
1224 
1225                 if (error == IE_RETRY) {
1226                         AS_LOCK_EXIT(as);
1227                         writer = 1;
1228                         goto setprot_top;
1229                 }
1230 
1231                 if (error == EAGAIN) {
1232                         /*
1233                          * Make sure we have a_lock as writer.
1234                          */
1235                         if (writer == 0) {
1236                                 AS_LOCK_EXIT(as);
1237                                 writer = 1;
1238                                 goto setprot_top;
1239                         }
1240 
1241                         /*
1242                          * Memory is currently locked.  It must be unlocked
1243                          * before this operation can succeed through a retry.
1244                          * The possible reasons for locked memory and
1245                          * corresponding strategies for unlocking are:
1246                          * (1) Normal I/O
1247                          *      wait for a signal that the I/O operation
1248                          *      has completed and the memory is unlocked.
1249                          * (2) Asynchronous I/O
1250                          *      The aio subsystem does not unlock pages when
1251                          *      the I/O is completed. Those pages are unlocked
1252                          *      when the application calls aiowait/aioerror.
1253                          *      So, to prevent blocking forever, cv_broadcast()
1254                          *      is done to wake up aio_cleanup_thread.
1255                          *      Subsequently, segvn_reclaim will be called, and
1256                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
1257                          * (3) Long term page locking:
1258                          *      Drivers intending to have pages locked for a
1259                          *      period considerably longer than for normal I/O
1260                          *      (essentially forever) may have registered for a
1261                          *      callback so they may unlock these pages on
1262                          *      request. This is needed to allow this operation
1263                          *      to succeed. Each entry on the callback list is
 1264                          *      examined. If the event or address range pertains,
 1265                          *      the callback is invoked (unless it is already in
1266                          *      progress). The a_contents lock must be dropped
1267                          *      before the callback, so only one callback can
1268                          *      be done at a time. Go to the top and do more
1269                          *      until zero is returned. If zero is returned,
1270                          *      either there were no callbacks for this event
1271                          *      or they were already in progress.
1272                          */
1273                         mutex_enter(&as->a_contents);
1274                         if (as->a_callbacks &&
1275                             (cb = as_find_callback(as, AS_SETPROT_EVENT,
1276                             seg->s_base, seg->s_size))) {
1277                                 AS_LOCK_EXIT(as);
1278                                 as_execute_callback(as, cb, AS_SETPROT_EVENT);
1279                         } else if (!AS_ISNOUNMAPWAIT(as)) {
1280                                 if (AS_ISUNMAPWAIT(as) == 0)
1281                                         cv_broadcast(&as->a_cv);
1282                                 AS_SETUNMAPWAIT(as);
1283                                 AS_LOCK_EXIT(as);
1284                                 while (AS_ISUNMAPWAIT(as))
1285                                         cv_wait(&as->a_cv, &as->a_contents);
1286                         } else {
1287                                 /*
1288                                  * We may have raced with
1289                                  * segvn_reclaim()/segspt_reclaim(). In this
1290                                  * case clean nounmapwait flag and retry since
1291                                  * softlockcnt in this segment may be already
1292                                  * 0.  We don't drop as writer lock so our
1293                                  * number of retries without sleeping should
1294                                  * be very small. See segvn_reclaim() for
1295                                  * more comments.
1296                                  */
1297                                 AS_CLRNOUNMAPWAIT(as);
1298                                 mutex_exit(&as->a_contents);
1299                                 goto retry;
1300                         }
1301                         mutex_exit(&as->a_contents);
1302                         goto setprot_top;
1303                 } else if (error != 0)
1304                         break;
1305         }
1306         if (error != 0) {
1307                 as_setwatch(as);
1308         } else {
1309                 as_setwatchprot(as, saveraddr, saversize, prot);
1310         }
1311         AS_LOCK_EXIT(as);
1312         return (error);
1313 }
1314 
1315 /*
1316  * Check to make sure that the interval [addr, addr + size)
1317  * in address space `as' has at least the specified protection.
1318  * It is ok for the range to cross over several segments, as long
1319  * as they are contiguous.
1320  */
1321 int
1322 as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
1323 {
1324         struct seg *seg;
1325         size_t ssize;
1326         caddr_t raddr;                  /* rounded down addr */
1327         size_t rsize;                   /* rounded up size */
1328         int error = 0;
1329 
1330         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1331         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1332             (size_t)raddr;
1333 
1334         if (raddr + rsize < raddr)           /* check for wraparound */
1335                 return (ENOMEM);
1336 
1337         /*
1338          * This is ugly as sin...
1339          * Normally, we only acquire the address space readers lock.
1340          * However, if the address space has watchpoints present,
1341          * we must acquire the writer lock on the address space for
1342          * the benefit of as_clearwatchprot() and as_setwatchprot().
1343          */
1344         if (avl_numnodes(&as->a_wpage) != 0)
1345                 AS_LOCK_ENTER(as, RW_WRITER);
1346         else
1347                 AS_LOCK_ENTER(as, RW_READER);
1348         as_clearwatchprot(as, raddr, rsize);
1349         seg = as_segat(as, raddr);
1350         if (seg == NULL) {
1351                 as_setwatch(as);
1352                 AS_LOCK_EXIT(as);
1353                 return (ENOMEM);
1354         }
1355 
1356         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
1357                 if (raddr >= seg->s_base + seg->s_size) {
1358                         seg = AS_SEGNEXT(as, seg);
1359                         if (seg == NULL || raddr != seg->s_base) {
1360                                 error = ENOMEM;
1361                                 break;
1362                         }
1363                 }
1364                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
1365                         ssize = seg->s_base + seg->s_size - raddr;
1366                 else
1367                         ssize = rsize;
1368 
1369                 error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
1370                 if (error != 0)
1371                         break;
1372         }
1373         as_setwatch(as);
1374         AS_LOCK_EXIT(as);
1375         return (error);
1376 }
1377 
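The loop in as_checkprot() (and its siblings in as_setprot(), as_incore(), and as_ctl()) is a standard walk: clip the work to the current segment and fail if the next segment does not begin exactly where this one ends. Below is the skeleton of that walk over a sorted array instead of the AVL tree; names are illustrative, and the caller is assumed to have located the range containing raddr first, as as_segat() does.

#include <stdint.h>
#include <stddef.h>

struct range {			/* same illustrative type as the sketch after as_segat() */
	uintptr_t base;
	size_t    size;
};

/*
 * Visit [raddr, raddr + rsize) in per-range chunks; return -1 if any part of
 * the window falls in a gap (the ENOMEM case above).  'i' must index the
 * range that contains raddr.
 */
int
walk_ranges(const struct range *tab, size_t n, size_t i,
    uintptr_t raddr, size_t rsize, int (*visit)(uintptr_t, size_t))
{
	size_t ssize;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= tab[i].base + tab[i].size) {
			if (++i == n || raddr != tab[i].base)
				return (-1);	/* hole in the address range */
		}
		if (raddr + rsize > tab[i].base + tab[i].size)
			ssize = tab[i].base + tab[i].size - raddr;
		else
			ssize = rsize;
		if (visit(raddr, ssize) != 0)	/* per-segment operation */
			return (-1);
	}
	return (0);
}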
1378 int
1379 as_unmap(struct as *as, caddr_t addr, size_t size)
1380 {
1381         struct seg *seg, *seg_next;
1382         struct as_callback *cb;
1383         caddr_t raddr, eaddr;
1384         size_t ssize, rsize = 0;
1385         int err;
1386 
1387 top:
1388         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1389         eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
1390             (uintptr_t)PAGEMASK);
1391 
1392         AS_LOCK_ENTER(as, RW_WRITER);
1393 
1394         as->a_updatedir = 1; /* inform /proc */
1395         gethrestime(&as->a_updatetime);
1396 
1397         /*
1398          * Use as_findseg to find the first segment in the range, then
 1399          * step through the segments in address order.
1400          */
1401         as_clearwatchprot(as, raddr, eaddr - raddr);
1402 
1403         for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
1404                 if (eaddr <= seg->s_base)
1405                         break;          /* eaddr was in a gap; all done */
1406 
1407                 /* this is implied by the test above */
1408                 ASSERT(raddr < eaddr);
1409 
1410                 if (raddr < seg->s_base)
1411                         raddr = seg->s_base;         /* raddr was in a gap */
1412 


1453                          * (3) Long term page locking:
1454                          *      Drivers intending to have pages locked for a
1455                          *      period considerably longer than for normal I/O
1456                          *      (essentially forever) may have registered for a
1457                          *      callback so they may unlock these pages on
1458                          *      request. This is needed to allow this operation
1459                          *      to succeed. Each entry on the callback list is
 1460                          *      examined. If the event or address range pertains,
 1461                          *      the callback is invoked (unless it is already in
1462                          *      progress). The a_contents lock must be dropped
1463                          *      before the callback, so only one callback can
1464                          *      be done at a time. Go to the top and do more
1465                          *      until zero is returned. If zero is returned,
1466                          *      either there were no callbacks for this event
1467                          *      or they were already in progress.
1468                          */
1469                         mutex_enter(&as->a_contents);
1470                         if (as->a_callbacks &&
1471                             (cb = as_find_callback(as, AS_UNMAP_EVENT,
1472                             seg->s_base, seg->s_size))) {
1473                                 AS_LOCK_EXIT(as);
1474                                 as_execute_callback(as, cb, AS_UNMAP_EVENT);
1475                         } else if (!AS_ISNOUNMAPWAIT(as)) {
1476                                 if (AS_ISUNMAPWAIT(as) == 0)
1477                                         cv_broadcast(&as->a_cv);
1478                                 AS_SETUNMAPWAIT(as);
1479                                 AS_LOCK_EXIT(as);
1480                                 while (AS_ISUNMAPWAIT(as))
1481                                         cv_wait(&as->a_cv, &as->a_contents);
1482                         } else {
1483                                 /*
1484                                  * We may have raced with
1485                                  * segvn_reclaim()/segspt_reclaim(). In this
1486                                  * case clean nounmapwait flag and retry since
1487                                  * softlockcnt in this segment may be already
1488                                  * 0.  We don't drop as writer lock so our
1489                                  * number of retries without sleeping should
1490                                  * be very small. See segvn_reclaim() for
1491                                  * more comments.
1492                                  */
1493                                 AS_CLRNOUNMAPWAIT(as);
1494                                 mutex_exit(&as->a_contents);
1495                                 goto retry;
1496                         }
1497                         mutex_exit(&as->a_contents);
1498                         goto top;
1499                 } else if (err == IE_RETRY) {
1500                         AS_LOCK_EXIT(as);
1501                         goto top;
1502                 } else if (err) {
1503                         as_setwatch(as);
1504                         AS_LOCK_EXIT(as);
1505                         return (-1);
1506                 }
1507 
1508                 as->a_size -= ssize;
1509                 if (rsize)
1510                         as->a_resvsize -= rsize;
1511                 raddr += ssize;
1512         }
1513         AS_LOCK_EXIT(as);
1514         return (0);
1515 }
1516 
1517 static int
1518 as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
1519     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1520 {
1521         uint_t szc;
1522         uint_t nszc;
1523         int error;
1524         caddr_t a;
1525         caddr_t eaddr;
1526         size_t segsize;
1527         struct seg *seg;
1528         size_t pgsz;
1529         int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
1530         uint_t save_szcvec;
1531 
1532         ASSERT(AS_WRITE_HELD(as));
1533         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1534         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1535         ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
1536         if (!do_off) {
1537                 vn_a->offset = 0;
1538         }
1539 
1540         if (szcvec <= 1) {
1541                 seg = seg_alloc(as, addr, size);
1542                 if (seg == NULL) {
1543                         return (ENOMEM);
1544                 }
1545                 vn_a->szc = 0;
1546                 error = (*crfp)(seg, vn_a);
1547                 if (error != 0) {
1548                         seg_free(seg);
1549                 } else {
1550                         as->a_size += size;
1551                         as->a_resvsize += size;
1552                 }


1626         ASSERT(addr == eaddr);
1627 
1628         return (0);
1629 }
1630 
1631 static int
1632 as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
1633     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1634 {
1635         uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA);
1636         int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
1637         uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags,
1638             type, 0);
1639         int error;
1640         struct seg *seg;
1641         struct vattr va;
1642         u_offset_t eoff;
1643         size_t save_size = 0;
1644         extern size_t textrepl_size_thresh;
1645 
1646         ASSERT(AS_WRITE_HELD(as));
1647         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1648         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1649         ASSERT(vn_a->vp != NULL);
1650         ASSERT(vn_a->amp == NULL);
1651 
1652 again:
1653         if (szcvec <= 1) {
1654                 seg = seg_alloc(as, addr, size);
1655                 if (seg == NULL) {
1656                         return (ENOMEM);
1657                 }
1658                 vn_a->szc = 0;
1659                 error = (*crfp)(seg, vn_a);
1660                 if (error != 0) {
1661                         seg_free(seg);
1662                 } else {
1663                         as->a_size += size;
1664                         as->a_resvsize += size;
1665                 }
1666                 return (error);


1715     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
1716 {
1717         uint_t szcvec;
1718         uchar_t type;
1719 
1720         ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE);
1721         if (vn_a->type == MAP_SHARED) {
1722                 type = MAPPGSZC_SHM;
1723         } else if (vn_a->type == MAP_PRIVATE) {
1724                 if (vn_a->szc == AS_MAP_HEAP) {
1725                         type = MAPPGSZC_HEAP;
1726                 } else if (vn_a->szc == AS_MAP_STACK) {
1727                         type = MAPPGSZC_STACK;
1728                 } else {
1729                         type = MAPPGSZC_PRIVM;
1730                 }
1731         }
1732         szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ?
1733             (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE),
1734             (vn_a->flags & MAP_TEXT), type, 0);
1735         ASSERT(AS_WRITE_HELD(as));
1736         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
1737         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
1738         ASSERT(vn_a->vp == NULL);
1739 
1740         return (as_map_segvn_segs(as, addr, size, szcvec,
1741             crfp, vn_a, segcreated));
1742 }
1743 
1744 int
1745 as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
1746 {
1747         AS_LOCK_ENTER(as, RW_WRITER);
1748         return (as_map_locked(as, addr, size, crfp, argsp));
1749 }
1750 
1751 int
1752 as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
1753                 void *argsp)
1754 {
1755         struct seg *seg = NULL;
1756         caddr_t raddr;                  /* rounded down addr */
1757         size_t rsize;                   /* rounded up size */
1758         int error;
1759         int unmap = 0;
1760         struct proc *p = curproc;
1761         struct segvn_crargs crargs;
1762 
1763         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1764         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1765             (size_t)raddr;
1766 
1767         /*
1768          * check for wrap around
1769          */
1770         if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1771                 AS_LOCK_EXIT(as);
1772                 return (ENOMEM);
1773         }
1774 
1775         as->a_updatedir = 1; /* inform /proc */
1776         gethrestime(&as->a_updatetime);
1777 
1778         if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1779                 AS_LOCK_EXIT(as);
1780 
1781                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1782                     RCA_UNSAFE_ALL);
1783 
1784                 return (ENOMEM);
1785         }
1786 
1787         if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1788                 crargs = *(struct segvn_crargs *)argsp;
1789                 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1790                 if (error != 0) {
1791                         AS_LOCK_EXIT(as);
1792                         if (unmap) {
1793                                 (void) as_unmap(as, addr, size);
1794                         }
1795                         return (error);
1796                 }
1797         } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1798                 crargs = *(struct segvn_crargs *)argsp;
1799                 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1800                 if (error != 0) {
1801                         AS_LOCK_EXIT(as);
1802                         if (unmap) {
1803                                 (void) as_unmap(as, addr, size);
1804                         }
1805                         return (error);
1806                 }
1807         } else {
1808                 seg = seg_alloc(as, addr, size);
1809                 if (seg == NULL) {
1810                         AS_LOCK_EXIT(as);
1811                         return (ENOMEM);
1812                 }
1813 
1814                 error = (*crfp)(seg, argsp);
1815                 if (error != 0) {
1816                         seg_free(seg);
1817                         AS_LOCK_EXIT(as);
1818                         return (error);
1819                 }
1820                 /*
1821                  * Add size now so as_unmap will work if as_ctl fails.
1822                  */
1823                 as->a_size += rsize;
1824                 as->a_resvsize += rsize;
1825         }
1826 
1827         as_setwatch(as);
1828 
1829         /*
1830          * If the address space is locked,
1831          * establish memory locks for the new segment.
1832          */
1833         mutex_enter(&as->a_contents);
1834         if (AS_ISPGLCK(as)) {
1835                 mutex_exit(&as->a_contents);
1836                 AS_LOCK_EXIT(as);
1837                 error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
1838                 if (error != 0)
1839                         (void) as_unmap(as, addr, size);
1840         } else {
1841                 mutex_exit(&as->a_contents);
1842                 AS_LOCK_EXIT(as);
1843         }
1844         return (error);
1845 }
1846 
1847 
1848 /*
1849  * Delete all segments in the address space marked with S_PURGE.
1850  * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
1851  * These segments are deleted as a first step before calls to as_gap(), so
1852  * that they don't affect mmap() or shmat().
1853  */
1854 void
1855 as_purge(struct as *as)
1856 {
1857         struct seg *seg;
1858         struct seg *next_seg;
1859 
1860         /*
 1861          * the setting of AS_NEEDSPURGE is protected by as_rangelock(), so
1862          * no need to grab a_contents mutex for this check
1863          */
1864         if ((as->a_flags & AS_NEEDSPURGE) == 0)
1865                 return;
1866 
1867         AS_LOCK_ENTER(as, RW_WRITER);
1868         next_seg = NULL;
1869         seg = AS_SEGFIRST(as);
1870         while (seg != NULL) {
1871                 next_seg = AS_SEGNEXT(as, seg);
1872                 if (seg->s_flags & S_PURGE)
1873                         SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1874                 seg = next_seg;
1875         }
1876         AS_LOCK_EXIT(as);
1877 
1878         mutex_enter(&as->a_contents);
1879         as->a_flags &= ~AS_NEEDSPURGE;
1880         mutex_exit(&as->a_contents);
1881 }
1882 
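as_purge() fetches the successor with AS_SEGNEXT() before calling SEGOP_UNMAP(), because unmapping the whole segment frees it and would invalidate the iterator. The same save-next-before-delete discipline on a plain singly linked list (illustrative types):

#include <stdlib.h>

struct item {
	struct item *next;
	int          purge;		/* analog of S_PURGE */
};

/* Remove flagged items; fetch 'next' before freeing the current one. */
struct item *
purge_items(struct item *head)
{
	struct item **pp = &head;

	while (*pp != NULL) {
		struct item *cur = *pp;
		struct item *next = cur->next;	/* save before cur goes away */

		if (cur->purge) {
			*pp = next;
			free(cur);
		} else {
			pp = &cur->next;
		}
	}
	return (head);
}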
1883 /*
1884  * Find a hole within [*basep, *basep + *lenp), which contains a mappable
1885  * range of addresses at least "minlen" long, where the base of the range is
1886  * at "off" phase from an "align" boundary and there is space for a
 1887  * "redzone"-sized redzone on either side of the range.  Thus,
1888  * if align was 4M and off was 16k, the user wants a hole which will start
1889  * 16k into a 4M page.
1890  *
1891  * If flags specifies AH_HI, the hole will have the highest possible address
1892  * in the range.  We use the as->a_lastgap field to figure out where to
1893  * start looking for a gap.
1894  *
1895  * Otherwise, the gap will have the lowest possible address.
1896  *


1919         save_base = *basep;
1920         save_len = *lenp;
1921         save_minlen = minlen;
1922         save_redzone = redzone;
1923 
1924         /*
1925          * For the first pass/fast_path, just add align and redzone into
1926          * minlen since if we get an allocation, we can guarantee that it
1927          * will fit the alignment and redzone requested.
1928          * This increases the chance that hibound will be adjusted to
1929          * a_lastgap->s_base which will likely allow us to find an
1930          * acceptable hole in the address space quicker.
1931          * If we can't find a hole with this fast_path, then we look for
1932          * smaller holes in which the alignment and offset may allow
1933          * the allocation to fit.
1934          */
1935         minlen += align;
1936         minlen += 2 * redzone;
1937         redzone = 0;
1938 
1939         AS_LOCK_ENTER(as, RW_READER);
1940         if (AS_SEGFIRST(as) == NULL) {
1941                 if (valid_va_range_aligned(basep, lenp, minlen, flags & AH_DIR,
1942                     align, redzone, off)) {
1943                         AS_LOCK_EXIT(as);
1944                         return (0);
1945                 } else {
1946                         AS_LOCK_EXIT(as);
1947                         *basep = save_base;
1948                         *lenp = save_len;
1949                         return (-1);
1950                 }
1951         }
1952 
1953 retry:
1954         /*
1955          * Set up to iterate over all the inter-segment holes in the given
1956          * direction.  lseg is NULL for the lowest-addressed hole and hseg is
1957          * NULL for the highest-addressed hole.  If moving backwards, we reset
1958          * sseg to denote the highest-addressed segment.
1959          */
1960         forward = (flags & AH_DIR) == AH_LO;
1961         if (forward) {
1962                 hseg = as_findseg(as, lobound, 1);
1963                 lseg = AS_SEGPREV(as, hseg);
1964         } else {
1965 
1966                 /*


2007                         lo = lobound;
2008                 if (hi > hibound)
2009                         hi = hibound;
2010                 /*
2011                  * Verify that the candidate hole is big enough and meets
2012                  * hardware constraints.  If the hole is too small, no need
2013                  * to do the further checks since they will fail.
2014                  */
2015                 *basep = lo;
2016                 *lenp = hi - lo;
2017                 if (*lenp >= minlen && valid_va_range_aligned(basep, lenp,
2018                     minlen, forward ? AH_LO : AH_HI, align, redzone, off) &&
2019                     ((flags & AH_CONTAIN) == 0 ||
2020                     (*basep <= addr && *basep + *lenp > addr))) {
2021                         if (!forward)
2022                                 as->a_lastgap = hseg;
2023                         if (hseg != NULL)
2024                                 as->a_lastgaphl = hseg;
2025                         else
2026                                 as->a_lastgaphl = lseg;
2027                         AS_LOCK_EXIT(as);
2028                         return (0);
2029                 }
2030         cont:
2031                 /*
2032                  * Move to the next hole.
2033                  */
2034                 if (forward) {
2035                         lseg = hseg;
2036                         if (lseg == NULL)
2037                                 break;
2038                         hseg = AS_SEGNEXT(as, hseg);
2039                 } else {
2040                         hseg = lseg;
2041                         if (hseg == NULL)
2042                                 break;
2043                         lseg = AS_SEGPREV(as, lseg);
2044                 }
2045         }
2046         if (fast_path && (align != 0 || save_redzone != 0)) {
2047                 fast_path = 0;
2048                 minlen = save_minlen;
2049                 redzone = save_redzone;
2050                 goto retry;
2051         }
2052         *basep = save_base;
2053         *lenp = save_len;
2054         AS_LOCK_EXIT(as);
2055         return (-1);
2056 }
2057 
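When the fast path above fails, the retry has to place the allocation inside each candidate hole so that its base lands at phase "off" of an "align" boundary with a redzone free on both sides, as the comment at the top of as_gap_aligned() describes. Below is a sketch of that placement test for a single hole, assuming align is a power of two; everything here is illustrative rather than the kernel's actual helper.

#include <stdint.h>
#include <stddef.h>

/*
 * Given a hole [lo, hi), can we place 'len' bytes whose base is at phase
 * 'off' of an 'align'-byte boundary, with 'redzone' bytes free on each side?
 * Returns the base on success, 0 on failure.  'align' must be a power of two.
 */
uintptr_t
fit_in_hole(uintptr_t lo, uintptr_t hi, size_t len, size_t align,
    size_t off, size_t redzone)
{
	uintptr_t base = lo + redzone;		/* leave the low redzone */

	if (base < lo)				/* wrapped */
		return (0);

	if (align > 1) {			/* move base to phase 'off' of 'align' */
		uintptr_t want = off & (align - 1);
		uintptr_t have = base & (align - 1);

		if (have != want)
			base += (want > have) ? want - have :
			    align - (have - want);
		if (base < lo)			/* wrapped while aligning */
			return (0);
	}

	/* The allocation plus the high redzone must still fit below hi. */
	if (base > hi || hi - base < len || hi - base - len < redzone)
		return (0);
	return (base);
}

With align = 4M and off = 16K (the example in the comment above), a hole starting at exactly 4M yields a base of 4M + 16K.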
2058 /*
2059  * Find a hole of at least size minlen within [*basep, *basep + *lenp).
2060  *
2061  * If flags specifies AH_HI, the hole will have the highest possible address
2062  * in the range.  We use the as->a_lastgap field to figure out where to
2063  * start looking for a gap.
2064  *
2065  * Otherwise, the gap will have the lowest possible address.
2066  *
2067  * If flags specifies AH_CONTAIN, the hole will contain the address addr.
2068  *
2069  * If an adequate hole is found, base and len are set to reflect the part of
2070  * the hole that is within range, and 0 is returned, otherwise,
2071  * -1 is returned.
2072  *
2073  * NOTE: This routine is not correct when base+len overflows caddr_t.
2074  */


2076 as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
2077     caddr_t addr)
2078 {
2079 
2080         return (as_gap_aligned(as, minlen, basep, lenp, flags, addr, 0, 0, 0));
2081 }
2082 
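as_gap() itself is just as_gap_aligned() with no alignment, offset, or redzone constraints; the underlying search visits the holes between segments from the bottom of the range (AH_LO) or from the top (AH_HI). A simplified user-space analog over a sorted array, ignoring the a_lastgap cache and the AH_CONTAIN case (all names illustrative):

#include <stdint.h>
#include <stddef.h>

struct range {			/* same illustrative type as the earlier sketches */
	uintptr_t base;
	size_t    size;
};

/*
 * Find a hole of at least 'minlen' bytes between sorted, non-overlapping
 * ranges, searching bottom-up (lowest hole) or top-down (highest hole).
 * Returns 0 and fills *basep/*lenp on success, -1 if no hole is big enough.
 */
int
find_gap(const struct range *tab, size_t n, uintptr_t lobound,
    uintptr_t hibound, size_t minlen, int highest,
    uintptr_t *basep, size_t *lenp)
{
	size_t i;

	/* There are n + 1 candidate holes: below tab[0], between ranges, above tab[n-1]. */
	for (i = 0; i <= n; i++) {
		size_t k = highest ? n - i : i;
		uintptr_t lo = (k == 0) ? lobound :
		    tab[k - 1].base + tab[k - 1].size;
		uintptr_t hi = (k == n) ? hibound : tab[k].base;

		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		if (hi > lo && hi - lo >= minlen) {
			*basep = lo;
			*lenp = hi - lo;
			return (0);
		}
	}
	return (-1);
}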
2083 /*
2084  * Return the next range within [base, base + len) that is backed
2085  * with "real memory".  Skip holes and non-seg_vn segments.
2086  * We're lazy and only return one segment at a time.
2087  */
2088 int
2089 as_memory(struct as *as, caddr_t *basep, size_t *lenp)
2090 {
2091         extern struct seg_ops segspt_shmops;    /* needs a header file */
2092         struct seg *seg;
2093         caddr_t addr, eaddr;
2094         caddr_t segend;
2095 
2096         AS_LOCK_ENTER(as, RW_READER);
2097 
2098         addr = *basep;
2099         eaddr = addr + *lenp;
2100 
2101         seg = as_findseg(as, addr, 0);
2102         if (seg != NULL)
2103                 addr = MAX(seg->s_base, addr);
2104 
2105         for (;;) {
2106                 if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) {
2107                         AS_LOCK_EXIT(as);
2108                         return (EINVAL);
2109                 }
2110 
2111                 if (seg->s_ops == &segvn_ops) {
2112                         segend = seg->s_base + seg->s_size;
2113                         break;
2114                 }
2115 
2116                 /*
2117                  * We do ISM by looking into the private data
2118                  * to determine the real size of the segment.
2119                  */
2120                 if (seg->s_ops == &segspt_shmops) {
2121                         segend = seg->s_base + spt_realsize(seg);
2122                         if (addr < segend)
2123                                 break;
2124                 }
2125 
2126                 seg = AS_SEGNEXT(as, seg);
2127 
2128                 if (seg != NULL)
2129                         addr = seg->s_base;
2130         }
2131 
2132         *basep = addr;
2133 
2134         if (segend > eaddr)
2135                 *lenp = eaddr - addr;
2136         else
2137                 *lenp = segend - addr;
2138 
2139         AS_LOCK_EXIT(as);
2140         return (0);
2141 }
2142 
2143 /*
2144  * Swap the pages associated with the address space as out to
2145  * secondary storage, returning the number of bytes actually
2146  * swapped.
2147  *
2148  * The value returned is intended to correlate well with the process's
2149  * memory requirements.  Its usefulness for this purpose depends on
2150  * how well the segment-level routines do at returning accurate
2151  * information.
2152  */
2153 size_t
2154 as_swapout(struct as *as)
2155 {
2156         struct seg *seg;
2157         size_t swpcnt = 0;
2158 
2159         /*
2160          * Kernel-only processes have given up their address
2161          * spaces.  Of course, we shouldn't be attempting to
2162          * swap out such processes in the first place...
2163          */
2164         if (as == NULL)
2165                 return (0);
2166 
2167         AS_LOCK_ENTER(as, RW_READER);
2168 
2169         /* Prevent XHATs from attaching */
2170         mutex_enter(&as->a_contents);
2171         AS_SETBUSY(as);
2172         mutex_exit(&as->a_contents);
2173 
2174 
2175         /*
2176          * Free all mapping resources associated with the address
2177          * space.  The segment-level swapout routines capitalize
 2178          * on this unmapping by scavenging pages that have become
2179          * unmapped here.
2180          */
2181         hat_swapout(as->a_hat);
2182         if (as->a_xhat != NULL)
2183                 xhat_swapout_all(as);
2184 
2185         mutex_enter(&as->a_contents);
2186         AS_CLRBUSY(as);
2187         mutex_exit(&as->a_contents);
2188 
2189         /*
2190          * Call the swapout routines of all segments in the address
2191          * space to do the actual work, accumulating the amount of
2192          * space reclaimed.
2193          */
2194         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195                 struct seg_ops *ov = seg->s_ops;
2196 
2197                 /*
2198                  * We have to check to see if the seg has
2199                  * an ops vector because the seg may have
2200                  * been in the middle of being set up when
2201                  * the process was picked for swapout.
2202                  */
2203                 if ((ov != NULL) && (ov->swapout != NULL))
2204                         swpcnt += SEGOP_SWAPOUT(seg);
2205         }
2206         AS_LOCK_EXIT(as);
2207         return (swpcnt);
2208 }
2209 
2210 /*
2211  * Determine whether data from the mappings in interval [addr, addr + size)
2212  * are in the primary memory (core) cache.
2213  */
2214 int
2215 as_incore(struct as *as, caddr_t addr,
2216     size_t size, char *vec, size_t *sizep)
2217 {
2218         struct seg *seg;
2219         size_t ssize;
2220         caddr_t raddr;          /* rounded down addr */
2221         size_t rsize;           /* rounded up size */
2222         size_t isize;                   /* iteration size */
2223         int error = 0;          /* result, assume success */
2224 
2225         *sizep = 0;
2226         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2227         rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
2228             (size_t)raddr;
2229 
2230         if (raddr + rsize < raddr)           /* check for wraparound */
2231                 return (ENOMEM);
2232 
2233         AS_LOCK_ENTER(as, RW_READER);
2234         seg = as_segat(as, raddr);
2235         if (seg == NULL) {
2236                 AS_LOCK_EXIT(as);
2237                 return (-1);
2238         }
2239 
2240         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2241                 if (raddr >= seg->s_base + seg->s_size) {
2242                         seg = AS_SEGNEXT(as, seg);
2243                         if (seg == NULL || raddr != seg->s_base) {
2244                                 error = -1;
2245                                 break;
2246                         }
2247                 }
2248                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2249                         ssize = seg->s_base + seg->s_size - raddr;
2250                 else
2251                         ssize = rsize;
2252                 *sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
2253                 if (isize != ssize) {
2254                         error = -1;
2255                         break;
2256                 }
2257                 vec += btopr(ssize);
2258         }
2259         AS_LOCK_EXIT(as);
2260         return (error);
2261 }
2262 
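as_incore() answers, per page, the same residency question user programs ask through mincore(2), accumulating one SEGOP_INCORE() result byte per page into vec. Here is a small user-space probe using mincore(); the third parameter's exact type (char * vs. unsigned char *) and the MAP_ANON spelling vary between platforms, so treat this as a sketch rather than portable code.

#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Map 16 anonymous pages, touch one, and report how many are resident.
 * mincore() fills one result byte per page, much like the 'vec' above.
 */
int
main(void)
{
	size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = 16 * pagesz;
	size_t npages = len / pagesz;
	char *vec = malloc(npages);
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	size_t i, resident = 0;

	if (p == MAP_FAILED || vec == NULL)
		return (1);
	((char *)p)[0] = 1;			/* fault in one page */

	if (mincore(p, len, (void *)vec) != 0)	/* prototype differs per platform */
		return (1);
	for (i = 0; i < npages; i++)
		if (vec[i] & 1)
			resident++;
	printf("%zu of %zu pages resident\n", resident, npages);
	return (0);
}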
2263 static void
2264 as_segunlock(struct seg *seg, caddr_t addr, int attr,
2265         ulong_t *bitmap, size_t position, size_t npages)
2266 {
2267         caddr_t range_start;
2268         size_t  pos1 = position;
2269         size_t  pos2;
2270         size_t  size;
2271         size_t  end_pos = npages + position;
2272 
2273         while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
2274                 size = ptob((pos2 - pos1));
2275                 range_start = (caddr_t)((uintptr_t)addr +
2276                     ptob(pos1 - position));
2277 
2278                 (void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
2279                     (ulong_t *)NULL, (size_t)NULL);


2309  * address space "as".
2310  */
2311 /*ARGSUSED*/
2312 int
2313 as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
2314     uintptr_t arg, ulong_t *lock_map, size_t pos)
2315 {
2316         struct seg *seg;        /* working segment */
2317         caddr_t raddr;          /* rounded down addr */
2318         caddr_t initraddr;      /* saved initial rounded down addr */
2319         size_t rsize;           /* rounded up size */
2320         size_t initrsize;       /* saved initial rounded up size */
2321         size_t ssize;           /* size of seg */
2322         int error = 0;                  /* result */
2323         size_t mlock_size;      /* size of bitmap */
2324         ulong_t *mlock_map;     /* pointer to bitmap used */
2325                                 /* to represent the locked */
2326                                 /* pages. */
2327 retry:
2328         if (error == IE_RETRY)
2329                 AS_LOCK_ENTER(as, RW_WRITER);
2330         else
2331                 AS_LOCK_ENTER(as, RW_READER);
2332 
2333         /*
2334          * If these are address space lock/unlock operations, loop over
2335          * all segments in the address space, as appropriate.
2336          */
2337         if (func == MC_LOCKAS) {
2338                 size_t npages, idx;
2339                 size_t rlen = 0;        /* rounded as length */
2340 
2341                 idx = pos;
2342 
2343                 if (arg & MCL_FUTURE) {
2344                         mutex_enter(&as->a_contents);
2345                         AS_SETPGLCK(as);
2346                         mutex_exit(&as->a_contents);
2347                 }
2348                 if ((arg & MCL_CURRENT) == 0) {
2349                         AS_LOCK_EXIT(as);
2350                         return (0);
2351                 }
2352 
2353                 seg = AS_SEGFIRST(as);
2354                 if (seg == NULL) {
2355                         AS_LOCK_EXIT(as);
2356                         return (0);
2357                 }
2358 
2359                 do {
2360                         raddr = (caddr_t)((uintptr_t)seg->s_base &
2361                             (uintptr_t)PAGEMASK);
2362                         rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
2363                             PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
2364                 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2365 
2366                 mlock_size = BT_BITOUL(btopr(rlen));
2367                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2368                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
 2369                         AS_LOCK_EXIT(as);
 2370                         return (EAGAIN);
2371                 }
2372 
2373                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2374                         error = SEGOP_LOCKOP(seg, seg->s_base,
2375                             seg->s_size, attr, MC_LOCK, mlock_map, pos);
2376                         if (error != 0)
2377                                 break;
2378                         pos += seg_pages(seg);
2379                 }
2380 
2381                 if (error) {
2382                         for (seg = AS_SEGFIRST(as); seg != NULL;
2383                             seg = AS_SEGNEXT(as, seg)) {
2384 
2385                                 raddr = (caddr_t)((uintptr_t)seg->s_base &
2386                                     (uintptr_t)PAGEMASK);
2387                                 npages = seg_pages(seg);
2388                                 as_segunlock(seg, raddr, attr, mlock_map,
2389                                     idx, npages);
2390                                 idx += npages;
2391                         }
2392                 }
2393 
2394                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2395                 AS_LOCK_EXIT(as);
2396                 goto lockerr;
2397         } else if (func == MC_UNLOCKAS) {
2398                 mutex_enter(&as->a_contents);
2399                 AS_CLRPGLCK(as);
2400                 mutex_exit(&as->a_contents);
2401 
2402                 for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
2403                         error = SEGOP_LOCKOP(seg, seg->s_base,
2404                             seg->s_size, attr, MC_UNLOCK, NULL, 0);
2405                         if (error != 0)
2406                                 break;
2407                 }
2408 
2409                 AS_LOCK_EXIT(as);
2410                 goto lockerr;
2411         }
2412 
2413         /*
2414          * Normalize addresses and sizes.
2415          */
2416         initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2417         initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2418             (size_t)raddr;
2419 
2420         if (raddr + rsize < raddr) {         /* check for wraparound */
2421                 AS_LOCK_EXIT(as);
2422                 return (ENOMEM);
2423         }
2424 
2425         /*
2426          * Get initial segment.
2427          */
2428         if ((seg = as_segat(as, raddr)) == NULL) {
2429                 AS_LOCK_EXIT(as);
2430                 return (ENOMEM);
2431         }
2432 
2433         if (func == MC_LOCK) {
2434                 mlock_size = BT_BITOUL(btopr(rsize));
2435                 if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
2436                     sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
2437                                 AS_LOCK_EXIT(as);
2438                                 return (EAGAIN);
2439                 }
2440         }
2441 
2442         /*
2443          * Loop over all segments.  If a hole in the address range is
2444          * discovered, then fail.  For each segment, perform the appropriate
2445          * control operation.
2446          */
2447         while (rsize != 0) {
2448 
2449                 /*
2450                  * Make sure there's no hole, and calculate the portion
2451                  * of the next segment to be operated over.
2452                  */
2453                 if (raddr >= seg->s_base + seg->s_size) {
2454                         seg = AS_SEGNEXT(as, seg);
2455                         if (seg == NULL || raddr != seg->s_base) {
2456                                 if (func == MC_LOCK) {
2457                                         as_unlockerr(as, attr, mlock_map,
2458                                             initraddr, initrsize - rsize);
2459                                         kmem_free(mlock_map,
2460                                             mlock_size * sizeof (ulong_t));
2461                                 }
2462                                 AS_LOCK_EXIT(as);
2463                                 return (ENOMEM);
2464                         }
2465                 }
2466                 if ((raddr + rsize) > (seg->s_base + seg->s_size))
2467                         ssize = seg->s_base + seg->s_size - raddr;
2468                 else
2469                         ssize = rsize;
2470 
2471                 /*
2472                  * Dispatch on specific function.
2473                  */
2474                 switch (func) {
2475 
2476                 /*
2477                  * Synchronize cached data from mappings with backing
2478                  * objects.
2479                  */
2480                 case MC_SYNC:
2481                         if (error = SEGOP_SYNC(seg, raddr, ssize,
2482                             attr, (uint_t)arg)) {
2483                                 AS_LOCK_EXIT(as);
2484                                 return (error);
2485                         }
2486                         break;
2487 
2488                 /*
2489                  * Lock pages in memory.
2490                  */
2491                 case MC_LOCK:
2492                         if (error = SEGOP_LOCKOP(seg, raddr, ssize,
2493                             attr, func, mlock_map, pos)) {
2494                                 as_unlockerr(as, attr, mlock_map, initraddr,
2495                                     initrsize - rsize + ssize);
2496                                 kmem_free(mlock_map, mlock_size *
2497                                     sizeof (ulong_t));
2498                                 AS_LOCK_EXIT(as);
2499                                 goto lockerr;
2500                         }
2501                         break;
2502 
2503                 /*
2504                  * Unlock mapped pages.
2505                  */
2506                 case MC_UNLOCK:
2507                         (void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
2508                             (ulong_t *)NULL, (size_t)NULL);
2509                         break;
2510 
2511                 /*
2512                  * Store VM advice for mapped pages in the segment layer.
2513                  */
2514                 case MC_ADVISE:
2515                         error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);
2516 
2517                         /*
2518                          * Check for regular errors and the special retry error.
2519                          */
2520                         if (error) {
2521                                 if (error == IE_RETRY) {
2522                                         /*
2523                                          * Need to acquire writers lock, so
2524                                          * have to drop readers lock and start
2525                                          * all over again
2526                                          */
2527                                         AS_LOCK_EXIT(as);
2528                                         goto retry;
2529                                 } else if (error == IE_REATTACH) {
2530                                         /*
2531                                          * Find segment for current address
2532                                          * because current segment just got
2533                                          * split or concatenated
2534                                          */
2535                                         seg = as_segat(as, raddr);
2536                                         if (seg == NULL) {
2537                                                 AS_LOCK_EXIT(as);
2538                                                 return (ENOMEM);
2539                                         }
2540                                 } else {
2541                                         /*
2542                                          * Regular error
2543                                          */
2544                                         AS_LOCK_EXIT(as);
2545                                         return (error);
2546                                 }
2547                         }
2548                         break;
2549 
2550                 case MC_INHERIT_ZERO:
2551                         if (seg->s_ops->inherit == NULL) {
2552                                 error = ENOTSUP;
2553                         } else {
2554                                 error = SEGOP_INHERIT(seg, raddr, ssize,
2555                                     SEGP_INH_ZERO);
2556                         }
2557                         if (error != 0) {
2558                                 AS_LOCK_EXIT(as);
2559                                 return (error);
2560                         }
2561                         break;
2562 
2563                 /*
2564                  * Can't happen.
2565                  */
2566                 default:
2567                         panic("as_ctl: bad operation %d", func);
2568                         /*NOTREACHED*/
2569                 }
2570 
2571                 rsize -= ssize;
2572                 raddr += ssize;
2573         }
2574 
2575         if (func == MC_LOCK)
2576                 kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
2577         AS_LOCK_EXIT(as);
2578         return (0);
2579 lockerr:
2580 
2581         /*
2582          * If the lower levels returned EDEADLK for a segment lockop,
2583          * it means that we should retry the operation.  Let's also wait
2584          * a bit to let the deadlock-causing condition clear.
2585          * This is part of a gross hack to work around a design flaw
2586          * in the ufs/sds logging code and should go away when the
2587          * logging code is re-designed to fix the problem. See bug
2588          * 4125102 for details of the problem.
2589          */
2590         if (error == EDEADLK) {
2591                 delay(deadlk_wait);
2592                 error = 0;
2593                 goto retry;
2594         }
2595         return (error);
2596 }
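
For reference, the MC_LOCKAS and MC_LOCK paths above size mlock_map as one bit
per page of the page-rounded range, packed into ulong_t words via
BT_BITOUL(btopr(len)).  A minimal standalone sketch of that arithmetic, with
PAGESIZE, btopr() and BT_BITOUL() re-implemented locally as stand-ins for the
kernel macros:

#include <stdio.h>
#include <stdint.h>

#define	MY_PAGESIZE	4096UL			/* stand-in for PAGESIZE */
#define	MY_PAGEOFFSET	(MY_PAGESIZE - 1)
#define	MY_PAGEMASK	(~MY_PAGEOFFSET)
#define	MY_NBBY		8
#define	MY_BT_NBIPUL	(sizeof (unsigned long) * MY_NBBY)	/* bits per ulong */

/* round a byte count up to whole pages (stand-in for btopr()) */
static size_t
my_btopr(size_t bytes)
{
	return ((bytes + MY_PAGEOFFSET) / MY_PAGESIZE);
}

/* number of ulongs needed to hold nbits bits (stand-in for BT_BITOUL()) */
static size_t
my_bt_bitoul(size_t nbits)
{
	return ((nbits + MY_BT_NBIPUL - 1) / MY_BT_NBIPUL);
}

int
main(void)
{
	uintptr_t addr = 0x10234;		/* deliberately unaligned request */
	size_t size = 3 * MY_PAGESIZE + 17;

	uintptr_t raddr = addr & MY_PAGEMASK;	/* round the start down */
	size_t rsize = ((addr + size + MY_PAGEOFFSET) & MY_PAGEMASK) - raddr;

	size_t npages = my_btopr(rsize);
	size_t mlock_size = my_bt_bitoul(npages);	/* ulong_t words in the map */

	printf("raddr=0x%lx rsize=%zu npages=%zu mlock_map words=%zu\n",
	    (unsigned long)raddr, rsize, npages, mlock_size);
	return (0);
}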
2597 


2622  */
2623 static int
2624 as_pagelock_segs(struct as *as, struct seg *seg, struct page ***ppp,
2625     caddr_t addr, size_t size, enum seg_rw rw)
2626 {
2627         caddr_t sv_addr = addr;
2628         size_t sv_size = size;
2629         struct seg *sv_seg = seg;
2630         ulong_t segcnt = 1;
2631         ulong_t cnt;
2632         size_t ssize;
2633         pgcnt_t npages = btop(size);
2634         page_t **plist;
2635         page_t **pl;
2636         int error;
2637         caddr_t eaddr;
2638         faultcode_t fault_err = 0;
2639         pgcnt_t pl_off;
2640         extern struct seg_ops segspt_shmops;
2641 
2642         ASSERT(AS_LOCK_HELD(as));
2643         ASSERT(seg != NULL);
2644         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2645         ASSERT(addr + size > seg->s_base + seg->s_size);
2646         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2647         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2648 
2649         /*
2650          * Count the number of segments covered by the range we are about to
2651          * lock. The segment count is used to size the shadow list we return
2652          * to the caller.
2653          */
2654         for (; size != 0; size -= ssize, addr += ssize) {
2655                 if (addr >= seg->s_base + seg->s_size) {
2656 
2657                         seg = AS_SEGNEXT(as, seg);
2658                         if (seg == NULL || addr != seg->s_base) {
2659                                 AS_LOCK_EXIT(as);
2660                                 return (EFAULT);
2661                         }
2662                         /*
2663                          * Do a quick check if subsequent segments
2664                          * will most likely support pagelock.
2665                          */
2666                         if (seg->s_ops == &segvn_ops) {
2667                                 vnode_t *vp;
2668 
2669                                 if (SEGOP_GETVP(seg, addr, &vp) != 0 ||
2670                                     vp != NULL) {
2671                                         AS_LOCK_EXIT(as);
2672                                         goto slow;
2673                                 }
2674                         } else if (seg->s_ops != &segspt_shmops) {
2675                                 AS_LOCK_EXIT(as);
2676                                 goto slow;
2677                         }
2678                         segcnt++;
2679                 }
2680                 if (addr + size > seg->s_base + seg->s_size) {
2681                         ssize = seg->s_base + seg->s_size - addr;
2682                 } else {
2683                         ssize = size;
2684                 }
2685         }
2686         ASSERT(segcnt > 1);
2687 
2688         plist = kmem_zalloc((npages + segcnt) * sizeof (page_t *), KM_SLEEP);
2689 
2690         addr = sv_addr;
2691         size = sv_size;
2692         seg = sv_seg;
2693 
2694         for (cnt = 0, pl_off = 0; size != 0; size -= ssize, addr += ssize) {
2695                 if (addr >= seg->s_base + seg->s_size) {


2700                 }
2701                 if (addr + size > seg->s_base + seg->s_size) {
2702                         ssize = seg->s_base + seg->s_size - addr;
2703                 } else {
2704                         ssize = size;
2705                 }
2706                 pl = &plist[npages + cnt];
2707                 error = SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2708                     L_PAGELOCK, rw);
2709                 if (error) {
2710                         break;
2711                 }
2712                 ASSERT(plist[npages + cnt] != NULL);
2713                 ASSERT(pl_off + btop(ssize) <= npages);
2714                 bcopy(plist[npages + cnt], &plist[pl_off],
2715                     btop(ssize) * sizeof (page_t *));
2716                 pl_off += btop(ssize);
2717         }
2718 
2719         if (size == 0) {
2720                 AS_LOCK_EXIT(as);
2721                 ASSERT(cnt == segcnt - 1);
2722                 *ppp = plist;
2723                 return (0);
2724         }
2725 
2726         /*
2727          * One of the pagelock calls failed; the error type is in the error
2728          * variable. Unlock what we've locked so far, and retry with F_SOFTLOCK
2729          * if the error is either EFAULT or ENOTSUP. Otherwise just return the
2730          * error back to the caller.
2731          */
2732 
2733         eaddr = addr;
2734         seg = sv_seg;
2735 
2736         for (cnt = 0, addr = sv_addr; addr < eaddr; addr += ssize) {
2737                 if (addr >= seg->s_base + seg->s_size) {
2738                         seg = AS_SEGNEXT(as, seg);
2739                         ASSERT(seg != NULL && addr == seg->s_base);
2740                         cnt++;
2741                         ASSERT(cnt < segcnt);
2742                 }
2743                 if (eaddr > seg->s_base + seg->s_size) {
2744                         ssize = seg->s_base + seg->s_size - addr;
2745                 } else {
2746                         ssize = eaddr - addr;
2747                 }
2748                 pl = &plist[npages + cnt];
2749                 ASSERT(*pl != NULL);
2750                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2751                     L_PAGEUNLOCK, rw);
2752         }
2753 
2754         AS_LOCK_EXIT(as);
2755 
2756         kmem_free(plist, (npages + segcnt) * sizeof (page_t *));
2757 
2758         if (error != ENOTSUP && error != EFAULT) {
2759                 return (error);
2760         }
2761 
2762 slow:
2763         /*
2764          * If we are here because pagelock failed due to the need to cow-fault
2765          * in the pages we want to lock, F_SOFTLOCK will do that job, and in the
2766          * next as_pagelock() call for this address range pagelock will
2767          * hopefully succeed.
2768          */
2769         fault_err = as_fault(as->a_hat, as, sv_addr, sv_size, F_SOFTLOCK, rw);
2770         if (fault_err != 0) {
2771                 return (fc_decode(fault_err));
2772         }
2773         *ppp = NULL;
2774 


2783 as_pagelock(struct as *as, struct page ***ppp, caddr_t addr,
2784     size_t size, enum seg_rw rw)
2785 {
2786         size_t rsize;
2787         caddr_t raddr;
2788         faultcode_t fault_err;
2789         struct seg *seg;
2790         int err;
2791 
2792         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START,
2793             "as_pagelock_start: addr %p size %ld", addr, size);
2794 
2795         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2796         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2797             (size_t)raddr;
2798 
2799         /*
2800          * if the request crosses more than one segment,
2801          * let as_pagelock_segs() handle it.
2802          */
2803         AS_LOCK_ENTER(as, RW_READER);
2804 
2805         seg = as_segat(as, raddr);
2806         if (seg == NULL) {
2807                 AS_LOCK_EXIT(as);
2808                 return (EFAULT);
2809         }
2810         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2811         if (raddr + rsize > seg->s_base + seg->s_size) {
2812                 return (as_pagelock_segs(as, seg, ppp, raddr, rsize, rw));
2813         }
2814         if (raddr + rsize <= raddr) {
2815                 AS_LOCK_EXIT(as);
2816                 return (EFAULT);
2817         }
2818 
2819         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START,
2820             "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize);
2821 
2822         /*
2823          * try to lock pages and pass back shadow list
2824          */
2825         err = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw);
2826 
2827         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end");
2828 
2829         AS_LOCK_EXIT(as);
2830 
2831         if (err == 0 || (err != ENOTSUP && err != EFAULT)) {
2832                 return (err);
2833         }
2834 
2835         /*
2836          * Use F_SOFTLOCK to lock the pages because pagelock failed either due
2837          * to no pagelock support for this segment or because the pages need to
2838          * be cow-faulted in. If a fault is needed, F_SOFTLOCK will do that job
2839          * for this as_pagelock() call, and in the next as_pagelock() call for
2840          * the same address range the pagelock call will hopefully succeed.
2841          */
2842         fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw);
2843         if (fault_err != 0) {
2844                 return (fc_decode(fault_err));
2845         }
2846         *ppp = NULL;
2847 
2848         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end");
2849         return (0);
2850 }
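
Only ENOTSUP and EFAULT from the segment-level pagelock are treated as "fall
back to the F_SOFTLOCK fault path"; any other failure is returned to the
caller unchanged, and on success the shadow list is handed back through *ppp
(the fallback path instead sets *ppp to NULL).  A tiny sketch of that
classification, using the userland errno constants as stand-ins:

#include <stdio.h>
#include <errno.h>

/* nonzero if a pagelock failure should be retried via an F_SOFTLOCK fault */
static int
pagelock_should_fallback(int err)
{
	return (err == ENOTSUP || err == EFAULT);
}

int
main(void)
{
	int errs[] = { 0, ENOTSUP, EFAULT, EAGAIN };
	int i;

	for (i = 0; i < (int)(sizeof (errs) / sizeof (errs[0])); i++)
		printf("err %d -> %s\n", errs[i],
		    errs[i] == 0 ? "shadow list returned" :
		    pagelock_should_fallback(errs[i]) ? "softlock fallback" :
		    "returned to caller");
	return (0);
}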
2851 
2852 /*
2853  * Unlock pages locked by as_pagelock_segs().  Retrieve the per-segment shadow
2854  * lists from the end of plist and call the pageunlock interface for each
2855  * segment.  Drop the as lock and free plist.
2856  */
2857 static void
2858 as_pageunlock_segs(struct as *as, struct seg *seg, caddr_t addr, size_t size,
2859     struct page **plist, enum seg_rw rw)
2860 {
2861         ulong_t cnt;
2862         caddr_t eaddr = addr + size;
2863         pgcnt_t npages = btop(size);
2864         size_t ssize;
2865         page_t **pl;
2866 
2867         ASSERT(AS_LOCK_HELD(as));
2868         ASSERT(seg != NULL);
2869         ASSERT(addr >= seg->s_base && addr < seg->s_base + seg->s_size);
2870         ASSERT(addr + size > seg->s_base + seg->s_size);
2871         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
2872         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
2873         ASSERT(plist != NULL);
2874 
2875         for (cnt = 0; addr < eaddr; addr += ssize) {
2876                 if (addr >= seg->s_base + seg->s_size) {
2877                         seg = AS_SEGNEXT(as, seg);
2878                         ASSERT(seg != NULL && addr == seg->s_base);
2879                         cnt++;
2880                 }
2881                 if (eaddr > seg->s_base + seg->s_size) {
2882                         ssize = seg->s_base + seg->s_size - addr;
2883                 } else {
2884                         ssize = eaddr - addr;
2885                 }
2886                 pl = &plist[npages + cnt];
2887                 ASSERT(*pl != NULL);
2888                 (void) SEGOP_PAGELOCK(seg, addr, ssize, (page_t ***)pl,
2889                     L_PAGEUNLOCK, rw);
2890         }
2891         ASSERT(cnt > 0);
2892         AS_LOCK_EXIT(as);
2893 
2894         cnt++;
2895         kmem_free(plist, (npages + cnt) * sizeof (page_t *));
2896 }
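
The shadow list built by as_pagelock_segs() and consumed above is a single
allocation of (npages + segcnt) pointers: slots 0 .. npages-1 hold the
concatenated page list returned to the caller through *ppp, and slots
npages .. npages+segcnt-1 hold the per-segment shadow-list pointers needed to
undo each SEGOP_PAGELOCK().  A small sketch of that layout, using void * as a
stand-in for page_t * and calloc for kmem_zalloc():

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	size_t npages = 8;	/* btop() of the whole locked range */
	size_t segcnt = 3;	/* number of segments the range spans */
	size_t cnt;
	void **plist = calloc(npages + segcnt, sizeof (void *));

	if (plist == NULL)
		return (1);

	/*
	 * Slot layout, mirroring as_pagelock_segs()/as_pageunlock_segs():
	 *	plist[0 .. npages-1]	combined page list (returned via *ppp)
	 *	plist[npages + cnt]	shadow list for segment number cnt
	 */
	for (cnt = 0; cnt < segcnt; cnt++)
		printf("segment %zu shadow-list slot: plist[%zu]\n",
		    cnt, npages + cnt);

	free(plist);
	return (0);
}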
2897 
2898 /*
2899  * unlock pages in a given address range
2900  */
2901 void
2902 as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size,
2903     enum seg_rw rw)
2904 {
2905         struct seg *seg;
2906         size_t rsize;
2907         caddr_t raddr;
2908 
2909         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START,
2910             "as_pageunlock_start: addr %p size %ld", addr, size);
2911 
2912         /*
2913          * if the shadow list is NULL, as_pagelock()
2914          * fell back to as_fault()
2915          */
2916         if (pp == NULL) {
2917                 (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw);
2918                 return;
2919         }
2920 
2921         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2922         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
2923             (size_t)raddr;
2924 
2925         AS_LOCK_ENTER(as, RW_READER);
2926         seg = as_segat(as, raddr);
2927         ASSERT(seg != NULL);
2928 
2929         TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START,
2930             "seg_unlock_start: raddr %p rsize %ld", raddr, rsize);
2931 
2932         ASSERT(raddr >= seg->s_base && raddr < seg->s_base + seg->s_size);
2933         if (raddr + rsize <= seg->s_base + seg->s_size) {
2934                 SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw);
2935         } else {
2936                 as_pageunlock_segs(as, seg, raddr, rsize, pp, rw);
2937                 return;
2938         }
2939         AS_LOCK_EXIT(as);
2940         TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end");
2941 }
2942 
2943 int
2944 as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
2945     boolean_t wait)
2946 {
2947         struct seg *seg;
2948         size_t ssize;
2949         caddr_t raddr;                  /* rounded down addr */
2950         size_t rsize;                   /* rounded up size */
2951         int error = 0;
2952         size_t pgsz = page_get_pagesize(szc);
2953 
2954 setpgsz_top:
2955         if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
2956                 return (EINVAL);
2957         }
2958 
2959         raddr = addr;
2960         rsize = size;
2961 
2962         if (raddr + rsize < raddr)           /* check for wraparound */
2963                 return (ENOMEM);
2964 
2965         AS_LOCK_ENTER(as, RW_WRITER);
2966         as_clearwatchprot(as, raddr, rsize);
2967         seg = as_segat(as, raddr);
2968         if (seg == NULL) {
2969                 as_setwatch(as);
2970                 AS_LOCK_EXIT(as);
2971                 return (ENOMEM);
2972         }
2973 
2974         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
2975                 if (raddr >= seg->s_base + seg->s_size) {
2976                         seg = AS_SEGNEXT(as, seg);
2977                         if (seg == NULL || raddr != seg->s_base) {
2978                                 error = ENOMEM;
2979                                 break;
2980                         }
2981                 }
2982                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
2983                         ssize = seg->s_base + seg->s_size - raddr;
2984                 } else {
2985                         ssize = rsize;
2986                 }
2987 
2988 retry:
2989                 error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);
2990 
2991                 if (error == IE_NOMEM) {
2992                         error = EAGAIN;
2993                         break;
2994                 }
2995 
2996                 if (error == IE_RETRY) {
2997                         AS_LOCK_EXIT(as);
2998                         goto setpgsz_top;
2999                 }
3000 
3001                 if (error == ENOTSUP) {
3002                         error = EINVAL;
3003                         break;
3004                 }
3005 
3006                 if (wait && (error == EAGAIN)) {
3007                         /*
3008                          * Memory is currently locked.  It must be unlocked
3009                          * before this operation can succeed through a retry.
3010                          * The possible reasons for locked memory and
3011                          * corresponding strategies for unlocking are:
3012                          * (1) Normal I/O
3013                          *      wait for a signal that the I/O operation
3014                          *      has completed and the memory is unlocked.
3015                          * (2) Asynchronous I/O
3016                          *      The aio subsystem does not unlock pages when
3017                          *      the I/O is completed. Those pages are unlocked
3018                          *      when the application calls aiowait/aioerror.
3019                          *      So, to prevent blocking forever, cv_broadcast()
3020                          *      is done to wake up aio_cleanup_thread.
3021                          *      Subsequently, segvn_reclaim will be called, and
3022                          *      that will do AS_CLRUNMAPWAIT() and wake us up.
3023                          * (3) Long term page locking:
3024                          *      This is not relevant for as_setpagesize()
3025                          *      because we cannot change the page size for
3026                          *      driver memory. The attempt to do so will
3027                          *      fail with a different error than EAGAIN so
3028                          *      there's no need to trigger as callbacks like
3029                          *      as_unmap, as_setprot or as_free would do.
3030                          */
3031                         mutex_enter(&as->a_contents);
3032                         if (!AS_ISNOUNMAPWAIT(as)) {
3033                                 if (AS_ISUNMAPWAIT(as) == 0) {
3034                                         cv_broadcast(&as->a_cv);
3035                                 }
3036                                 AS_SETUNMAPWAIT(as);
3037                                 AS_LOCK_EXIT(as);
3038                                 while (AS_ISUNMAPWAIT(as)) {
3039                                         cv_wait(&as->a_cv, &as->a_contents);
3040                                 }
3041                         } else {
3042                                 /*
3043                                  * We may have raced with
3044                                  * segvn_reclaim()/segspt_reclaim(). In this
3045                                  * case clean nounmapwait flag and retry since
3046                                  * softlockcnt in this segment may be already
3047                                  * 0.  We don't drop as writer lock so our
3048                                  * number of retries without sleeping should
3049                                  * be very small. See segvn_reclaim() for
3050                                  * more comments.
3051                                  */
3052                                 AS_CLRNOUNMAPWAIT(as);
3053                                 mutex_exit(&as->a_contents);
3054                                 goto retry;
3055                         }
3056                         mutex_exit(&as->a_contents);
3057                         goto setpgsz_top;
3058                 } else if (error != 0) {
3059                         break;
3060                 }
3061         }
3062         as_setwatch(as);
3063         AS_LOCK_EXIT(as);
3064         return (error);
3065 }
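
as_setpagesize() refuses any request whose start address and length are not
both aligned to the page size implied by the requested size code.  A minimal
sketch of that alignment test, assuming an illustrative szc -> pagesize table
(the real page_get_pagesize() mapping is platform-specific) and a local
re-implementation of IS_P2ALIGNED():

#include <stdio.h>
#include <stdint.h>

/* stand-in for IS_P2ALIGNED(): true if v is a multiple of the power-of-2 align */
#define	MY_IS_P2ALIGNED(v, align)	(((uintptr_t)(v) & ((align) - 1)) == 0)

/* illustrative szc -> pagesize table; not the real platform mapping */
static const size_t my_pagesizes[] = { 4096, 2097152, 1073741824UL };

int
main(void)
{
	unsigned int szc = 1;			/* ask for the 2M size code here */
	size_t pgsz = my_pagesizes[szc];
	uintptr_t addr = 0x200000;		/* 2M-aligned start */
	size_t size = 4 * pgsz;

	if (!MY_IS_P2ALIGNED(addr, pgsz) || !MY_IS_P2ALIGNED(size, pgsz)) {
		printf("EINVAL: range not aligned to %zu-byte pages\n", pgsz);
		return (1);
	}
	printf("range ok for szc %u (%zu-byte pages)\n", szc, pgsz);
	return (0);
}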
3066 
3067 /*
3068  * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments
3069  * in its chunk where s_szc is less than the szc we want to set.
3070  */
3071 static int
3072 as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3073     int *retry)
3074 {
3075         struct seg *seg;
3076         size_t ssize;
3077         int error;
3078 
3079         ASSERT(AS_WRITE_HELD(as));
3080 
3081         seg = as_segat(as, raddr);
3082         if (seg == NULL) {
3083                 panic("as_iset3_default_lpsize: no seg");
3084         }
3085 
3086         for (; rsize != 0; rsize -= ssize, raddr += ssize) {
3087                 if (raddr >= seg->s_base + seg->s_size) {
3088                         seg = AS_SEGNEXT(as, seg);
3089                         if (seg == NULL || raddr != seg->s_base) {
3090                                 panic("as_iset3_default_lpsize: as changed");
3091                         }
3092                 }
3093                 if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
3094                         ssize = seg->s_base + seg->s_size - raddr;
3095                 } else {
3096                         ssize = rsize;
3097                 }
3098 
3099                 if (szc > seg->s_szc) {


3116         }
3117         return (0);
3118 }
3119 
3120 /*
3121  * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the
3122  * pagesize on each segment in its range, but if any segment fails with
3123  * EINVAL, it reduces the pagesize to the next smaller size in the bitmap
3124  * and retries as_iset3_default_lpsize(). The code retries smaller allowed
3125  * sizes on EINVAL because (a) the anon offset may not
3126  * match the bigger sizes, and (b) it's hard to get this offset (to begin
3127  * with) to pass to map_pgszcvec().
3128  */
3129 static int
3130 as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc,
3131     uint_t szcvec)
3132 {
3133         int error;
3134         int retry;
3135 
3136         ASSERT(AS_WRITE_HELD(as));
3137 
3138         for (;;) {
3139                 error = as_iset3_default_lpsize(as, addr, size, szc, &retry);
3140                 if (error == EINVAL && retry) {
3141                         szcvec &= ~(1 << szc);
3142                         if (szcvec <= 1) {
3143                                 return (EINVAL);
3144                         }
3145                         szc = highbit(szcvec) - 1;
3146                 } else {
3147                         return (error);
3148                 }
3149         }
3150 }
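
The retry loop above walks down the allowed size codes by clearing the bit
that failed and taking the highest bit still set; the kernel's highbit()
returns the 1-based position of the highest set bit, and a vector of <= 1
means only the base page size is left.  A standalone sketch of that walk,
with highbit() re-implemented locally and every attempt assumed to fail the
way an EINVAL-with-retry return from as_iset3_default_lpsize() would:

#include <stdio.h>

/* 1-based position of the highest set bit, 0 if none (stand-in for highbit()) */
static int
my_highbit(unsigned long v)
{
	int bit = 0;

	while (v != 0) {
		bit++;
		v >>= 1;
	}
	return (bit);
}

int
main(void)
{
	unsigned int szcvec = 0x0b;		/* size codes 0, 1 and 3 allowed */
	unsigned int szc = my_highbit(szcvec) - 1;	/* start with the largest */

	for (;;) {
		printf("trying szc %u\n", szc);
		/* assume the attempt failed as EINVAL with retry set */
		szcvec &= ~(1U << szc);		/* drop the size code that failed */
		if (szcvec <= 1) {		/* only the base page size remains */
			printf("EINVAL: no usable large page size\n");
			break;
		}
		szc = my_highbit(szcvec) - 1;	/* next largest remaining code */
	}
	return (0);
}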
3151 
3152 /*
3153  * as_iset1_default_lpsize() breaks its chunk into areas where existing
3154  * segments have a smaller szc than we want to set. For each such area,
3155  * it calls as_iset2_default_lpsize().
3156  */
3157 static int
3158 as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc,
3159     uint_t szcvec)
3160 {
3161         struct seg *seg;
3162         size_t ssize;
3163         caddr_t setaddr = raddr;
3164         size_t setsize = 0;
3165         int set;
3166         int error;
3167 
3168         ASSERT(AS_WRITE_HELD(as));
3169 
3170         seg = as_segat(as, raddr);
3171         if (seg == NULL) {
3172                 panic("as_iset1_default_lpsize: no seg");
3173         }
3174         if (seg->s_szc < szc) {
3175                 set = 1;
3176         } else {
3177                 set = 0;
3178         }
3179 
3180         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3181                 if (raddr >= seg->s_base + seg->s_size) {
3182                         seg = AS_SEGNEXT(as, seg);
3183                         if (seg == NULL || raddr != seg->s_base) {
3184                                 panic("as_iset1_default_lpsize: as changed");
3185                         }
3186                         if (seg->s_szc >= szc && set) {
3187                                 ASSERT(setsize != 0);
3188                                 error = as_iset2_default_lpsize(as,


3216  * as_iset_default_lpsize() breaks its chunk according to the size code bitmap
3217  * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each
3218  * chunk to as_iset1_default_lpsize().
3219  */
3220 static int
3221 as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags,
3222     int type)
3223 {
3224         int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM;
3225         uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr,
3226             flags, rtype, 1);
3227         uint_t szc;
3228         uint_t nszc;
3229         int error;
3230         caddr_t a;
3231         caddr_t eaddr;
3232         size_t segsize;
3233         size_t pgsz;
3234         uint_t save_szcvec;
3235 
3236         ASSERT(AS_WRITE_HELD(as));
3237         ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
3238         ASSERT(IS_P2ALIGNED(size, PAGESIZE));
3239 
3240         szcvec &= ~1;
3241         if (szcvec <= 1) {   /* skip if base page size */
3242                 return (0);
3243         }
3244 
3245         /* Get the pagesize of the first larger page size. */
3246         szc = lowbit(szcvec) - 1;
3247         pgsz = page_get_pagesize(szc);
3248         eaddr = addr + size;
3249         addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
3250         eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
3251 
3252         save_szcvec = szcvec;
3253         szcvec >>= (szc + 1);
3254         nszc = szc;
3255         while (szcvec) {
3256                 if ((szcvec & 0x1) == 0) {


3308  * chunks with the same type/flags, ignores non-segvn segments, and passes
3309  * each chunk to as_iset_default_lpsize().
3310  */
3311 int
3312 as_set_default_lpsize(struct as *as, caddr_t addr, size_t size)
3313 {
3314         struct seg *seg;
3315         caddr_t raddr;
3316         size_t rsize;
3317         size_t ssize;
3318         int rtype, rflags;
3319         int stype, sflags;
3320         int error;
3321         caddr_t setaddr;
3322         size_t setsize;
3323         int segvn;
3324 
3325         if (size == 0)
3326                 return (0);
3327 
3328         AS_LOCK_ENTER(as, RW_WRITER);
3329 again:
3330         error = 0;
3331 
3332         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3333         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
3334             (size_t)raddr;
3335 
3336         if (raddr + rsize < raddr) {         /* check for wraparound */
3337                 AS_LOCK_EXIT(as);
3338                 return (ENOMEM);
3339         }
3340         as_clearwatchprot(as, raddr, rsize);
3341         seg = as_segat(as, raddr);
3342         if (seg == NULL) {
3343                 as_setwatch(as);
3344                 AS_LOCK_EXIT(as);
3345                 return (ENOMEM);
3346         }
3347         if (seg->s_ops == &segvn_ops) {
3348                 rtype = SEGOP_GETTYPE(seg, addr);
3349                 rflags = rtype & (MAP_TEXT | MAP_INITDATA);
3350                 rtype = rtype & (MAP_SHARED | MAP_PRIVATE);
3351                 segvn = 1;
3352         } else {
3353                 segvn = 0;
3354         }
3355         setaddr = raddr;
3356         setsize = 0;
3357 
3358         for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) {
3359                 if (raddr >= (seg->s_base + seg->s_size)) {
3360                         seg = AS_SEGNEXT(as, seg);
3361                         if (seg == NULL || raddr != seg->s_base) {
3362                                 error = ENOMEM;
3363                                 break;
3364                         }


3409         if (error == 0 && segvn) {
3410                 /* The last chunk when rsize == 0. */
3411                 ASSERT(setsize != 0);
3412                 error = as_iset_default_lpsize(as, setaddr, setsize,
3413                     rflags, rtype);
3414         }
3415 
3416         if (error == IE_RETRY) {
3417                 goto again;
3418         } else if (error == IE_NOMEM) {
3419                 error = EAGAIN;
3420         } else if (error == ENOTSUP) {
3421                 error = EINVAL;
3422         } else if (error == EAGAIN) {
3423                 mutex_enter(&as->a_contents);
3424                 if (!AS_ISNOUNMAPWAIT(as)) {
3425                         if (AS_ISUNMAPWAIT(as) == 0) {
3426                                 cv_broadcast(&as->a_cv);
3427                         }
3428                         AS_SETUNMAPWAIT(as);
3429                         AS_LOCK_EXIT(as);
3430                         while (AS_ISUNMAPWAIT(as)) {
3431                                 cv_wait(&as->a_cv, &as->a_contents);
3432                         }
3433                         mutex_exit(&as->a_contents);
3434                         AS_LOCK_ENTER(as, RW_WRITER);
3435                 } else {
3436                         /*
3437                          * We may have raced with
3438                          * segvn_reclaim()/segspt_reclaim(). In this case
3439                          * clean nounmapwait flag and retry since softlockcnt
3440                          * in this segment may be already 0.  We don't drop as
3441                          * writer lock so our number of retries without
3442                          * sleeping should be very small. See segvn_reclaim()
3443                          * for more comments.
3444                          */
3445                         AS_CLRNOUNMAPWAIT(as);
3446                         mutex_exit(&as->a_contents);
3447                 }
3448                 goto again;
3449         }
3450 
3451         as_setwatch(as);
3452         AS_LOCK_EXIT(as);
3453         return (error);
3454 }
3455 
3456 /*
3457  * Set up all of the uninitialized watched pages that we can.
3458  */
3459 void
3460 as_setwatch(struct as *as)
3461 {
3462         struct watched_page *pwp;
3463         struct seg *seg;
3464         caddr_t vaddr;
3465         uint_t prot;
3466         int  err, retrycnt;
3467 
3468         if (avl_numnodes(&as->a_wpage) == 0)
3469                 return;
3470 
3471         ASSERT(AS_WRITE_HELD(as));
3472 
3473         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3474             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3475                 retrycnt = 0;
3476         retry:
3477                 vaddr = pwp->wp_vaddr;
3478                 if (pwp->wp_oprot != 0 ||    /* already set up */
3479                     (seg = as_segat(as, vaddr)) == NULL ||
3480                     SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
3481                         continue;
3482 
3483                 pwp->wp_oprot = prot;
3484                 if (pwp->wp_read)
3485                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3486                 if (pwp->wp_write)
3487                         prot &= ~PROT_WRITE;
3488                 if (pwp->wp_exec)
3489                         prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3490                 if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
3491                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);


3498                 }
3499                 pwp->wp_prot = prot;
3500         }
3501 }
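
as_setwatch() above (and as_setwatchprot() below) derive a watched page's
effective protections by stripping permissions from the original ones: a read
watchpoint removes read, write and execute access, a write watchpoint removes
write access only, and an execute watchpoint again removes all three.  A
userland sketch of just that masking, with the PROT_* values taken from
<sys/mman.h>:

#include <stdio.h>
#include <sys/mman.h>		/* PROT_READ, PROT_WRITE, PROT_EXEC */

/* reduced protections for a watched page (mirrors the masking in as_setwatch()) */
static unsigned int
watch_prot(unsigned int oprot, int wp_read, int wp_write, int wp_exec)
{
	unsigned int prot = oprot;

	if (wp_read)
		prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
	if (wp_write)
		prot &= ~PROT_WRITE;
	if (wp_exec)
		prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
	return (prot);
}

int
main(void)
{
	unsigned int oprot = PROT_READ|PROT_WRITE;

	printf("write watchpoint: 0x%x -> 0x%x\n", oprot,
	    watch_prot(oprot, 0, 1, 0));	/* only write access removed */
	printf("read watchpoint:  0x%x -> 0x%x\n", oprot,
	    watch_prot(oprot, 1, 0, 0));	/* all access removed */
	return (0);
}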
3502 
3503 /*
3504  * Clear all of the watched pages in the address space.
3505  */
3506 void
3507 as_clearwatch(struct as *as)
3508 {
3509         struct watched_page *pwp;
3510         struct seg *seg;
3511         caddr_t vaddr;
3512         uint_t prot;
3513         int err, retrycnt;
3514 
3515         if (avl_numnodes(&as->a_wpage) == 0)
3516                 return;
3517 
3518         ASSERT(AS_WRITE_HELD(as));
3519 
3520         for (pwp = avl_first(&as->a_wpage); pwp != NULL;
3521             pwp = AVL_NEXT(&as->a_wpage, pwp)) {
3522                 retrycnt = 0;
3523         retry:
3524                 vaddr = pwp->wp_vaddr;
3525                 if (pwp->wp_oprot == 0 ||    /* not set up */
3526                     (seg = as_segat(as, vaddr)) == NULL)
3527                         continue;
3528 
3529                 if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
3530                         err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
3531                         if (err == IE_RETRY) {
3532                                 ASSERT(retrycnt == 0);
3533                                 retrycnt++;
3534                                 goto retry;
3535                         }
3536                 }
3537                 pwp->wp_oprot = 0;
3538                 pwp->wp_prot = 0;


3540 }
3541 
3542 /*
3543  * Force a new setup for all the watched pages in the range.
3544  */
3545 static void
3546 as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
3547 {
3548         struct watched_page *pwp;
3549         struct watched_page tpw;
3550         caddr_t eaddr = addr + size;
3551         caddr_t vaddr;
3552         struct seg *seg;
3553         int err, retrycnt;
3554         uint_t  wprot;
3555         avl_index_t where;
3556 
3557         if (avl_numnodes(&as->a_wpage) == 0)
3558                 return;
3559 
3560         ASSERT(AS_WRITE_HELD(as));
3561 
3562         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3563         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3564                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3565 
3566         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3567                 retrycnt = 0;
3568                 vaddr = pwp->wp_vaddr;
3569 
3570                 wprot = prot;
3571                 if (pwp->wp_read)
3572                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3573                 if (pwp->wp_write)
3574                         wprot &= ~PROT_WRITE;
3575                 if (pwp->wp_exec)
3576                         wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3577                 if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
3578                 retry:
3579                         seg = as_segat(as, vaddr);
3580                         if (seg == NULL) {


3599  * Clear all of the watched pages in the range.
3600  */
3601 static void
3602 as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
3603 {
3604         caddr_t eaddr = addr + size;
3605         struct watched_page *pwp;
3606         struct watched_page tpw;
3607         uint_t prot;
3608         struct seg *seg;
3609         int err, retrycnt;
3610         avl_index_t where;
3611 
3612         if (avl_numnodes(&as->a_wpage) == 0)
3613                 return;
3614 
3615         tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3616         if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
3617                 pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);
3618 
3619         ASSERT(AS_WRITE_HELD(as));
3620 
3621         while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3622 
3623                 if ((prot = pwp->wp_oprot) != 0) {
3624                         retrycnt = 0;
3625 
3626                         if (prot != pwp->wp_prot) {
3627                         retry:
3628                                 seg = as_segat(as, pwp->wp_vaddr);
3629                                 if (seg == NULL)
3630                                         continue;
3631                                 err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
3632                                     PAGESIZE, prot);
3633                                 if (err == IE_RETRY) {
3634                                         ASSERT(retrycnt == 0);
3635                                         retrycnt++;
3636                                         goto retry;
3637 
3638                                 }
3639                         }


3654         for (p = practive; p; p = p->p_next) {
3655                 if (p->p_as == as) {
3656                         mutex_enter(&p->p_lock);
3657                         if (p->p_as == as)
3658                                 sigaddq(p, NULL, siginfo, KM_NOSLEEP);
3659                         mutex_exit(&p->p_lock);
3660                 }
3661         }
3662         mutex_exit(&pidlock);
3663 }
3664 
3665 /*
3666  * return memory object ID
3667  */
3668 int
3669 as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
3670 {
3671         struct seg      *seg;
3672         int             sts;
3673 
3674         AS_LOCK_ENTER(as, RW_READER);
3675         seg = as_segat(as, addr);
3676         if (seg == NULL) {
3677                 AS_LOCK_EXIT(as);
3678                 return (EFAULT);
3679         }
3680         /*
3681          * catch old drivers which may not support getmemid
3682          */
3683         if (seg->s_ops->getmemid == NULL) {
3684                 AS_LOCK_EXIT(as);
3685                 return (ENODEV);
3686         }
3687 
3688         sts = SEGOP_GETMEMID(seg, addr, memidp);
3689 
3690         AS_LOCK_EXIT(as);
3691         return (sts);
3692 }