42 */
43
44 #include <sys/types.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/errno.h>
48 #include <sys/systm.h>
49 #include <sys/mman.h>
50 #include <sys/sysmacros.h>
51 #include <sys/cpuvar.h>
52 #include <sys/sysinfo.h>
53 #include <sys/kmem.h>
54 #include <sys/vnode.h>
55 #include <sys/vmsystm.h>
56 #include <sys/cmn_err.h>
57 #include <sys/debug.h>
58 #include <sys/tnf_probe.h>
59 #include <sys/vtrace.h>
60
61 #include <vm/hat.h>
62 #include <vm/xhat.h>
63 #include <vm/as.h>
64 #include <vm/seg.h>
65 #include <vm/seg_vn.h>
66 #include <vm/seg_dev.h>
67 #include <vm/seg_kmem.h>
68 #include <vm/seg_map.h>
69 #include <vm/seg_spt.h>
70 #include <vm/page.h>
71
72 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
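/*
 * Note: deadlk_wait is in clock ticks (units of 1/hz seconds).
 * as_fault() below hands it to delay() to back off briefly before
 * retrying a fault that came back with EDEADLK.
 */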
73
74 static struct kmem_cache *as_cache;
75
76 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
77 static void as_clearwatchprot(struct as *, caddr_t, size_t);
78 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
79
80
81 /*
82 * Verifying the segment lists is very time-consuming; it may not be
83 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
84 */
654
655 as->a_flags = 0;
656 as->a_vbits = 0;
657 as->a_hrm = NULL;
658 as->a_seglast = NULL;
659 as->a_size = 0;
660 as->a_resvsize = 0;
661 as->a_updatedir = 0;
662 gethrestime(&as->a_updatetime);
663 as->a_objectdir = NULL;
664 as->a_sizedir = 0;
665 as->a_userlimit = (caddr_t)USERLIMIT;
666 as->a_lastgap = NULL;
667 as->a_lastgaphl = NULL;
668 as->a_callbacks = NULL;
669
670 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
671 as->a_hat = hat_alloc(as); /* create hat for default system mmu */
672 AS_LOCK_EXIT(as, &as->a_lock);
673
674 as->a_xhat = NULL;
675
676 return (as);
677 }
678
679 /*
680 * Free an address space data structure.
681 * Need to free the hat first and then
682 * all the segments on this as and finally
683 * the space for the as struct itself.
684 */
685 void
686 as_free(struct as *as)
687 {
688 struct hat *hat = as->a_hat;
689 struct seg *seg, *next;
690 int called = 0;
691
692 top:
693 /*
694 * Invoke ALL callbacks. as_do_callbacks will do one callback
695 * per call, and not return (-1) until the callback has completed.
696 * When as_do_callbacks returns zero, all callbacks have completed.
697 */
698 mutex_enter(&as->a_contents);
699 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
700 ;
701
702 /* This will prevent new XHATs from attaching to as */
703 if (!called)
704 AS_SETBUSY(as);
705 mutex_exit(&as->a_contents);
706 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
707
708 if (!called) {
709 called = 1;
710 hat_free_start(hat);
711 if (as->a_xhat != NULL)
712 xhat_free_start_all(as);
713 }
714 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
715 int err;
716
717 next = AS_SEGNEXT(as, seg);
718 retry:
719 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
720 if (err == EAGAIN) {
721 mutex_enter(&as->a_contents);
722 if (as->a_callbacks) {
723 AS_LOCK_EXIT(as, &as->a_lock);
724 } else if (!AS_ISNOUNMAPWAIT(as)) {
725 /*
726 * Memory is currently locked. Wait for a
727 * cv_signal that it has been unlocked, then
728 * try the operation again.
729 */
730 if (AS_ISUNMAPWAIT(as) == 0)
731 cv_broadcast(&as->a_cv);
732 AS_SETUNMAPWAIT(as);
733 AS_LOCK_EXIT(as, &as->a_lock);
734 while (AS_ISUNMAPWAIT(as))
735 cv_wait(&as->a_cv, &as->a_contents);
736 } else {
737 /*
738 * We may have raced with
739 * segvn_reclaim()/segspt_reclaim(). In this
740 * case clean nounmapwait flag and retry since
741 * softlockcnt in this segment may be already
742 * 0. We don't drop as writer lock so our
743 * number of retries without sleeping should
744 * be very small. See segvn_reclaim() for
745 * more comments.
746 */
747 AS_CLRNOUNMAPWAIT(as);
748 mutex_exit(&as->a_contents);
749 goto retry;
750 }
751 mutex_exit(&as->a_contents);
752 goto top;
753 } else {
754 /*
755 * We do not expect any other error return at this
756 * time. This is similar to an ASSERT in seg_unmap()
757 */
758 ASSERT(err == 0);
759 }
760 }
761 hat_free_end(hat);
762 if (as->a_xhat != NULL)
763 xhat_free_end_all(as);
764 AS_LOCK_EXIT(as, &as->a_lock);
765
766 /* /proc stuff */
767 ASSERT(avl_numnodes(&as->a_wpage) == 0);
768 if (as->a_objectdir) {
769 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
770 as->a_objectdir = NULL;
771 as->a_sizedir = 0;
772 }
773
774 /*
775 * Free the struct as back to kmem. Assert it has no segments.
776 */
777 ASSERT(avl_numnodes(&as->a_segtree) == 0);
778 kmem_cache_free(as_cache, as);
779 }
780
781 int
782 as_dup(struct as *as, struct proc *forkedproc)
783 {
784 struct as *newas;
785 struct seg *seg, *newseg;
786 size_t purgesize = 0;
787 int error;
788
789 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
790 as_clearwatch(as);
791 newas = as_alloc();
792 newas->a_userlimit = as->a_userlimit;
793 newas->a_proc = forkedproc;
794
795 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
796
797 /* This will prevent new XHATs from attaching */
798 mutex_enter(&as->a_contents);
799 AS_SETBUSY(as);
800 mutex_exit(&as->a_contents);
801 mutex_enter(&newas->a_contents);
802 AS_SETBUSY(newas);
803 mutex_exit(&newas->a_contents);
804
805 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
806
807 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
808
809 if (seg->s_flags & S_PURGE) {
810 purgesize += seg->s_size;
811 continue;
812 }
813
814 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
815 if (newseg == NULL) {
816 AS_LOCK_EXIT(newas, &newas->a_lock);
817 as_setwatch(as);
818 mutex_enter(&as->a_contents);
819 AS_CLRBUSY(as);
820 mutex_exit(&as->a_contents);
821 AS_LOCK_EXIT(as, &as->a_lock);
822 as_free(newas);
823 return (-1);
824 }
825 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
826 /*
827 * We call seg_free() on the new seg
828 * because the segment is not set up
829 * completely; i.e. it has no ops.
830 */
831 as_setwatch(as);
832 mutex_enter(&as->a_contents);
833 AS_CLRBUSY(as);
834 mutex_exit(&as->a_contents);
835 AS_LOCK_EXIT(as, &as->a_lock);
836 seg_free(newseg);
837 AS_LOCK_EXIT(newas, &newas->a_lock);
838 as_free(newas);
839 return (error);
840 }
841 newas->a_size += seg->s_size;
842 }
843 newas->a_resvsize = as->a_resvsize - purgesize;
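/*
 * The loop above skipped S_PURGE segments entirely, so the child's
 * reserved size here is the parent's minus the space those segments
 * covered (accumulated in purgesize).
 */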
844
845 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
846 if (as->a_xhat != NULL)
847 error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);
848
849 mutex_enter(&newas->a_contents);
850 AS_CLRBUSY(newas);
851 mutex_exit(&newas->a_contents);
852 AS_LOCK_EXIT(newas, &newas->a_lock);
853
854 as_setwatch(as);
855 mutex_enter(&as->a_contents);
856 AS_CLRBUSY(as);
857 mutex_exit(&as->a_contents);
858 AS_LOCK_EXIT(as, &as->a_lock);
859 if (error != 0) {
860 as_free(newas);
861 return (error);
862 }
863 forkedproc->p_as = newas;
864 return (0);
865 }
866
867 /*
868 * Handle a ``fault'' at addr for size bytes.
869 */
870 faultcode_t
871 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
872 enum fault_type type, enum seg_rw rw)
873 {
874 struct seg *seg;
875 caddr_t raddr; /* rounded down addr */
876 size_t rsize; /* rounded up size */
877 size_t ssize;
878 faultcode_t res = 0;
879 caddr_t addrsav;
880 struct seg *segsav;
881 int as_lock_held;
882 klwp_t *lwp = ttolwp(curthread);
883 int is_xhat = 0;
884 int holding_wpage = 0;
885 extern struct seg_ops segdev_ops;
886
887
888
889 if (as->a_hat != hat) {
890 /* This must be an XHAT then */
891 is_xhat = 1;
892
893 if ((type != F_INVAL) || (as == &kas))
894 return (FC_NOSUPPORT);
895 }
896
897 retry:
898 if (!is_xhat) {
899 /*
900 * Indicate that the lwp is not to be stopped while waiting
901 * for a pagefault. This is to avoid deadlock while debugging
902 * a process via /proc over NFS (in particular).
903 */
904 if (lwp != NULL)
905 lwp->lwp_nostop++;
906
907 /*
908 * The same length must be used when we softlock and softunlock.
909 * We don't support softunlocking lengths less than
910 * the original length when there is largepage support.
911 * See seg_dev.c for more comments.
912 */
913 switch (type) {
914
915 case F_SOFTLOCK:
916 CPU_STATS_ADD_K(vm, softlock, 1);
917 break;
918
919 case F_SOFTUNLOCK:
920 break;
921
922 case F_PROT:
923 CPU_STATS_ADD_K(vm, prot_fault, 1);
924 break;
925
926 case F_INVAL:
927 CPU_STATS_ENTER_K();
928 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
929 if (as == &kas)
930 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
931 CPU_STATS_EXIT_K();
932 break;
933 }
934 }
935
936 /* Kernel probe */
937 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
938 tnf_opaque, address, addr,
939 tnf_fault_type, fault_type, type,
940 tnf_seg_access, access, rw);
941
942 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
943 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
944 (size_t)raddr;
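/*
 * Worked example of the rounding above, assuming a 4K page size
 * (PAGEOFFSET == 0xFFF, PAGEMASK == ~0xFFF); the values are
 * illustrative only:
 *
 *	addr  = 0x12345, size = 0x100
 *	raddr = 0x12345 & ~0xFFF                 = 0x12000
 *	rsize = ((0x12445 + 0xFFF) & ~0xFFF) - 0x12000
 *	      = 0x13000 - 0x12000                = 0x1000
 *
 * i.e. the faulting range is expanded outward to whole pages.
 */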
945
946 /*
947 * XXX -- Don't grab the as lock for segkmap. We should grab it for
948 * correctness, but then we could be stuck holding this lock for
949 * a LONG time if the fault needs to be resolved on a slow
950 * filesystem, and then no-one will be able to exec new commands,
951 * as exec'ing requires the write lock on the as.
952 */
953 if (as == &kas && segkmap && segkmap->s_base <= raddr &&
954 raddr + size < segkmap->s_base + segkmap->s_size) {
955 /*
956 * if (as==&kas), this can't be XHAT: we've already returned
957 * FC_NOSUPPORT.
958 */
959 seg = segkmap;
960 as_lock_held = 0;
961 } else {
962 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
963 if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
964 /*
965 * Grab and hold the writers' lock on the as
966 * if the fault is to a watched page.
967 * This will keep CPUs from "peeking" at the
968 * address range while we're temporarily boosting
969 * the permissions for the XHAT device to
970 * resolve the fault in the segment layer.
971 *
972 * We could check whether faulted address
973 * is within a watched page and only then grab
974 * the writer lock, but this is simpler.
975 */
976 AS_LOCK_EXIT(as, &as->a_lock);
977 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
978 }
979
980 seg = as_segat(as, raddr);
981 if (seg == NULL) {
982 AS_LOCK_EXIT(as, &as->a_lock);
983 if ((lwp != NULL) && (!is_xhat))
984 lwp->lwp_nostop--;
985 return (FC_NOMAP);
986 }
987
988 as_lock_held = 1;
989 }
990
991 addrsav = raddr;
992 segsav = seg;
993
994 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
995 if (raddr >= seg->s_base + seg->s_size) {
996 seg = AS_SEGNEXT(as, seg);
997 if (seg == NULL || raddr != seg->s_base) {
998 res = FC_NOMAP;
999 break;
1000 }
1001 }
1002 if (raddr + rsize > seg->s_base + seg->s_size)
1003 ssize = seg->s_base + seg->s_size - raddr;
1004 else
1005 ssize = rsize;
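/*
 * Illustrative walk: given adjacent segments [0x10000, 0x14000)
 * and [0x14000, 0x18000), a fault with raddr == 0x12000 and
 * rsize == 0x4000 becomes two SEGOP_FAULT calls of 0x2000 bytes
 * each; the second segment must begin exactly where the first
 * ends, or the walk fails with FC_NOMAP.
 */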
1006
1007 if (!is_xhat || (seg->s_ops != &segdev_ops)) {
1008
1009 if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
1010 pr_is_watchpage_as(raddr, rw, as)) {
1011 /*
1012 * Handle watch pages. If we're faulting on a
1013 * watched page from an X-hat, we have to
1014 * restore the original permissions while we
1015 * handle the fault.
1016 */
1017 as_clearwatch(as);
1018 holding_wpage = 1;
1019 }
1020
1021 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
1022
1023 /* Restore watchpoints */
1024 if (holding_wpage) {
1025 as_setwatch(as);
1026 holding_wpage = 0;
1027 }
1028
1029 if (res != 0)
1030 break;
1031 } else {
1032 /* XHAT does not support seg_dev */
1033 res = FC_NOSUPPORT;
1034 break;
1035 }
1036 }
1037
1038 /*
1039 * If we were SOFTLOCKing and encountered a failure,
1040 * we must SOFTUNLOCK the range we already did. (Maybe we
1041 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
1042 * right here...)
1043 */
1044 if (res != 0 && type == F_SOFTLOCK) {
1045 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
1046 if (addrsav >= seg->s_base + seg->s_size)
1047 seg = AS_SEGNEXT(as, seg);
1048 ASSERT(seg != NULL);
1049 /*
1050 * Now call the fault routine again to perform the
1051 * unlock using S_OTHER instead of the rw variable
1052 * since we never got a chance to touch the pages.
1053 */
1054 if (raddr > seg->s_base + seg->s_size)
1055 ssize = seg->s_base + seg->s_size - addrsav;
1056 else
1057 ssize = raddr - addrsav;
1058 (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
1059 F_SOFTUNLOCK, S_OTHER);
1060 }
1061 }
1062 if (as_lock_held)
1063 AS_LOCK_EXIT(as, &as->a_lock);
1064 if ((lwp != NULL) && (!is_xhat))
1065 lwp->lwp_nostop--;
1066
1067 /*
1068 * If the lower levels returned EDEADLK for a fault,
1069 * it means that we should retry the fault. Let's also wait
1070 * a bit to let the deadlock-causing condition clear.
1071 * This is part of a gross hack to work around a design flaw
1072 * in the ufs/sds logging code and should go away when the
1073 * logging code is re-designed to fix the problem. See bug
1074 * 4125102 for details of the problem.
1075 */
1076 if (FC_ERRNO(res) == EDEADLK) {
1077 delay(deadlk_wait);
1078 res = 0;
1079 goto retry;
1080 }
1081 return (res);
1082 }
1083
1084
2149 * memory requirements. Its usefulness for this purpose depends on
2150 * how well the segment-level routines do at returning accurate
2151 * information.
2152 */
2153 size_t
2154 as_swapout(struct as *as)
2155 {
2156 struct seg *seg;
2157 size_t swpcnt = 0;
2158
2159 /*
2160 * Kernel-only processes have given up their address
2161 * spaces. Of course, we shouldn't be attempting to
2162 * swap out such processes in the first place...
2163 */
2164 if (as == NULL)
2165 return (0);
2166
2167 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2168
2169 /* Prevent XHATs from attaching */
2170 mutex_enter(&as->a_contents);
2171 AS_SETBUSY(as);
2172 mutex_exit(&as->a_contents);
2173
2174
2175 /*
2176 * Free all mapping resources associated with the address
2177 * space. The segment-level swapout routines capitalize
2178 * on this unmapping by scavenging pages that have become
2179 * unmapped here.
2180 */
2181 hat_swapout(as->a_hat);
2182 if (as->a_xhat != NULL)
2183 xhat_swapout_all(as);
2184
2185 mutex_enter(&as->a_contents);
2186 AS_CLRBUSY(as);
2187 mutex_exit(&as->a_contents);
2188
2189 /*
2190 * Call the swapout routines of all segments in the address
2191 * space to do the actual work, accumulating the amount of
2192 * space reclaimed.
2193 */
2194 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2195 struct seg_ops *ov = seg->s_ops;
2196
2197 /*
2198 * We have to check to see if the seg has
2199 * an ops vector because the seg may have
2200 * been in the middle of being set up when
2201 * the process was picked for swapout.
2202 */
2203 if ((ov != NULL) && (ov->swapout != NULL))
2204 swpcnt += SEGOP_SWAPOUT(seg);
2205 }
2206 AS_LOCK_EXIT(as, &as->a_lock);
2207 return (swpcnt);
2208 }
42 */
43
44 #include <sys/types.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/errno.h>
48 #include <sys/systm.h>
49 #include <sys/mman.h>
50 #include <sys/sysmacros.h>
51 #include <sys/cpuvar.h>
52 #include <sys/sysinfo.h>
53 #include <sys/kmem.h>
54 #include <sys/vnode.h>
55 #include <sys/vmsystm.h>
56 #include <sys/cmn_err.h>
57 #include <sys/debug.h>
58 #include <sys/tnf_probe.h>
59 #include <sys/vtrace.h>
60
61 #include <vm/hat.h>
62 #include <vm/as.h>
63 #include <vm/seg.h>
64 #include <vm/seg_vn.h>
65 #include <vm/seg_dev.h>
66 #include <vm/seg_kmem.h>
67 #include <vm/seg_map.h>
68 #include <vm/seg_spt.h>
69 #include <vm/page.h>
70
71 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
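/*
 * Note: deadlk_wait is in clock ticks (units of 1/hz seconds).
 * as_fault() below hands it to delay() to back off briefly before
 * retrying a fault that came back with EDEADLK.
 */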
72
73 static struct kmem_cache *as_cache;
74
75 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
76 static void as_clearwatchprot(struct as *, caddr_t, size_t);
77 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
78
79
80 /*
81 * Verifying the segment lists is very time-consuming; it may not be
82 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
83 */
653
654 as->a_flags = 0;
655 as->a_vbits = 0;
656 as->a_hrm = NULL;
657 as->a_seglast = NULL;
658 as->a_size = 0;
659 as->a_resvsize = 0;
660 as->a_updatedir = 0;
661 gethrestime(&as->a_updatetime);
662 as->a_objectdir = NULL;
663 as->a_sizedir = 0;
664 as->a_userlimit = (caddr_t)USERLIMIT;
665 as->a_lastgap = NULL;
666 as->a_lastgaphl = NULL;
667 as->a_callbacks = NULL;
668
669 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
670 as->a_hat = hat_alloc(as); /* create hat for default system mmu */
671 AS_LOCK_EXIT(as, &as->a_lock);
672
673 return (as);
674 }
675
676 /*
677 * Free an address space data structure.
678 * Need to free the hat first and then
679 * all the segments on this as and finally
680 * the space for the as struct itself.
681 */
682 void
683 as_free(struct as *as)
684 {
685 struct hat *hat = as->a_hat;
686 struct seg *seg, *next;
687 boolean_t free_started = B_FALSE;
688
689 top:
690 /*
691 * Invoke ALL callbacks. as_do_callbacks will do one callback
692 * per call, and not return (-1) until the callback has completed.
693 * When as_do_callbacks returns zero, all callbacks have completed.
694 */
695 mutex_enter(&as->a_contents);
696 while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0))
697 ;
698
699 mutex_exit(&as->a_contents);
700 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
701
702 if (!free_started) {
703 free_started = B_TRUE;
704 hat_free_start(hat);
705 }
706 for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
707 int err;
708
709 next = AS_SEGNEXT(as, seg);
710 retry:
711 err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
712 if (err == EAGAIN) {
713 mutex_enter(&as->a_contents);
714 if (as->a_callbacks) {
715 AS_LOCK_EXIT(as, &as->a_lock);
716 } else if (!AS_ISNOUNMAPWAIT(as)) {
717 /*
718 * Memory is currently locked. Wait for a
719 * cv_signal that it has been unlocked, then
720 * try the operation again.
721 */
722 if (AS_ISUNMAPWAIT(as) == 0)
723 cv_broadcast(&as->a_cv);
724 AS_SETUNMAPWAIT(as);
725 AS_LOCK_EXIT(as, &as->a_lock);
726 while (AS_ISUNMAPWAIT(as))
727 cv_wait(&as->a_cv, &as->a_contents);
728 } else {
729 /*
730 * We may have raced with
731 * segvn_reclaim()/segspt_reclaim(). In this
732 * case clean nounmapwait flag and retry since
733 * softlockcnt in this segment may be already
734 * 0. We don't drop as writer lock so our
735 * number of retries without sleeping should
736 * be very small. See segvn_reclaim() for
737 * more comments.
738 */
739 AS_CLRNOUNMAPWAIT(as);
740 mutex_exit(&as->a_contents);
741 goto retry;
742 }
743 mutex_exit(&as->a_contents);
744 goto top;
745 } else {
746 /*
747 * We do not expect any other error return at this
748 * time. This is similar to an ASSERT in seg_unmap()
749 */
750 ASSERT(err == 0);
751 }
752 }
753 hat_free_end(hat);
754 AS_LOCK_EXIT(as, &as->a_lock);
755
756 /* /proc stuff */
757 ASSERT(avl_numnodes(&as->a_wpage) == 0);
758 if (as->a_objectdir) {
759 kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
760 as->a_objectdir = NULL;
761 as->a_sizedir = 0;
762 }
763
764 /*
765 * Free the struct as back to kmem. Assert it has no segments.
766 */
767 ASSERT(avl_numnodes(&as->a_segtree) == 0);
768 kmem_cache_free(as_cache, as);
769 }
770
771 int
772 as_dup(struct as *as, struct proc *forkedproc)
773 {
774 struct as *newas;
775 struct seg *seg, *newseg;
776 size_t purgesize = 0;
777 int error;
778
779 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
780 as_clearwatch(as);
781 newas = as_alloc();
782 newas->a_userlimit = as->a_userlimit;
783 newas->a_proc = forkedproc;
784
785 AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);
786
787 (void) hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_SRD);
788
789 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
790
791 if (seg->s_flags & S_PURGE) {
792 purgesize += seg->s_size;
793 continue;
794 }
795
796 newseg = seg_alloc(newas, seg->s_base, seg->s_size);
797 if (newseg == NULL) {
798 AS_LOCK_EXIT(newas, &newas->a_lock);
799 as_setwatch(as);
800 AS_LOCK_EXIT(as, &as->a_lock);
801 as_free(newas);
802 return (-1);
803 }
804 if ((error = SEGOP_DUP(seg, newseg)) != 0) {
805 /*
806 * We call seg_free() on the new seg
807 * because the segment is not set up
808 * completely; i.e. it has no ops.
809 */
810 as_setwatch(as);
811 AS_LOCK_EXIT(as, &as->a_lock);
812 seg_free(newseg);
813 AS_LOCK_EXIT(newas, &newas->a_lock);
814 as_free(newas);
815 return (error);
816 }
817 newas->a_size += seg->s_size;
818 }
819 newas->a_resvsize = as->a_resvsize - purgesize;
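/*
 * The loop above skipped S_PURGE segments entirely, so the child's
 * reserved size here is the parent's minus the space those segments
 * covered (accumulated in purgesize).
 */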
820
821 error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
822
823 AS_LOCK_EXIT(newas, &newas->a_lock);
824
825 as_setwatch(as);
826 AS_LOCK_EXIT(as, &as->a_lock);
827 if (error != 0) {
828 as_free(newas);
829 return (error);
830 }
831 forkedproc->p_as = newas;
832 return (0);
833 }
834
835 /*
836 * Handle a ``fault'' at addr for size bytes.
837 */
838 faultcode_t
839 as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
840 enum fault_type type, enum seg_rw rw)
841 {
842 struct seg *seg;
843 caddr_t raddr; /* rounded down addr */
844 size_t rsize; /* rounded up size */
845 size_t ssize;
846 faultcode_t res = 0;
847 caddr_t addrsav;
848 struct seg *segsav;
849 int as_lock_held;
850 klwp_t *lwp = ttolwp(curthread);
851
852
853
854 retry:
855 /*
856 * Indicate that the lwp is not to be stopped while waiting for a
857 * pagefault. This is to avoid deadlock while debugging a process
858 * via /proc over NFS (in particular).
859 */
860 if (lwp != NULL)
861 lwp->lwp_nostop++;
862
863 /*
864 * The same length must be used when we softlock and softunlock. We
865 * don't support softunlocking lengths less than the original length
866 * when there is largepage support. See seg_dev.c for more
867 * comments.
868 */
869 switch (type) {
870
871 case F_SOFTLOCK:
872 CPU_STATS_ADD_K(vm, softlock, 1);
873 break;
874
875 case F_SOFTUNLOCK:
876 break;
877
878 case F_PROT:
879 CPU_STATS_ADD_K(vm, prot_fault, 1);
880 break;
881
882 case F_INVAL:
883 CPU_STATS_ENTER_K();
884 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
885 if (as == &kas)
886 CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
887 CPU_STATS_EXIT_K();
888 break;
889 }
890
891 /* Kernel probe */
892 TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
893 tnf_opaque, address, addr,
894 tnf_fault_type, fault_type, type,
895 tnf_seg_access, access, rw);
896
897 raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
898 rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
899 (size_t)raddr;
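/*
 * Worked example of the rounding above, assuming a 4K page size
 * (PAGEOFFSET == 0xFFF, PAGEMASK == ~0xFFF); the values are
 * illustrative only:
 *
 *	addr  = 0x12345, size = 0x100
 *	raddr = 0x12345 & ~0xFFF                 = 0x12000
 *	rsize = ((0x12445 + 0xFFF) & ~0xFFF) - 0x12000
 *	      = 0x13000 - 0x12000                = 0x1000
 *
 * i.e. the faulting range is expanded outward to whole pages.
 */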
900
901 /*
902 * XXX -- Don't grab the as lock for segkmap. We should grab it for
903 * correctness, but then we could be stuck holding this lock for
904 * a LONG time if the fault needs to be resolved on a slow
905 * filesystem, and then no-one will be able to exec new commands,
906 * as exec'ing requires the write lock on the as.
907 */
908 if (as == &kas && segkmap && segkmap->s_base <= raddr &&
909 raddr + size < segkmap->s_base + segkmap->s_size) {
910 seg = segkmap;
911 as_lock_held = 0;
912 } else {
913 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
914
915 seg = as_segat(as, raddr);
916 if (seg == NULL) {
917 AS_LOCK_EXIT(as, &as->a_lock);
918 if (lwp != NULL)
919 lwp->lwp_nostop--;
920 return (FC_NOMAP);
921 }
922
923 as_lock_held = 1;
924 }
925
926 addrsav = raddr;
927 segsav = seg;
928
929 for (; rsize != 0; rsize -= ssize, raddr += ssize) {
930 if (raddr >= seg->s_base + seg->s_size) {
931 seg = AS_SEGNEXT(as, seg);
932 if (seg == NULL || raddr != seg->s_base) {
933 res = FC_NOMAP;
934 break;
935 }
936 }
937 if (raddr + rsize > seg->s_base + seg->s_size)
938 ssize = seg->s_base + seg->s_size - raddr;
939 else
940 ssize = rsize;
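/*
 * Illustrative walk: given adjacent segments [0x10000, 0x14000)
 * and [0x14000, 0x18000), a fault with raddr == 0x12000 and
 * rsize == 0x4000 becomes two SEGOP_FAULT calls of 0x2000 bytes
 * each; the second segment must begin exactly where the first
 * ends, or the walk fails with FC_NOMAP.
 */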
941
942 res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);
943 if (res != 0)
944 break;
945 }
946
947 /*
948 * If we were SOFTLOCKing and encountered a failure,
949 * we must SOFTUNLOCK the range we already did. (Maybe we
950 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
951 * right here...)
952 */
953 if (res != 0 && type == F_SOFTLOCK) {
954 for (seg = segsav; addrsav < raddr; addrsav += ssize) {
955 if (addrsav >= seg->s_base + seg->s_size)
956 seg = AS_SEGNEXT(as, seg);
957 ASSERT(seg != NULL);
958 /*
959 * Now call the fault routine again to perform the
960 * unlock using S_OTHER instead of the rw variable
961 * since we never got a chance to touch the pages.
962 */
963 if (raddr > seg->s_base + seg->s_size)
964 ssize = seg->s_base + seg->s_size - addrsav;
965 else
966 ssize = raddr - addrsav;
967 (void) SEGOP_FAULT(hat, seg, addrsav, ssize,
968 F_SOFTUNLOCK, S_OTHER);
969 }
970 }
971 if (as_lock_held)
972 AS_LOCK_EXIT(as, &as->a_lock);
973 if (lwp != NULL)
974 lwp->lwp_nostop--;
975
976 /*
977 * If the lower levels returned EDEADLK for a fault,
978 * it means that we should retry the fault. Let's also wait
979 * a bit to let the deadlock-causing condition clear.
980 * This is part of a gross hack to work around a design flaw
981 * in the ufs/sds logging code and should go away when the
982 * logging code is re-designed to fix the problem. See bug
983 * 4125102 for details of the problem.
984 */
985 if (FC_ERRNO(res) == EDEADLK) {
986 delay(deadlk_wait);
987 res = 0;
988 goto retry;
989 }
990 return (res);
991 }
992
993
2058 * memory requirements. Its usefulness for this purpose depends on
2059 * how well the segment-level routines do at returning accurate
2060 * information.
2061 */
2062 size_t
2063 as_swapout(struct as *as)
2064 {
2065 struct seg *seg;
2066 size_t swpcnt = 0;
2067
2068 /*
2069 * Kernel-only processes have given up their address
2070 * spaces. Of course, we shouldn't be attempting to
2071 * swap out such processes in the first place...
2072 */
2073 if (as == NULL)
2074 return (0);
2075
2076 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2077
2078 /*
2079 * Free all mapping resources associated with the address
2080 * space. The segment-level swapout routines capitalize
2081 * on this unmapping by scavenging pages that have become
2082 * unmapped here.
2083 */
2084 hat_swapout(as->a_hat);
2085
2086 /*
2087 * Call the swapout routines of all segments in the address
2088 * space to do the actual work, accumulating the amount of
2089 * space reclaimed.
2090 */
2091 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2092 struct seg_ops *ov = seg->s_ops;
2093
2094 /*
2095 * We have to check to see if the seg has
2096 * an ops vector because the seg may have
2097 * been in the middle of being set up when
2098 * the process was picked for swapout.
2099 */
2100 if ((ov != NULL) && (ov->swapout != NULL))
2101 swpcnt += SEGOP_SWAPOUT(seg);
2102 }
2103 AS_LOCK_EXIT(as, &as->a_lock);
2104 return (swpcnt);
2105 }