1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2014 Joyent, Inc. All rights reserved. 14 */ 15 16 #ifndef _VM_DEP_H 17 #define _VM_DEP_H 18 19 /* 20 * UNIX machine dependent virtual memory support for ARMv7. 21 */ 22 23 #ifdef __cplusplus 24 extern "C" { 25 #endif 26 27 #include <sys/param.h> 28 #include <sys/memnode.h> 29 30 /* 31 * Do not use GETTICK. It is only meant to be used when timesource 32 * synchronization is unimportant. 33 */ 34 #define GETTICK() gethrtime_unscaled() 35 36 /* tick value that should be used for random values */ 37 extern u_longlong_t randtick(void); 38 39 #define PLCNT_SZ(ctrs_sz) panic("plcnt_sz") 40 41 #define PLCNT_INIT(addr) panic("plcnt_init") 42 43 #define PLCNT_INCR(pp, mnode, mtype, szc, flags) panic("plcnt_incr") 44 #define PLCNT_DECR(pp, mnode, mtype, szc, flags) panic("plcnt_decr") 45 46 /* 47 * Macro to update page list max counts. This is a no-op on x86, not on SPARC. 48 * We panic for now on ARM. It's primarily used for kcage it appears. 49 */ 50 #define PLCNT_XFER_NORELOC(pp) panic("plcnt_xfer_noreloc") 51 52 /* 53 * Macro to modify the page list max counts when memory is added to 54 * the page lists during startup (add_physmem) or during a DR operation 55 * when memory is added (kphysm_add_memory_dynamic) or deleted 56 * (kphysm_del_cleanup). 57 */ 58 extern void plcnt_modify_max(pfn_t, long); 59 #define PLCNT_MODIFY_MAX(pfn, cnt) plcnt_modify_max(pfn, cnt) 60 61 /* 62 * These macros are used in dealing with the page counters and its candidate 63 * counters. These are used as a part of coalescing our free lists. 64 */ 65 66 /* 67 * The maximum number of memory ranges that exist in the system. Consider i86pc, 68 * there we have various ranges that exist due to legacy DMA. eg. < 16 Mb, < 4 69 * Gb for PCI, etc. Like sun4, this may actually just be a single number, since 70 * unlike on sun4, we're not going to pretend we have a kcage. 71 */ 72 #define MAX_MNODE_MRANGES 1 73 #define MNODE_RANGE_CNT(mnode) 1 74 #define MNODE_MAX_MRANGE(mnode) (MAX_MNODE_MRANGES - 1) 75 #define MTYPE_2_MRANGE(mnode, mtype) mtype 76 77 78 /* 79 * XXX These are strawman definitions based on the i86pc versions of the 80 * page_freelists and the page_cachelists; however, unlike i86pc we only have 81 * one mtype, therefore we don't bother keeping around an index for it. 82 * 83 * We index into the freelist by [mmu_page_sizes][colors]. We index into the 84 * cachelist by [colors]. 85 */ 86 extern page_t ***page_freelists; 87 extern page_t **page_cachelists; 88 89 #define PAGE_FREELISTS(mnode, szc, color, mtype) \ 90 (*(page_freelists[szc] + (color))) 91 #define PAGE_CACHELISTS(mnode, color, mtype) \ 92 (page_cachelists[color]) 93 94 /* 95 * XXX This set of locks needs to be rethought with respect to mandatory page 96 * coloring. It was taken rather naively from i86pc 97 */ 98 99 /* 100 * There are mutexes for both the page freelist 101 * and the page cachelist. We want enough locks to make contention 102 * reasonable, but not too many -- otherwise page_freelist_lock() gets 103 * so expensive that it becomes the bottleneck! 104 */ 105 106 #define NPC_MUTEX 16 107 108 extern kmutex_t *fpc_mutex[NPC_MUTEX]; 109 extern kmutex_t *cpc_mutex[NPC_MUTEX]; 110 111 #define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \ 112 &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \ 113 &cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) 114 115 #define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode]) 116 #define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode]) 117 118 /* 119 * Memory node iterators. We may need something here related to colors, but we 120 * may not. For the time being, just panic on use for ust to get back to later. 121 */ 122 #define MEM_NODE_ITERATOR_DECL(it) panic("mem_node_iterator_decl") 123 #define MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it) panic("mem_node_iterator_init") 124 125 /* 126 * XXX Do we ever interleave memory ndoes on armv6? Probably not? Does coloring 127 * come into play here? 128 */ 129 #define HPM_COUNTERS_LIMITS(mnodes, pyysbase, physmax, first) \ 130 panic("hpm_counters_list") 131 132 #define PAGE_CTRS_WRITE_LOCK(mnode) panic("page_ctrs_write_lock") 133 #define PAGE_CTRS_WRITE_UNLOCK(mnode) panic("page_ctrs_write_unlock") 134 #define PAGE_CTRS_ADJUST(pfn, cnt, rv) panic("page_cntrs_adjust") 135 136 /* 137 * Coloring related macros. For more on coloring, see uts/armv7/vm/vm_machdep.c. 138 */ 139 #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ 140 (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) 141 142 #define PAGE_CONVERT_COLOR(ncolor, szc, nszc) \ 143 ((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc))) 144 145 #define PFN_2_COLOR(pfn, szc, it) \ 146 (((pfn) & page_colors_mask) >> \ 147 (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)) 148 149 #define PNUM_SIZE(szc) \ 150 (hw_page_array[(szc)].hp_pgcnt) 151 #define PNUM_SHIFT(szc) \ 152 (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 153 #define PAGE_GET_SIZE(szc) \ 154 (hw_page_array[(szc)].hp_size) 155 #define PAGE_GET_SHIFT(szc) \ 156 (hw_page_array[(szc)].hp_shift) 157 #define PAGE_GET_PAGECOLORS(szc) \ 158 (hw_page_array[(szc)].hp_colors) 159 160 #define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \ 161 panic("page_next_pfn_for_color") 162 163 /* get the color equivalency mask for the next szc */ 164 #define PAGE_GET_NSZ_MASK(szc, mask) \ 165 ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 166 167 /* get the color of the next szc */ 168 #define PAGE_GET_NSZ_COLOR(szc, color) \ 169 ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc))) 170 171 /* Find the bin for the given page if it was of size szc */ 172 #define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, NULL)) 173 174 #define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc)) 175 176 #define PP_2_MEM_NODE(pp) (0) 177 #define PP_2_MTYPE(pp) (0) 178 #define PP_2_SZC(pp) (pp->p_szc) 179 180 #define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc)) 181 #define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1)) 182 183 /* 184 * XXX These are total strawmen based on i86pc and sun4 for walking the page 185 * tables. 186 */ 187 typedef struct page_list_walker { 188 uint_t plw_colors; /* num of colors for szc */ 189 uint_t plw_color_mask; /* colors-1 */ 190 uint_t plw_bin_step; /* next bin: 1 or 2 */ 191 uint_t plw_count; /* loop count */ 192 uint_t plw_bin0; /* starting bin */ 193 uint_t plw_bin_marker; /* bin after initial jump */ 194 uint_t plw_bin_split_prev; /* last bin we tried to split */ 195 uint_t plw_do_split; /* set if OK to split */ 196 uint_t plw_split_next; /* next bin to split */ 197 uint_t plw_ceq_dif; /* number of different color groups */ 198 /* to check */ 199 uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */ 200 uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */ 201 } page_list_walker_t; 202 203 extern void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, 204 int can_split, int use_ceq, page_list_walker_t *plw); 205 206 extern struct cpu cpus[]; 207 #define CPU0 &cpus[0] 208 209 /* 210 * XXX memory type initializaiton 211 */ 212 #define MTYPE_INIT(mtype, vp, vaddr, flags, pgsz) panic("mtype_init") 213 #define MTYPE_START(mnode, mtype, flags) panic("mtype_start") 214 #define MTYPE_NEXT(mnode, mtype, flags) panic("mtype_next") 215 #define MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt) panic("mtype_pgr_init") 216 #define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) panic("mnodetype_2_pfn") 217 218 #ifdef DEBUG 219 #define CHK_LPG(pp, szc) panic("chk_lpg") 220 #else 221 #define CHK_LPG(pp, szc) 222 #endif 223 224 #define FULL_REGION_CNT(rg_szc) \ 225 (PAGE_GET_SIZE(rg_szc) >> PAGE_GET_SHIFT(rg_szc - 1)) 226 227 /* Return the leader for this mapping size */ 228 #define PP_GROUPLEADER(pp, szc) \ 229 (&(pp)[-(int)((pp)->p_pagenum & (SZCPAGES(szc)-1))]) 230 231 /* Return the root page for this page based on p_szc */ 232 #define PP_PAGEROOT(pp) ((pp)->p_szc == 0 ? (pp) : \ 233 PP_GROUPLEADER((pp), (pp)->p_szc)) 234 235 /* 236 * The counter base must be per page_counter element to prevent 237 * races when re-indexing, and the base page size element should 238 * be aligned on a boundary of the given region size. 239 * 240 * We also round up the number of pages spanned by the counters 241 * for a given region to PC_BASE_ALIGN in certain situations to simplify 242 * the coding for some non-performance critical routines. 243 */ 244 #define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(mmu_page_sizes-1)) 245 #define PC_BASE_ALIGN_MASK (PC_BASE_ALIGN - 1) 246 247 /* 248 * The following three constants describe the set of page sizes that are 249 * supported by the hardware. Note that there is a notion of legacy page sizes 250 * for certain applications. However, such applications don't exist on ARMv7, so 251 * they'll always get the same data. 252 */ 253 extern uint_t mmu_page_sizes; 254 extern uint_t mmu_exported_page_sizes; 255 extern uint_t mmu_legacy_page_sizes; 256 257 /* 258 * These macros are used for converting between userland page sizes and kernel 259 * page sizes. However, these are the same on ARMv7 (just like i86pc). 260 */ 261 #define USERSZC_2_SZC(userszc) userszc 262 #define SZC_2_USERSZC(szc) szc 263 264 /* 265 * for hw_page_map_t, sized to hold the ratio of large page to base 266 * pagesize 267 */ 268 typedef short hpmctr_t; 269 270 /* 271 * On ARMv6 the layer two cache isn't architecturally defined. A given 272 * implementation may or may not support it. The maximum size appears to be 273 * 64-bytes; however, we end up having to defer to the individual platforms for 274 * more information. Because of this, we also get and use the l1 cache 275 * information. This is further complicated by the fact that the I-cache and 276 * D-cache are separate usually; therefore we us the the l1 d-cache for 277 * CPUSETSIZE(). 278 */ 279 extern int armv6_cachesz, armv6_cache_assoc; 280 extern int armv6_l2cache_size, armv6_l2cache_linesz; 281 #define L2CACHE_ALIGN armv6_l2cache_linesz 282 #define L2CACHE_ALIGN_MAX 64 283 #define CPUSETSIZE() (armv6_cachesz / armv6_cache_assoc) 284 285 /* 286 * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count 287 * for the number of base pages in this pagesize 288 */ 289 #define PAGE_BSZS_SHIFT(szc) (PNUM_SHIFT(szc) - MMU_PAGESHIFT) 290 291 /* 292 * Internal PG_ flags. 293 */ 294 #define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */ 295 #define PGI_NOCAGE 0x020000 /* cage is disabled */ 296 #define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */ 297 #define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */ 298 299 /* 300 * XXX Consider PGI flags for ourselves 301 */ 302 303 #define AS_2_BIN(as, seg, vp, addr, bin, szc) panic("as_2_bin") 304 305 /* 306 * XXX For the moment, we'll use the same value for VM_CPU_DATA_PADSIZE that 307 * is used on other platforms. We don't use this at all, but it's required for 308 * stuff like vm_pagelist.c to build. We should figure out what the right answer 309 * looks like here. 310 */ 311 /* 312 * cpu private vm data - accessed thru CPU->cpu_vm_data 313 * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock() 314 * vc_pnext_memseg: tracks last memseg visited in page_nextn() 315 * vc_kmptr: orignal unaligned kmem pointer for this vm_cpu_data_t 316 * vc_kmsize: orignal kmem size for this vm_cpu_data_t 317 */ 318 319 typedef struct { 320 struct memseg *vc_pnum_memseg; 321 struct memseg *vc_pnext_memseg; 322 void *vc_kmptr; 323 size_t vc_kmsize; 324 } vm_cpu_data_t; 325 326 /* allocation size to ensure vm_cpu_data_t resides in its own cache line */ 327 #define VM_CPU_DATA_PADSIZE \ 328 (P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX)) 329 330 /* 331 * When a bin is empty, and we can't satisfy a color request correctly, 332 * we scan. If we assume that the programs have reasonable spatial 333 * behavior, then it will not be a good idea to use the adjacent color. 334 * Using the adjacent color would result in virtually adjacent addresses 335 * mapping into the same spot in the cache. So, if we stumble across 336 * an empty bin, skip a bunch before looking. After the first skip, 337 * then just look one bin at a time so we don't miss our cache on 338 * every look. Be sure to check every bin. Page_create() will panic 339 * if we miss a page. 340 * 341 * This also explains the `<=' in the for loops in both page_get_freelist() 342 * and page_get_cachelist(). Since we checked the target bin, skipped 343 * a bunch, then continued one a time, we wind up checking the target bin 344 * twice to make sure we get all of them bins. 345 */ 346 #define BIN_STEP 19 347 348 /* 349 * TODO We should re-evaluate this at some point. This is a reasonable set of 350 * stats that both i86pc and sun4 have, which likely the common code all 351 * requires. We may find that we want additional stats here. 352 */ 353 #ifdef VM_STATS 354 struct vmm_vmstats_str { 355 ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */ 356 ulong_t pgf_allocok[MMU_PAGE_SIZES]; 357 ulong_t pgf_allocokrem[MMU_PAGE_SIZES]; 358 ulong_t pgf_allocfailed[MMU_PAGE_SIZES]; 359 ulong_t pgf_allocdeferred; 360 ulong_t pgf_allocretry[MMU_PAGE_SIZES]; 361 ulong_t pgc_alloc; /* page_get_cachelist */ 362 ulong_t pgc_allocok; 363 ulong_t pgc_allocokrem; 364 ulong_t pgc_allocokdeferred; 365 ulong_t pgc_allocfailed; 366 ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */ 367 ulong_t pgcp_allocfailed[MMU_PAGE_SIZES]; 368 ulong_t pgcp_allocempty[MMU_PAGE_SIZES]; 369 ulong_t pgcp_allocok[MMU_PAGE_SIZES]; 370 ulong_t ptcp[MMU_PAGE_SIZES]; /* page_trylock_contig_pages */ 371 ulong_t ptcpfreethresh[MMU_PAGE_SIZES]; 372 ulong_t ptcpfailexcl[MMU_PAGE_SIZES]; 373 ulong_t ptcpfailszc[MMU_PAGE_SIZES]; 374 ulong_t ptcpfailcage[MMU_PAGE_SIZES]; 375 ulong_t ptcpok[MMU_PAGE_SIZES]; 376 ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */ 377 ulong_t pgmf_allocfailed[MMU_PAGE_SIZES]; 378 ulong_t pgmf_allocempty[MMU_PAGE_SIZES]; 379 ulong_t pgmf_allocok[MMU_PAGE_SIZES]; 380 ulong_t pgmc_alloc; /* page_get_mnode_cachelist */ 381 ulong_t pgmc_allocfailed; 382 ulong_t pgmc_allocempty; 383 ulong_t pgmc_allocok; 384 ulong_t pladd_free[MMU_PAGE_SIZES]; /* page_list_add/sub */ 385 ulong_t plsub_free[MMU_PAGE_SIZES]; 386 ulong_t pladd_cache; 387 ulong_t plsub_cache; 388 ulong_t plsubpages_szcbig; 389 ulong_t plsubpages_szc0; 390 ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */ 391 ulong_t pfs_demote[MMU_PAGE_SIZES]; 392 ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 393 ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */ 394 ulong_t ppr_relocnoroot[MMU_PAGE_SIZES]; 395 ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES]; 396 ulong_t ppr_relocnolock[MMU_PAGE_SIZES]; 397 ulong_t ppr_relocnomem[MMU_PAGE_SIZES]; 398 ulong_t ppr_relocok[MMU_PAGE_SIZES]; 399 ulong_t ppr_copyfail; 400 /* page coalesce counter */ 401 ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 402 /* candidates useful */ 403 ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 404 /* ctrs changed after locking */ 405 ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 406 /* page_freelist_coalesce failed */ 407 ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES]; 408 ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */ 409 ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */ 410 ulong_t restrict4gcnt; 411 ulong_t unrestrict16mcnt; /* non-DMA 16m allocs allowed */ 412 ulong_t pgpanicalloc; /* PG_PANIC allocation */ 413 ulong_t pcf_deny[MMU_PAGE_SIZES]; /* page_chk_freelist */ 414 ulong_t pcf_allow[MMU_PAGE_SIZES]; 415 }; 416 extern struct vmm_vmstats_str vmm_vmstats; 417 #endif /* VM_STATS */ 418 419 420 #ifdef __cplusplus 421 } 422 #endif 423 424 #endif /* _VM_DEP_H */