patch x2apic-x86fset
patch remove-unused-vars
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 + * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
25 26 */
26 27 /*
27 28 * Copyright (c) 2010, Intel Corporation.
28 29 * All rights reserved.
29 30 */
30 31 /*
31 32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
32 33 */
33 34 /*
34 35 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
35 36 */
36 37 /*
37 38 * Various routines to handle identification
38 39 * and classification of x86 processors.
39 40 */
40 41
41 42 #include <sys/types.h>
42 43 #include <sys/archsystm.h>
43 44 #include <sys/x86_archext.h>
44 45 #include <sys/kmem.h>
45 46 #include <sys/systm.h>
46 47 #include <sys/cmn_err.h>
47 48 #include <sys/sunddi.h>
48 49 #include <sys/sunndi.h>
49 50 #include <sys/cpuvar.h>
50 51 #include <sys/processor.h>
51 52 #include <sys/sysmacros.h>
52 53 #include <sys/pg.h>
53 54 #include <sys/fp.h>
54 55 #include <sys/controlregs.h>
55 56 #include <sys/bitmap.h>
56 57 #include <sys/auxv_386.h>
57 58 #include <sys/memnode.h>
58 59 #include <sys/pci_cfgspace.h>
59 60
60 61 #ifdef __xpv
61 62 #include <sys/hypervisor.h>
62 63 #else
63 64 #include <sys/ontrap.h>
64 65 #endif
65 66
66 67 /*
67 68 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
68 69 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
69 70 * them accordingly. For most modern processors, feature detection occurs here
70 71 * in pass 1.
71 72 *
72 73 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
73 74 * for the boot CPU and does the basic analysis that the early kernel needs.
74 75 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
75 76 * CPU.
76 77 *
77 78 * Pass 1 includes:
78 79 *
79 80 * o Determining vendor/model/family/stepping and setting x86_type and
80 81 * x86_vendor accordingly.
81 82 * o Processing the feature flags returned by the cpuid instruction while
82 83 * applying any workarounds or tricks for the specific processor.
83 84 * o Mapping the feature flags into Solaris feature bits (X86_*).
84 85 * o Processing extended feature flags if supported by the processor,
85 86 * again while applying specific processor knowledge.
86 87 * o Determining the CMT characteristics of the system.
87 88 *
88 89 * Pass 1 is done on non-boot CPUs during their initialization and the results
89 90 * are used only as a meager attempt at ensuring that all processors within the
90 91 * system support the same features.
91 92 *
92 93 * Pass 2 of cpuid feature analysis happens just at the beginning
93 94 * of startup(). It just copies in and corrects the remainder
94 95 * of the cpuid data we depend on: standard cpuid functions that we didn't
95 96 * need for pass1 feature analysis, and extended cpuid functions beyond the
96 97 * simple feature processing done in pass1.
97 98 *
98 99 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
99 100 * particular kernel memory allocation has been made available. It creates a
100 101 * readable brand string based on the data collected in the first two passes.
101 102 *
102 103 * Pass 4 of cpuid analysis is invoked after post_startup() when all
103 104 * the support infrastructure for various hardware features has been
104 105 * initialized. It determines which processor features will be reported
105 106 * to userland via the aux vector.
106 107 *
107 108 * All passes are executed on all CPUs, but only the boot CPU determines what
108 109 * features the kernel will use.
109 110 *
110 111 * Much of the worst junk in this file is for the support of processors
111 112 * that didn't really implement the cpuid instruction properly.
112 113 *
113 114 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
114 115 * the pass numbers. Accordingly, changes to the pass code may require changes
115 116 * to the accessor code.
116 117 */
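
A rough sketch of the pass ordering described above. cpuid_pass1() and cpuid_pass2() appear later in this file; the pass-3/pass-4 entry points are referenced by the comments but their exact signatures are assumptions here, and the real call sites live in mlsetup(), startup(), and post_startup():

    /* Illustrative only -- boot-CPU feature analysis, in pass order. */
    static void
    boot_cpu_cpuid_flow(cpu_t *cpu)
    {
            cpuid_pass1(cpu, x86_featureset); /* mlsetup(): feature bits */
            cpuid_pass2(cpu);                 /* startup(): remaining leaves */
            /* ... basic kernel services (kmem) come up ... */
            cpuid_pass3(cpu);                 /* build the brand string */
            /* ... post_startup() ... */
            (void) cpuid_pass4(cpu);          /* aux-vector features; return */
                                              /* type assumed here */
    }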
117 118
118 119 uint_t x86_vendor = X86_VENDOR_IntelClone;
119 120 uint_t x86_type = X86_TYPE_OTHER;
120 121 uint_t x86_clflush_size = 0;
121 122
122 123 uint_t pentiumpro_bug4046376;
123 -uint_t pentiumpro_bug4064495;
124 124
125 125 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
126 126
127 127 static char *x86_feature_names[NUM_X86_FEATURES] = {
128 128 "lgpg",
129 129 "tsc",
130 130 "msr",
131 131 "mtrr",
132 132 "pge",
133 133 "de",
134 134 "cmov",
135 135 "mmx",
136 136 "mca",
137 137 "pae",
138 138 "cv8",
139 139 "pat",
140 140 "sep",
141 141 "sse",
142 142 "sse2",
143 143 "htt",
144 144 "asysc",
145 145 "nx",
146 146 "sse3",
147 147 "cx16",
148 148 "cmp",
149 149 "tscp",
150 150 "mwait",
151 151 "sse4a",
152 152 "cpuid",
153 153 "ssse3",
154 154 "sse4_1",
155 155 "sse4_2",
156 156 "1gpg",
157 157 "clfsh",
158 158 "64",
159 159 "aes",
160 160 "pclmulqdq",
161 161 "xsave",
162 162 "avx",
163 163 "vmx",
164 164 "svm",
165 165 "topoext",
166 166 "f16c",
167 - "rdrand"
167 + "rdrand",
168 + "x2apic",
168 169 };
169 170
170 171 boolean_t
171 172 is_x86_feature(void *featureset, uint_t feature)
172 173 {
173 174 ASSERT(feature < NUM_X86_FEATURES);
174 175 return (BT_TEST((ulong_t *)featureset, feature));
175 176 }
176 177
177 178 void
178 179 add_x86_feature(void *featureset, uint_t feature)
179 180 {
180 181 ASSERT(feature < NUM_X86_FEATURES);
181 182 BT_SET((ulong_t *)featureset, feature);
182 183 }
183 184
184 185 void
185 186 remove_x86_feature(void *featureset, uint_t feature)
186 187 {
187 188 ASSERT(feature < NUM_X86_FEATURES);
188 189 BT_CLEAR((ulong_t *)featureset, feature);
189 190 }
190 191
191 192 boolean_t
192 193 compare_x86_featureset(void *setA, void *setB)
193 194 {
194 195 /*
195 196 * We assume that the unused bits of the bitmap are always zero.
196 197 */
197 198 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
198 199 return (B_TRUE);
199 200 } else {
200 201 return (B_FALSE);
201 202 }
202 203 }
203 204
204 205 void
205 206 print_x86_featureset(void *featureset)
206 207 {
207 208 uint_t i;
208 209
209 210 for (i = 0; i < NUM_X86_FEATURES; i++) {
210 211 if (is_x86_feature(featureset, i)) {
211 212 cmn_err(CE_CONT, "?x86_feature: %s\n",
212 213 x86_feature_names[i]);
213 214 }
214 215 }
215 216 }
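
A minimal usage sketch of the featureset helpers above; X86FSET_SSE2 stands in for any feature bit (the X86FSET_* constants live in x86_archext.h, not in this file):

    static void
    featureset_example(void)
    {
            uchar_t fset[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

            add_x86_feature(fset, X86FSET_SSE2);
            ASSERT(is_x86_feature(fset, X86FSET_SSE2));
            remove_x86_feature(fset, X86FSET_SSE2);

            /* compare the (now empty) local set against the global one */
            if (compare_x86_featureset(fset, x86_featureset) == B_FALSE)
                    print_x86_featureset(x86_featureset);
    }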
216 217
217 -uint_t enable486;
218 -
219 218 static size_t xsave_state_size = 0;
220 219 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
221 220 boolean_t xsave_force_disable = B_FALSE;
222 221
223 222 /*
224 223 * This is set to platform type we are running on.
225 224 */
226 225 static int platform_type = -1;
227 226
228 227 #if !defined(__xpv)
229 228 /*
230 229 * Variable to patch if hypervisor platform detection needs to be
231 230 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
232 231 */
233 232 int enable_platform_detection = 1;
234 233 #endif
235 234
236 235 /*
237 236 * monitor/mwait info.
238 237 *
239 238 * size_actual and buf_actual are the real address and size allocated to get
240 239 * size_actual and buf_actual are the real address and size allocated to get
241 240 * proper mwait_buf alignment. buf_actual and size_actual should be passed
242 241 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
243 242 * processor cache-line alignment, but this is not guaranteed in the future.
244 243 struct mwait_info {
245 244 size_t mon_min; /* min size to avoid missed wakeups */
246 245 size_t mon_max; /* size to avoid false wakeups */
247 246 size_t size_actual; /* size actually allocated */
248 247 void *buf_actual; /* memory actually allocated */
249 248 uint32_t support; /* processor support of monitor/mwait */
250 249 };
251 250
252 251 /*
253 252 * xsave/xrestor info.
254 253 *
255 254 * This structure contains HW feature bits and size of the xsave save area.
256 255 * Note: the kernel will use the maximum size required for all hardware
257 256 * features. It is not optimized for potential memory savings if features at
258 257 * the end of the save area are not enabled.
259 258 */
260 259 struct xsave_info {
261 260 uint32_t xsav_hw_features_low; /* Supported HW features */
262 261 uint32_t xsav_hw_features_high; /* Supported HW features */
263 262 size_t xsav_max_size; /* max size save area for HW features */
264 263 size_t ymm_size; /* AVX: size of ymm save area */
265 264 size_t ymm_offset; /* AVX: offset for ymm save area */
266 265 };
267 266
268 267
269 268 /*
270 269 * These constants determine how many of the elements of the
271 270 * cpuid we cache in the cpuid_info data structure; the
272 271 * remaining elements are accessible via the cpuid instruction.
273 272 */
274 273
275 274 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */
276 275 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
277 276
278 277 /*
279 278 * Some terminology needs to be explained:
280 279 * - Socket: Something that can be plugged into a motherboard.
281 280 * - Package: Same as socket
282 281 * - Chip: Same as socket. Note that AMD's documentation uses term "chip"
283 282 * differently: there, chip is the same as processor node (below)
284 283 * - Processor node: Some AMD processors have more than one
285 284 * "subprocessor" embedded in a package. These subprocessors (nodes)
286 285 * are fully-functional processors themselves with cores, caches,
287 286 * memory controllers, PCI configuration spaces. They are connected
288 287 * inside the package with Hypertransport links. On single-node
289 288 * processors, processor node is equivalent to chip/socket/package.
290 289 * - Compute Unit: Some AMD processors pair cores in "compute units" that
291 290 * share the FPU and the I$ and L2 caches.
292 291 */
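
A numeric illustration of the terms above, with invented values: a two-node AMD package with eight cores per node, paired into compute units, shows up as one socket/package/chip, two processor nodes (cpi_procnodes_per_pkg = 2), sixteen cores in total, and eight compute units (cpi_cores_per_compunit = 2).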
293 292
294 293 struct cpuid_info {
295 294 uint_t cpi_pass; /* last pass completed */
296 295 /*
297 296 * standard function information
298 297 */
299 298 uint_t cpi_maxeax; /* fn 0: %eax */
300 299 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
301 300 uint_t cpi_vendor; /* enum of cpi_vendorstr */
302 301
303 302 uint_t cpi_family; /* fn 1: extended family */
304 303 uint_t cpi_model; /* fn 1: extended model */
305 304 uint_t cpi_step; /* fn 1: stepping */
306 305 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
307 306 /* AMD: package/socket # */
308 307 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
309 308 int cpi_clogid; /* fn 1: %ebx: thread # */
310 309 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
311 310 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
312 311 uint_t cpi_ncache; /* fn 2: number of elements */
313 312 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
314 313 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
315 314 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
316 315 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
317 316 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */
318 317 /*
319 318 * extended function information
320 319 */
321 320 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
322 321 char cpi_brandstr[49]; /* fn 0x8000000[234] */
323 322 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
324 323 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
325 324 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
326 325
327 326 id_t cpi_coreid; /* same coreid => strands share core */
328 327 int cpi_pkgcoreid; /* core number within single package */
329 328 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
330 329 /* Intel: fn 4: %eax[31-26] */
331 330 /*
332 331 * supported feature information
333 332 */
334 333 uint32_t cpi_support[5];
335 334 #define STD_EDX_FEATURES 0
336 335 #define AMD_EDX_FEATURES 1
337 336 #define TM_EDX_FEATURES 2
338 337 #define STD_ECX_FEATURES 3
339 338 #define AMD_ECX_FEATURES 4
340 339 /*
341 340 * Synthesized information, where known.
342 341 */
343 342 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
344 343 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
345 344 uint32_t cpi_socket; /* Chip package/socket type */
346 345
347 346 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
348 347 uint32_t cpi_apicid;
349 348 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
350 349 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
351 350 /* Intel: 1 */
352 351 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
353 352 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
354 353
355 354 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
356 355 };
357 356
358 357
359 358 static struct cpuid_info cpuid_info0;
360 359
361 360 /*
362 361 * These bit fields are defined by the Intel Application Note AP-485
363 362 * "Intel Processor Identification and the CPUID Instruction"
364 363 */
365 364 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
366 365 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
367 366 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
368 367 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
369 368 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
370 369 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
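
As a worked decode of the macros above, take a hypothetical fn-1 %eax value of 0x000206a7 (not taken from this webrev):

    /*
     * eax = 0x000206a7:
     *	CPI_STEP	bits  3:0  = 0x7
     *	CPI_MODEL	bits  7:4  = 0xa
     *	CPI_FAMILY	bits 11:8  = 0x6
     *	CPI_MODEL_XTD	bits 19:16 = 0x2
     *	CPI_FAMILY_XTD	bits 27:20 = 0x0
     * On Intel family 6, cpuid_pass1() below folds the extended model in
     * as (0x2 << 4) | 0xa, giving the familiar model number 0x2a.
     */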
371 370
372 371 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
373 372 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
374 373 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
375 374 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
376 375
377 376 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
378 377 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
379 378 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
380 379 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
381 380
382 381 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
383 382 #define CPI_XMAXEAX_MAX 0x80000100
384 383 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
385 384 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
386 385
387 386 /*
388 387 * Function 4 (Deterministic Cache Parameters) macros
389 388 * Defined by Intel Application Note AP-485
390 389 */
391 390 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
392 391 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
393 392 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
394 393 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
395 394 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
396 395 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
397 396 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
398 397
399 398 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
400 399 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
401 400 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
402 401
403 402 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
404 403
405 404 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
406 405
407 406
408 407 /*
409 408 * A couple of shorthand macros to identify "later" P6-family chips
410 409 * like the Pentium M and Core. First, the "older" P6-based stuff
411 410 * (loosely defined as "pre-Pentium-4"):
412 411 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
413 412 */
414 413
415 414 #define IS_LEGACY_P6(cpi) ( \
416 415 cpi->cpi_family == 6 && \
417 416 (cpi->cpi_model == 1 || \
418 417 cpi->cpi_model == 3 || \
419 418 cpi->cpi_model == 5 || \
420 419 cpi->cpi_model == 6 || \
421 420 cpi->cpi_model == 7 || \
422 421 cpi->cpi_model == 8 || \
423 422 cpi->cpi_model == 0xA || \
424 423 cpi->cpi_model == 0xB) \
425 424 )
426 425
427 426 /* A "new F6" is everything with family 6 that's not the above */
428 427 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
429 428
430 429 /* Extended family/model support */
431 430 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
432 431 cpi->cpi_family >= 0xf)
433 432
434 433 /*
435 434 * Info for monitor/mwait idle loop.
436 435 *
437 436 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
438 437 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
439 438 * 2006.
440 439 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
441 440 * Documentation Updates" #33633, Rev 2.05, December 2006.
442 441 */
443 442 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
444 443 #define MWAIT_EXTENSIONS (0x00000002) /* extensions supported */
445 444 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
446 445 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
447 446 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
448 447 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
449 448 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
450 449 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
451 450 /*
452 451 * Number of sub-cstates for a given c-state.
453 452 */
454 453 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
455 454 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
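
Leaf 5 %edx packs one 4-bit sub-state count per C-state, so the c_state argument is a bit offset, i.e. 4 * (C-state index); a sketch under that assumption:

    static uint_t
    mwait_c1_substates(struct cpuid_info *cpi)
    {
            /* C-state n occupies %edx bits [4n + 3 : 4n]; n = 1 here */
            return (MWAIT_NUM_SUBC_STATES(cpi, 1 * 4));
    }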
456 455
457 456 /*
458 457 * XSAVE leaf 0xD enumeration
459 458 */
460 459 #define CPUID_LEAFD_2_YMM_OFFSET 576
461 460 #define CPUID_LEAFD_2_YMM_SIZE 256
462 461
463 462 /*
464 463 * Functions we consume from cpuid_subr.c; don't publish these in a header
465 464 * file to try and keep people using the expected cpuid_* interfaces.
466 465 */
467 466 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
468 467 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
469 468 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
470 469 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
471 470 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
472 471
473 472 /*
474 473 * Apply various platform-dependent restrictions where the
475 474 * underlying platform restrictions mean the CPU can be marked
476 475 * as less capable than its cpuid instruction would imply.
477 476 */
478 477 #if defined(__xpv)
479 478 static void
480 479 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
481 480 {
482 481 switch (eax) {
483 482 case 1: {
484 483 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
485 484 0 : CPUID_INTC_EDX_MCA;
486 485 cp->cp_edx &=
487 486 ~(mcamask |
488 487 CPUID_INTC_EDX_PSE |
489 488 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
490 489 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
491 490 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
492 491 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
493 492 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
494 493 break;
495 494 }
496 495
497 496 case 0x80000001:
498 497 cp->cp_edx &=
499 498 ~(CPUID_AMD_EDX_PSE |
500 499 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
501 500 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
502 501 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
503 502 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
504 503 CPUID_AMD_EDX_TSCP);
505 504 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
506 505 break;
507 506 default:
508 507 break;
509 508 }
510 509
511 510 switch (vendor) {
512 511 case X86_VENDOR_Intel:
513 512 switch (eax) {
514 513 case 4:
515 514 /*
516 515 * Zero out the (ncores-per-chip - 1) field
517 516 */
518 517 cp->cp_eax &= 0x03ffffff;
519 518 break;
520 519 default:
521 520 break;
522 521 }
523 522 break;
524 523 case X86_VENDOR_AMD:
525 524 switch (eax) {
526 525
527 526 case 0x80000001:
528 527 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
529 528 break;
530 529
531 530 case 0x80000008:
532 531 /*
533 532 * Zero out the (ncores-per-chip - 1) field
534 533 */
535 534 cp->cp_ecx &= 0xffffff00;
536 535 break;
537 536 default:
538 537 break;
539 538 }
540 539 break;
541 540 default:
542 541 break;
543 542 }
544 543 }
545 544 #else
546 545 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
547 546 #endif
548 547
549 548 /*
550 549 * Some undocumented ways of patching the results of the cpuid
551 550 * instruction to permit running Solaris 10 on future cpus that
552 551 * we don't currently support. Could be set to non-zero values
553 552 * via settings in eeprom.
554 553 */
555 554
556 555 uint32_t cpuid_feature_ecx_include;
557 556 uint32_t cpuid_feature_ecx_exclude;
558 557 uint32_t cpuid_feature_edx_include;
559 558 uint32_t cpuid_feature_edx_exclude;
560 559
561 560 /*
562 561 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
563 562 */
564 563 void
565 564 cpuid_alloc_space(cpu_t *cpu)
566 565 {
567 566 /*
568 567 * By convention, cpu0 is the boot cpu, which is set up
569 568 * before memory allocation is available. All other cpus get
570 569 * their cpuid_info struct allocated here.
571 570 */
572 571 ASSERT(cpu->cpu_id != 0);
573 572 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
574 573 cpu->cpu_m.mcpu_cpi =
575 574 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
576 575 }
577 576
578 577 void
579 578 cpuid_free_space(cpu_t *cpu)
580 579 {
581 580 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
582 581 int i;
583 582
584 583 ASSERT(cpi != NULL);
585 584 ASSERT(cpi != &cpuid_info0);
586 585
587 586 /*
588 587 * Free up any function 4 related dynamic storage
589 588 */
590 589 for (i = 1; i < cpi->cpi_std_4_size; i++)
591 590 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
592 591 if (cpi->cpi_std_4_size > 0)
593 592 kmem_free(cpi->cpi_std_4,
594 593 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
595 594
596 595 kmem_free(cpi, sizeof (*cpi));
597 596 cpu->cpu_m.mcpu_cpi = NULL;
598 597 }
599 598
600 599 #if !defined(__xpv)
601 600 /*
602 601 * Determine the type of the underlying platform. This is used to customize
603 602 * initialization of various subsystems (e.g. TSC). determine_platform() must
604 603 * only ever be called once to prevent two processors from seeing different
605 604 * values of platform_type. Must be called before cpuid_pass1(), the earliest
606 605 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
607 606 */
608 607 void
609 608 determine_platform(void)
610 609 {
611 610 struct cpuid_regs cp;
612 611 uint32_t base;
613 612 uint32_t regs[4];
614 613 char *hvstr = (char *)regs;
615 614
616 615 ASSERT(platform_type == -1);
617 616
618 617 platform_type = HW_NATIVE;
619 618
620 619 if (!enable_platform_detection)
621 620 return;
622 621
623 622 /*
624 623 * If Hypervisor CPUID bit is set, try to determine hypervisor
625 624 * vendor signature, and set platform type accordingly.
626 625 *
627 626 * References:
628 627 * http://lkml.org/lkml/2008/10/1/246
629 628 * http://kb.vmware.com/kb/1009458
630 629 */
631 630 cp.cp_eax = 0x1;
632 631 (void) __cpuid_insn(&cp);
633 632 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
634 633 cp.cp_eax = 0x40000000;
635 634 (void) __cpuid_insn(&cp);
636 635 regs[0] = cp.cp_ebx;
637 636 regs[1] = cp.cp_ecx;
638 637 regs[2] = cp.cp_edx;
639 638 regs[3] = 0;
640 639 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
641 640 platform_type = HW_XEN_HVM;
642 641 return;
643 642 }
644 643 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
645 644 platform_type = HW_VMWARE;
646 645 return;
647 646 }
648 647 if (strcmp(hvstr, HVSIG_KVM) == 0) {
649 648 platform_type = HW_KVM;
650 649 return;
651 650 }
652 651 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
653 652 platform_type = HW_MICROSOFT;
654 653 } else {
655 654 /*
656 655 * Check older VMware hardware versions. The VMware hypervisor is
657 656 * detected by performing an IN operation on the VMware hypervisor
658 657 * port and checking that the value returned in %ebx is the VMware
659 658 * hypervisor magic value.
660 659 *
661 660 * References: http://kb.vmware.com/kb/1009458
662 661 */
663 662 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
664 663 if (regs[1] == VMWARE_HVMAGIC) {
665 664 platform_type = HW_VMWARE;
666 665 return;
667 666 }
668 667 }
669 668
670 669 /*
671 670 * Check Xen hypervisor. In a fully virtualized domain,
672 671 * Xen's pseudo-cpuid function returns a string representing the
673 672 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
674 673 * supported cpuid function. We need at least a (base + 2) leaf value
675 674 * to do what we want to do. Try different base values, since the
676 675 * hypervisor might use a different one depending on whether Hyper-V
677 676 * emulation is switched on by default or not.
678 677 */
679 678 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
680 679 cp.cp_eax = base;
681 680 (void) __cpuid_insn(&cp);
682 681 regs[0] = cp.cp_ebx;
683 682 regs[1] = cp.cp_ecx;
684 683 regs[2] = cp.cp_edx;
685 684 regs[3] = 0;
686 685 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
687 686 cp.cp_eax >= (base + 2)) {
688 687 platform_type &= ~HW_NATIVE;
689 688 platform_type |= HW_XEN_HVM;
690 689 return;
691 690 }
692 691 }
693 692 }
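
For concreteness, an assumed example of the signature check above: under KVM, leaf 0x40000000 conventionally returns the bytes of "KVMKVMKVM" split across %ebx, %ecx, and %edx, so after the copies into regs[]:

    /*
     * %ebx = 'K','V','M','K'  %ecx = 'V','M','K','V'  %edx = 'M',0,0,0
     * hvstr therefore reads "KVMKVMKVM" and, assuming HVSIG_KVM holds
     * that string, strcmp(hvstr, HVSIG_KVM) == 0 selects HW_KVM.
     */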
694 693
695 694 int
696 695 get_hwenv(void)
697 696 {
698 697 ASSERT(platform_type != -1);
699 698 return (platform_type);
700 699 }
701 700
702 701 int
703 702 is_controldom(void)
704 703 {
705 704 return (0);
706 705 }
707 706
708 707 #else
709 708
710 709 int
711 710 get_hwenv(void)
712 711 {
713 712 return (HW_XEN_PV);
714 713 }
715 714
716 715 int
717 716 is_controldom(void)
718 717 {
719 718 return (DOMAIN_IS_INITDOMAIN(xen_info));
720 719 }
721 720
722 721 #endif /* __xpv */
723 722
724 723 static void
725 724 cpuid_intel_getids(cpu_t *cpu, void *feature)
726 725 {
727 726 uint_t i;
728 727 uint_t chipid_shift = 0;
729 728 uint_t coreid_shift = 0;
730 729 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
731 730
732 731 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
733 732 chipid_shift++;
734 733
735 734 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
736 735 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
737 736
738 737 if (is_x86_feature(feature, X86FSET_CMP)) {
739 738 /*
740 739 * Multi-core (and possibly multi-threaded)
741 740 * processors.
742 741 */
743 742 uint_t ncpu_per_core;
744 743 if (cpi->cpi_ncore_per_chip == 1)
745 744 ncpu_per_core = cpi->cpi_ncpu_per_chip;
746 745 else if (cpi->cpi_ncore_per_chip > 1)
747 746 ncpu_per_core = cpi->cpi_ncpu_per_chip /
748 747 cpi->cpi_ncore_per_chip;
749 748 /*
750 749 * 8bit APIC IDs on dual core Pentiums
751 750 * look like this:
752 751 *
753 752 * +-----------------------+------+------+
754 753 * | Physical Package ID | MC | HT |
755 754 * +-----------------------+------+------+
756 755 * <------- chipid -------->
757 756 * <------- coreid --------------->
758 757 * <--- clogid -->
759 758 * <------>
760 759 * pkgcoreid
761 760 *
762 761 * Where the number of bits necessary to
763 762 * represent MC and HT fields together equals
764 763 * the minimum number of bits necessary to
765 764 * store the value of cpi->cpi_ncpu_per_chip.
766 765 * Of those bits, the MC part uses the number
767 766 * of bits necessary to store the value of
768 767 * cpi->cpi_ncore_per_chip.
769 768 */
770 769 for (i = 1; i < ncpu_per_core; i <<= 1)
771 770 coreid_shift++;
772 771 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
773 772 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
774 773 } else if (is_x86_feature(feature, X86FSET_HTT)) {
775 774 /*
776 775 * Single-core multi-threaded processors.
777 776 */
778 777 cpi->cpi_coreid = cpi->cpi_chipid;
779 778 cpi->cpi_pkgcoreid = 0;
780 779 }
781 780 cpi->cpi_procnodeid = cpi->cpi_chipid;
782 781 cpi->cpi_compunitid = cpi->cpi_coreid;
783 782 }
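
A worked example of the derivation above, with invented values: four logical CPUs per chip (two cores with two threads each, so chipid_shift == 2 and coreid_shift == 1) and an APIC ID of 0xd:

    /*
     * apicid = 0b1101:
     *	cpi_chipid	= 0b1101 >> 2	= 3	(package 3)
     *	cpi_clogid	= 0b1101 & 0b11	= 1	(logical cpu 1 in pkg)
     *	cpi_coreid	= 0b1101 >> 1	= 6	(globally unique core)
     *	cpi_pkgcoreid	= 0b01 >> 1	= 0	(core 0 within package)
     */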
784 783
785 784 static void
786 785 cpuid_amd_getids(cpu_t *cpu)
787 786 {
788 787 int i, first_half, coreidsz;
789 788 uint32_t nb_caps_reg;
790 789 uint_t node2_1;
791 790 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
792 791 struct cpuid_regs *cp;
793 792
794 793 /*
795 794 * AMD CMP chips currently have a single thread per core.
796 795 *
797 796 * Since no two cpus share a core we must assign a distinct coreid
798 797 * per cpu, and we do this by using the cpu_id. This scheme does not,
799 798 * however, guarantee that sibling cores of a chip will have sequential
800 799 * coreids starting at a multiple of the number of cores per chip -
801 800 * that is usually the case, but if the ACPI MADT table is presented
802 801 * in a different order then we need to perform a few more gymnastics
803 802 * for the pkgcoreid.
804 803 *
805 804 * All processors in the system have the same number of enabled
806 805 * cores. Cores within a processor are always numbered sequentially
807 806 * from 0 regardless of how many or which are disabled, and there
808 807 * is no way for the operating system to discover the real core id when some
809 808 * are disabled.
810 809 *
811 810 * In family 0x15, the cores come in pairs called compute units. They
812 811 * share I$ and L2 caches and the FPU. Enumeration of this feature is
813 812 * simplified by the new topology extensions CPUID leaf, indicated by
814 813 * the X86 feature X86FSET_TOPOEXT.
815 814 */
816 815
817 816 cpi->cpi_coreid = cpu->cpu_id;
818 817 cpi->cpi_compunitid = cpu->cpu_id;
819 818
820 819 if (cpi->cpi_xmaxeax >= 0x80000008) {
821 820
822 821 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
823 822
824 823 /*
825 824 * In AMD parlance chip is really a node while Solaris
826 825 * sees chip as equivalent to socket/package.
827 826 */
828 827 cpi->cpi_ncore_per_chip =
829 828 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
830 829 if (coreidsz == 0) {
831 830 /* Use legacy method */
832 831 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
833 832 coreidsz++;
834 833 if (coreidsz == 0)
835 834 coreidsz = 1;
836 835 }
837 836 } else {
838 837 /* Assume single-core part */
839 838 cpi->cpi_ncore_per_chip = 1;
840 839 coreidsz = 1;
841 840 }
842 841
843 842 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
844 843 cpi->cpi_apicid & ((1<<coreidsz) - 1);
845 844 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
846 845
847 846 /* Get node ID, compute unit ID */
848 847 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
849 848 cpi->cpi_xmaxeax >= 0x8000001e) {
850 849 cp = &cpi->cpi_extd[0x1e];
851 850 cp->cp_eax = 0x8000001e;
852 851 (void) __cpuid_insn(cp);
853 852
854 853 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
855 854 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
856 855 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
857 856 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
858 857 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
859 858 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
860 859 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
861 860 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
862 861 } else if (cpi->cpi_family == 0x10) {
863 862 /*
864 863 * See if we are a multi-node processor.
865 864 * All processors in the system have the same number of nodes
866 865 */
867 866 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
868 867 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
869 868 /* Single-node */
870 869 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
871 870 coreidsz);
872 871 } else {
873 872
874 873 /*
875 874 * Multi-node revision D (2 nodes per package
876 875 * are supported)
877 876 */
878 877 cpi->cpi_procnodes_per_pkg = 2;
879 878
880 879 first_half = (cpi->cpi_pkgcoreid <=
881 880 (cpi->cpi_ncore_per_chip/2 - 1));
882 881
883 882 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
884 883 /* We are BSP */
885 884 cpi->cpi_procnodeid = (first_half ? 0 : 1);
886 885 } else {
887 886
888 887 /* We are AP */
889 888 /* NodeId[2:1] bits to use for reading F3xe8 */
890 889 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
891 890
892 891 nb_caps_reg =
893 892 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
894 893
895 894 /*
896 895 * Check IntNodeNum bit (31:30, but bit 31 is
897 896 * always 0 on dual-node processors)
898 897 */
899 898 if (BITX(nb_caps_reg, 30, 30) == 0)
900 899 cpi->cpi_procnodeid = node2_1 +
901 900 !first_half;
902 901 else
903 902 cpi->cpi_procnodeid = node2_1 +
904 903 first_half;
905 904 }
906 905 }
907 906 } else {
908 907 cpi->cpi_procnodeid = 0;
909 908 }
910 909
911 910 cpi->cpi_chipid =
912 911 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
913 912 }
914 913
915 914 /*
916 915 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
917 916 */
918 917 void
919 918 setup_xfem(void)
920 919 {
921 920 uint64_t flags = XFEATURE_LEGACY_FP;
922 921
923 922 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
924 923
925 924 if (is_x86_feature(x86_featureset, X86FSET_SSE))
926 925 flags |= XFEATURE_SSE;
927 926
928 927 if (is_x86_feature(x86_featureset, X86FSET_AVX))
929 928 flags |= XFEATURE_AVX;
930 929
931 930 set_xcr(XFEATURE_ENABLED_MASK, flags);
932 931
933 932 xsave_bv_all = flags;
934 933 }
935 934
936 935 void
937 936 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
938 937 {
939 938 uint32_t mask_ecx, mask_edx;
940 939 struct cpuid_info *cpi;
941 940 struct cpuid_regs *cp;
942 941 int xcpuid;
943 942 #if !defined(__xpv)
944 943 extern int idle_cpu_prefer_mwait;
945 944 #endif
946 945
947 946 /*
948 947 * Space statically allocated for BSP, ensure pointer is set
949 948 */
950 949 if (cpu->cpu_id == 0) {
951 950 if (cpu->cpu_m.mcpu_cpi == NULL)
952 951 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
953 952 }
954 953
955 954 add_x86_feature(featureset, X86FSET_CPUID);
956 955
957 956 cpi = cpu->cpu_m.mcpu_cpi;
958 957 ASSERT(cpi != NULL);
959 958 cp = &cpi->cpi_std[0];
960 959 cp->cp_eax = 0;
961 960 cpi->cpi_maxeax = __cpuid_insn(cp);
962 961 {
963 962 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
964 963 *iptr++ = cp->cp_ebx;
965 964 *iptr++ = cp->cp_edx;
966 965 *iptr++ = cp->cp_ecx;
967 966 *(char *)&cpi->cpi_vendorstr[12] = '\0';
968 967 }
969 968
970 969 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
971 970 x86_vendor = cpi->cpi_vendor; /* for compatibility */
972 971
973 972 /*
974 973 * Limit the range in case of weird hardware
975 974 */
976 975 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
977 976 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
978 977 if (cpi->cpi_maxeax < 1)
979 978 goto pass1_done;
980 979
981 980 cp = &cpi->cpi_std[1];
982 981 cp->cp_eax = 1;
983 982 (void) __cpuid_insn(cp);
984 983
985 984 /*
986 985 * Extract identifying constants for easy access.
987 986 */
988 987 cpi->cpi_model = CPI_MODEL(cpi);
989 988 cpi->cpi_family = CPI_FAMILY(cpi);
990 989
991 990 if (cpi->cpi_family == 0xf)
992 991 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
993 992
994 993 /*
995 994 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
996 995 * Intel, and presumably everyone else, uses model == 0xf, as
997 996 * one would expect (max value means possible overflow). Sigh.
998 997 */
999 998
1000 999 switch (cpi->cpi_vendor) {
1001 1000 case X86_VENDOR_Intel:
1002 1001 if (IS_EXTENDED_MODEL_INTEL(cpi))
1003 1002 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1004 1003 break;
1005 1004 case X86_VENDOR_AMD:
1006 1005 if (CPI_FAMILY(cpi) == 0xf)
1007 1006 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1008 1007 break;
1009 1008 default:
1010 1009 if (cpi->cpi_model == 0xf)
1011 1010 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1012 1011 break;
1013 1012 }
1014 1013
1015 1014 cpi->cpi_step = CPI_STEP(cpi);
1016 1015 cpi->cpi_brandid = CPI_BRANDID(cpi);
1017 1016
1018 1017 /*
1019 1018 * *default* assumptions:
1020 1019 * - believe %edx feature word
1021 1020 * - ignore %ecx feature word
1022 1021 * - 32-bit virtual and physical addressing
1023 1022 */
1024 1023 mask_edx = 0xffffffff;
1025 1024 mask_ecx = 0;
1026 1025
1027 1026 cpi->cpi_pabits = cpi->cpi_vabits = 32;
1028 1027
1029 1028 switch (cpi->cpi_vendor) {
1030 1029 case X86_VENDOR_Intel:
1031 1030 if (cpi->cpi_family == 5)
1032 1031 x86_type = X86_TYPE_P5;
1033 1032 else if (IS_LEGACY_P6(cpi)) {
1034 1033 x86_type = X86_TYPE_P6;
1035 1034 pentiumpro_bug4046376 = 1;
1036 - pentiumpro_bug4064495 = 1;
1037 1035 /*
1038 1036 * Clear the SEP bit when it was set erroneously
1039 1037 */
1040 1038 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
1041 1039 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
1042 1040 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
1043 1041 x86_type = X86_TYPE_P4;
1044 1042 /*
1045 1043 * We don't currently depend on any of the %ecx
1046 1044 * features until Prescott, so we'll only check
1047 1045 * this from P4 onwards. We might want to revisit
1048 1046 * that idea later.
1049 1047 */
1050 1048 mask_ecx = 0xffffffff;
1051 1049 } else if (cpi->cpi_family > 0xf)
1052 1050 mask_ecx = 0xffffffff;
1053 1051 /*
1054 1052 * We don't support MONITOR/MWAIT if leaf 5 is not available
1055 1053 * to obtain the monitor linesize.
1056 1054 */
1057 1055 if (cpi->cpi_maxeax < 5)
1058 1056 mask_ecx &= ~CPUID_INTC_ECX_MON;
1059 1057 break;
1060 1058 case X86_VENDOR_IntelClone:
1061 1059 default:
1062 1060 break;
1063 1061 case X86_VENDOR_AMD:
1064 1062 #if defined(OPTERON_ERRATUM_108)
1065 1063 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1066 1064 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1067 1065 cpi->cpi_model = 0xc;
1068 1066 } else
1069 1067 #endif
1070 1068 if (cpi->cpi_family == 5) {
1071 1069 /*
1072 1070 * AMD K5 and K6
1073 1071 *
1074 1072 * These CPUs have an incomplete implementation
1075 1073 * of MCA/MCE which we mask away.
1076 1074 */
1077 1075 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1078 1076
1079 1077 /*
1080 1078 * Model 0 uses the wrong (APIC) bit
1081 1079 * to indicate PGE. Fix it here.
1082 1080 */
1083 1081 if (cpi->cpi_model == 0) {
1084 1082 if (cp->cp_edx & 0x200) {
1085 1083 cp->cp_edx &= ~0x200;
1086 1084 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1087 1085 }
1088 1086 }
1089 1087
1090 1088 /*
1091 1089 * Early models had problems w/ MMX; disable.
1092 1090 */
1093 1091 if (cpi->cpi_model < 6)
1094 1092 mask_edx &= ~CPUID_INTC_EDX_MMX;
1095 1093 }
1096 1094
1097 1095 /*
1098 1096 * For newer families, SSE3 and CX16, at least, are valid;
1099 1097 * enable all
1100 1098 */
1101 1099 if (cpi->cpi_family >= 0xf)
1102 1100 mask_ecx = 0xffffffff;
1103 1101 /*
1104 1102 * We don't support MONITOR/MWAIT if leaf 5 is not available
1105 1103 * to obtain the monitor linesize.
1106 1104 */
1107 1105 if (cpi->cpi_maxeax < 5)
1108 1106 mask_ecx &= ~CPUID_INTC_ECX_MON;
1109 1107
1110 1108 #if !defined(__xpv)
1111 1109 /*
1112 1110 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1113 1111 * processors. AMD does not intend MWAIT to be used in the cpu
1114 1112 * idle loop on current and future processors. 10h and future
1115 1113 * AMD processors use more power in MWAIT than HLT.
1116 1114 * Pre-family-10h Opterons do not have the MWAIT instruction.
1117 1115 */
1118 1116 idle_cpu_prefer_mwait = 0;
1119 1117 #endif
1120 1118
1121 1119 break;
1122 1120 case X86_VENDOR_TM:
1123 1121 /*
1124 1122 * workaround the NT workaround in CMS 4.1
1125 1123 */
1126 1124 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1127 1125 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1128 1126 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1129 1127 break;
1130 1128 case X86_VENDOR_Centaur:
1131 1129 /*
1132 1130 * workaround the NT workarounds again
1133 1131 */
1134 1132 if (cpi->cpi_family == 6)
1135 1133 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1136 1134 break;
1137 1135 case X86_VENDOR_Cyrix:
1138 1136 /*
1139 1137 * We rely heavily on the probing in locore
1140 1138 * to actually figure out what parts, if any,
1141 1139 * of the Cyrix cpuid instruction to believe.
1142 1140 */
1143 1141 switch (x86_type) {
1144 1142 case X86_TYPE_CYRIX_486:
1145 1143 mask_edx = 0;
1146 1144 break;
1147 1145 case X86_TYPE_CYRIX_6x86:
1148 1146 mask_edx = 0;
1149 1147 break;
1150 1148 case X86_TYPE_CYRIX_6x86L:
1151 1149 mask_edx =
1152 1150 CPUID_INTC_EDX_DE |
1153 1151 CPUID_INTC_EDX_CX8;
1154 1152 break;
1155 1153 case X86_TYPE_CYRIX_6x86MX:
1156 1154 mask_edx =
1157 1155 CPUID_INTC_EDX_DE |
1158 1156 CPUID_INTC_EDX_MSR |
1159 1157 CPUID_INTC_EDX_CX8 |
1160 1158 CPUID_INTC_EDX_PGE |
1161 1159 CPUID_INTC_EDX_CMOV |
1162 1160 CPUID_INTC_EDX_MMX;
1163 1161 break;
1164 1162 case X86_TYPE_CYRIX_GXm:
1165 1163 mask_edx =
1166 1164 CPUID_INTC_EDX_MSR |
1167 1165 CPUID_INTC_EDX_CX8 |
1168 1166 CPUID_INTC_EDX_CMOV |
1169 1167 CPUID_INTC_EDX_MMX;
1170 1168 break;
1171 1169 case X86_TYPE_CYRIX_MediaGX:
1172 1170 break;
1173 1171 case X86_TYPE_CYRIX_MII:
1174 1172 case X86_TYPE_VIA_CYRIX_III:
1175 1173 mask_edx =
1176 1174 CPUID_INTC_EDX_DE |
1177 1175 CPUID_INTC_EDX_TSC |
1178 1176 CPUID_INTC_EDX_MSR |
1179 1177 CPUID_INTC_EDX_CX8 |
1180 1178 CPUID_INTC_EDX_PGE |
1181 1179 CPUID_INTC_EDX_CMOV |
1182 1180 CPUID_INTC_EDX_MMX;
1183 1181 break;
1184 1182 default:
1185 1183 break;
1186 1184 }
1187 1185 break;
1188 1186 }
1189 1187
1190 1188 #if defined(__xpv)
1191 1189 /*
1192 1190 * Do not support MONITOR/MWAIT under a hypervisor
1193 1191 */
1194 1192 mask_ecx &= ~CPUID_INTC_ECX_MON;
1195 1193 /*
1196 1194 * Do not support XSAVE under a hypervisor for now
1197 1195 */
1198 1196 xsave_force_disable = B_TRUE;
1199 1197
1200 1198 #endif /* __xpv */
1201 1199
1202 1200 if (xsave_force_disable) {
1203 1201 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1204 1202 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1205 1203 mask_ecx &= ~CPUID_INTC_ECX_F16C;
1206 1204 }
1207 1205
1208 1206 /*
1209 1207 * Now we've figured out the masks that determine
1210 1208 * which bits we choose to believe, apply the masks
1211 1209 * to the feature words, then map the kernel's view
1212 1210 * of these feature words into its feature word.
1213 1211 */
1214 1212 cp->cp_edx &= mask_edx;
1215 1213 cp->cp_ecx &= mask_ecx;
1216 1214
1217 1215 /*
1218 1216 * apply any platform restrictions (we don't call this
1219 1217 * immediately after __cpuid_insn here, because we need the
1220 1218 * workarounds applied above first)
1221 1219 */
1222 1220 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1223 1221
1224 1222 /*
1225 1223 * fold in overrides from the "eeprom" mechanism
1226 1224 */
1227 1225 cp->cp_edx |= cpuid_feature_edx_include;
1228 1226 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1229 1227
1230 1228 cp->cp_ecx |= cpuid_feature_ecx_include;
1231 1229 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1232 1230
1233 1231 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1234 1232 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1235 1233 }
1236 1234 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1237 1235 add_x86_feature(featureset, X86FSET_TSC);
1238 1236 }
1239 1237 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1240 1238 add_x86_feature(featureset, X86FSET_MSR);
1241 1239 }
1242 1240 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1243 1241 add_x86_feature(featureset, X86FSET_MTRR);
1244 1242 }
1245 1243 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1246 1244 add_x86_feature(featureset, X86FSET_PGE);
1247 1245 }
1248 1246 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1249 1247 add_x86_feature(featureset, X86FSET_CMOV);
1250 1248 }
1251 1249 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1252 1250 add_x86_feature(featureset, X86FSET_MMX);
1253 1251 }
1254 1252 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1255 1253 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1256 1254 add_x86_feature(featureset, X86FSET_MCA);
1257 1255 }
1258 1256 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1259 1257 add_x86_feature(featureset, X86FSET_PAE);
1260 1258 }
1261 1259 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1262 1260 add_x86_feature(featureset, X86FSET_CX8);
1263 1261 }
1264 1262 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1265 1263 add_x86_feature(featureset, X86FSET_CX16);
1266 1264 }
1267 1265 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1268 1266 add_x86_feature(featureset, X86FSET_PAT);
1269 1267 }
1270 1268 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1271 1269 add_x86_feature(featureset, X86FSET_SEP);
1272 1270 }
1273 1271 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1274 1272 /*
1275 1273 * In our implementation, fxsave/fxrstor
1276 1274 * are prerequisites before we'll even
1277 1275 * try and do SSE things.
1278 1276 */
1279 1277 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1280 1278 add_x86_feature(featureset, X86FSET_SSE);
1281 1279 }
1282 1280 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1283 1281 add_x86_feature(featureset, X86FSET_SSE2);
1284 1282 }
1285 1283 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1286 1284 add_x86_feature(featureset, X86FSET_SSE3);
1287 1285 }
1288 1286 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1289 1287 add_x86_feature(featureset, X86FSET_SSSE3);
1290 1288 }
1291 1289 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1292 1290 add_x86_feature(featureset, X86FSET_SSE4_1);
1293 1291 }
1294 1292 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1295 1293 add_x86_feature(featureset, X86FSET_SSE4_2);
1296 1294 }
1297 1295 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1298 1296 add_x86_feature(featureset, X86FSET_AES);
1299 1297 }
1300 1298 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1301 1299 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1302 1300 }
1303 1301
1304 1302 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1305 1303 add_x86_feature(featureset, X86FSET_XSAVE);
1306 1304
1307 1305 /* We only test AVX when there is XSAVE */
1308 1306 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1309 1307 add_x86_feature(featureset,
1310 1308 X86FSET_AVX);
1311 1309
1312 1310 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
1313 1311 add_x86_feature(featureset,
1314 1312 X86FSET_F16C);
1315 1313 }
1316 1314 }
1317 1315 }
1316 + if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
1317 + add_x86_feature(featureset, X86FSET_X2APIC);
1318 + }
1318 1319 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1319 1320 add_x86_feature(featureset, X86FSET_DE);
1320 1321 }
1321 1322 #if !defined(__xpv)
1322 1323 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1323 1324
1324 1325 /*
1325 1326 * We require the CLFLUSH instruction for erratum workaround
1326 1327 * to use MONITOR/MWAIT.
1327 1328 */
1328 1329 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1329 1330 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1330 1331 add_x86_feature(featureset, X86FSET_MWAIT);
1331 1332 } else {
1332 1333 extern int idle_cpu_assert_cflush_monitor;
1333 1334
1334 1335 /*
1335 1336 * All processors we are aware of which have
1336 1337 * MONITOR/MWAIT also have CLFLUSH.
1337 1338 */
1338 1339 if (idle_cpu_assert_cflush_monitor) {
1339 1340 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1340 1341 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1341 1342 }
1342 1343 }
1343 1344 }
1344 1345 #endif /* __xpv */
1345 1346
1346 1347 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1347 1348 add_x86_feature(featureset, X86FSET_VMX);
1348 1349 }
1349 1350
1350 1351 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
1351 1352 add_x86_feature(featureset, X86FSET_RDRAND);
1352 1353
1353 1354 /*
1354 1355 * Only needed the first time; the rest of the cpus would follow suit.
1355 1356 * We only capture this for the boot cpu.
1356 1357 */
1357 1358 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1358 1359 add_x86_feature(featureset, X86FSET_CLFSH);
1359 1360 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1360 1361 }
1361 1362 if (is_x86_feature(featureset, X86FSET_PAE))
1362 1363 cpi->cpi_pabits = 36;
1363 1364
1364 1365 /*
1365 1366 * Hyperthreading configuration is slightly tricky on Intel
1366 1367 * and pure clones, and even trickier on AMD.
1367 1368 *
1368 1369 * (AMD chose to set the HTT bit on their CMP processors,
1369 1370 * even though they're not actually hyperthreaded. Thus it
1370 1371 * takes a bit more work to figure out what's really going
1371 1372 * on ... see the handling of the CMP_LGCY bit below)
1372 1373 */
1373 1374 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1374 1375 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1375 1376 if (cpi->cpi_ncpu_per_chip > 1)
1376 1377 add_x86_feature(featureset, X86FSET_HTT);
1377 1378 } else {
1378 1379 cpi->cpi_ncpu_per_chip = 1;
1379 1380 }
1380 1381
1381 1382 /*
1382 1383 * Work on the "extended" feature information, doing
1383 1384 * some basic initialization for cpuid_pass2()
1384 1385 */
1385 1386 xcpuid = 0;
1386 1387 switch (cpi->cpi_vendor) {
1387 1388 case X86_VENDOR_Intel:
1388 1389 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1389 1390 xcpuid++;
1390 1391 break;
1391 1392 case X86_VENDOR_AMD:
1392 1393 if (cpi->cpi_family > 5 ||
1393 1394 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1394 1395 xcpuid++;
1395 1396 break;
1396 1397 case X86_VENDOR_Cyrix:
1397 1398 /*
1398 1399 * Only these Cyrix CPUs are -known- to support
1399 1400 * extended cpuid operations.
1400 1401 */
1401 1402 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1402 1403 x86_type == X86_TYPE_CYRIX_GXm)
1403 1404 xcpuid++;
1404 1405 break;
1405 1406 case X86_VENDOR_Centaur:
1406 1407 case X86_VENDOR_TM:
1407 1408 default:
1408 1409 xcpuid++;
1409 1410 break;
1410 1411 }
1411 1412
1412 1413 if (xcpuid) {
1413 1414 cp = &cpi->cpi_extd[0];
1414 1415 cp->cp_eax = 0x80000000;
1415 1416 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1416 1417 }
1417 1418
1418 1419 if (cpi->cpi_xmaxeax & 0x80000000) {
1419 1420
1420 1421 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1421 1422 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1422 1423
1423 1424 switch (cpi->cpi_vendor) {
1424 1425 case X86_VENDOR_Intel:
1425 1426 case X86_VENDOR_AMD:
1426 1427 if (cpi->cpi_xmaxeax < 0x80000001)
1427 1428 break;
1428 1429 cp = &cpi->cpi_extd[1];
1429 1430 cp->cp_eax = 0x80000001;
1430 1431 (void) __cpuid_insn(cp);
1431 1432
1432 1433 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1433 1434 cpi->cpi_family == 5 &&
1434 1435 cpi->cpi_model == 6 &&
1435 1436 cpi->cpi_step == 6) {
1436 1437 /*
1437 1438 * K6 model 6 uses bit 10 to indicate SYSC
1438 1439 * Later models use bit 11. Fix it here.
1439 1440 */
1440 1441 if (cp->cp_edx & 0x400) {
1441 1442 cp->cp_edx &= ~0x400;
1442 1443 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1443 1444 }
1444 1445 }
1445 1446
1446 1447 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1447 1448
1448 1449 /*
1449 1450 * Compute the additions to the kernel's feature word.
1450 1451 */
1451 1452 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1452 1453 add_x86_feature(featureset, X86FSET_NX);
1453 1454 }
1454 1455
1455 1456 /*
1456 1457 * Regardless of whether or not we boot 64-bit,
1457 1458 * we should have a way to identify whether
1458 1459 * the CPU is capable of running 64-bit.
1459 1460 */
1460 1461 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1461 1462 add_x86_feature(featureset, X86FSET_64);
1462 1463 }
1463 1464
1464 1465 #if defined(__amd64)
1465 1466 /* 1 GB large page - enable only for 64 bit kernel */
1466 1467 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1467 1468 add_x86_feature(featureset, X86FSET_1GPG);
1468 1469 }
1469 1470 #endif
1470 1471
1471 1472 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1472 1473 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1473 1474 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1474 1475 add_x86_feature(featureset, X86FSET_SSE4A);
1475 1476 }
1476 1477
1477 1478 /*
1478 1479 * If both the HTT and CMP_LGCY bits are set,
1479 1480 * then we're not actually HyperThreaded. Read
1480 1481 * "AMD CPUID Specification" for more details.
1481 1482 */
1482 1483 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1483 1484 is_x86_feature(featureset, X86FSET_HTT) &&
1484 1485 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1485 1486 remove_x86_feature(featureset, X86FSET_HTT);
1486 1487 add_x86_feature(featureset, X86FSET_CMP);
1487 1488 }
1488 1489 #if defined(__amd64)
1489 1490 /*
1490 1491 * It's really tricky to support syscall/sysret in
1491 1492 * the i386 kernel; we rely on sysenter/sysexit
1492 1493 * instead. In the amd64 kernel, things are -way-
1493 1494 * better.
1494 1495 */
1495 1496 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1496 1497 add_x86_feature(featureset, X86FSET_ASYSC);
1497 1498 }
1498 1499
1499 1500 /*
1500 1501 * While we're thinking about system calls, note
1501 1502 * that AMD processors don't support sysenter
1502 1503 * in long mode at all, so don't try to program them.
1503 1504 */
1504 1505 if (x86_vendor == X86_VENDOR_AMD) {
1505 1506 remove_x86_feature(featureset, X86FSET_SEP);
1506 1507 }
1507 1508 #endif
1508 1509 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1509 1510 add_x86_feature(featureset, X86FSET_TSCP);
1510 1511 }
1511 1512
1512 1513 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1513 1514 add_x86_feature(featureset, X86FSET_SVM);
1514 1515 }
1515 1516
1516 1517 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1517 1518 add_x86_feature(featureset, X86FSET_TOPOEXT);
1518 1519 }
1519 1520 break;
1520 1521 default:
1521 1522 break;
1522 1523 }
1523 1524
1524 1525 /*
1525 1526 * Get CPUID data about processor cores and hyperthreads.
1526 1527 */
1527 1528 switch (cpi->cpi_vendor) {
1528 1529 case X86_VENDOR_Intel:
1529 1530 if (cpi->cpi_maxeax >= 4) {
1530 1531 cp = &cpi->cpi_std[4];
1531 1532 cp->cp_eax = 4;
1532 1533 cp->cp_ecx = 0;
1533 1534 (void) __cpuid_insn(cp);
1534 1535 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1535 1536 }
1536 1537 /*FALLTHROUGH*/
1537 1538 case X86_VENDOR_AMD:
1538 1539 if (cpi->cpi_xmaxeax < 0x80000008)
1539 1540 break;
1540 1541 cp = &cpi->cpi_extd[8];
1541 1542 cp->cp_eax = 0x80000008;
1542 1543 (void) __cpuid_insn(cp);
1543 1544 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1544 1545
1545 1546 /*
1546 1547 * Virtual and physical address limits from
1547 1548 * cpuid override previously guessed values.
1548 1549 */
1549 1550 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1550 1551 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1551 1552 break;
1552 1553 default:
1553 1554 break;
1554 1555 }
1555 1556
1556 1557 /*
1557 1558 * Derive the number of cores per chip
1558 1559 */
1559 1560 switch (cpi->cpi_vendor) {
1560 1561 case X86_VENDOR_Intel:
1561 1562 if (cpi->cpi_maxeax < 4) {
1562 1563 cpi->cpi_ncore_per_chip = 1;
1563 1564 break;
1564 1565 } else {
1565 1566 cpi->cpi_ncore_per_chip =
1566 1567 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1567 1568 }
1568 1569 break;
1569 1570 case X86_VENDOR_AMD:
1570 1571 if (cpi->cpi_xmaxeax < 0x80000008) {
1571 1572 cpi->cpi_ncore_per_chip = 1;
1572 1573 break;
1573 1574 } else {
1574 1575 /*
1575 1576 * On family 0xf cpuid fn 0x80000008 ECX[7:0] "NC" is
1576 1577 * 1 less than the number of physical cores on
1577 1578 * the chip. In family 0x10 this value can
1578 1579 * be affected by "downcoring" - it reflects
1579 1580 * 1 less than the number of cores actually
1580 1581 * enabled on this node.
1581 1582 */
1582 1583 cpi->cpi_ncore_per_chip =
1583 1584 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1584 1585 }
1585 1586 break;
1586 1587 default:
1587 1588 cpi->cpi_ncore_per_chip = 1;
1588 1589 break;
1589 1590 }
1590 1591
1591 1592 /*
1592 1593 * Get CPUID data about TSC Invariance in Deep C-State.
1593 1594 */
1594 1595 switch (cpi->cpi_vendor) {
1595 1596 case X86_VENDOR_Intel:
1596 1597 if (cpi->cpi_xmaxeax >= 0x80000007) {
1597 1598 cp = &cpi->cpi_extd[7];
1598 1599 cp->cp_eax = 0x80000007;
1599 1600 cp->cp_ecx = 0;
1600 1601 (void) __cpuid_insn(cp);
1601 1602 }
1602 1603 break;
1603 1604 default:
1604 1605 break;
1605 1606 }
1606 1607 } else {
1607 1608 cpi->cpi_ncore_per_chip = 1;
1608 1609 }
1609 1610
1610 1611 /*
1611 1612 * If more than one core, then this processor is CMP.
1612 1613 */
1613 1614 if (cpi->cpi_ncore_per_chip > 1) {
1614 1615 add_x86_feature(featureset, X86FSET_CMP);
1615 1616 }
1616 1617
1617 1618 /*
1618 1619 * If the number of cores is the same as the number
1619 1620 * of CPUs, then we cannot have HyperThreading.
1620 1621 */
1621 1622 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1622 1623 remove_x86_feature(featureset, X86FSET_HTT);
1623 1624 }
1624 1625
1625 1626 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1626 1627 cpi->cpi_procnodes_per_pkg = 1;
1627 1628 cpi->cpi_cores_per_compunit = 1;
1628 1629 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1629 1630 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1630 1631 /*
1631 1632 * Single-core single-threaded processors.
1632 1633 */
1633 1634 cpi->cpi_chipid = -1;
1634 1635 cpi->cpi_clogid = 0;
1635 1636 cpi->cpi_coreid = cpu->cpu_id;
1636 1637 cpi->cpi_pkgcoreid = 0;
1637 1638 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1638 1639 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1639 1640 else
1640 1641 cpi->cpi_procnodeid = cpi->cpi_chipid;
1641 1642 } else if (cpi->cpi_ncpu_per_chip > 1) {
1642 1643 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1643 1644 cpuid_intel_getids(cpu, featureset);
1644 1645 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1645 1646 cpuid_amd_getids(cpu);
1646 1647 else {
1647 1648 /*
1648 1649 * All other processors are currently
1649 1650 * assumed to have single cores.
1650 1651 */
1651 1652 cpi->cpi_coreid = cpi->cpi_chipid;
1652 1653 cpi->cpi_pkgcoreid = 0;
1653 1654 cpi->cpi_procnodeid = cpi->cpi_chipid;
1654 1655 cpi->cpi_compunitid = cpi->cpi_chipid;
1655 1656 }
1656 1657 }
1657 1658
1658 1659 /*
1659 1660 * Synthesize chip "revision" and socket type
1660 1661 */
1661 1662 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1662 1663 cpi->cpi_model, cpi->cpi_step);
1663 1664 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1664 1665 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1665 1666 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1666 1667 cpi->cpi_model, cpi->cpi_step);
1667 1668
1668 1669 pass1_done:
1669 1670 cpi->cpi_pass = 1;
1670 1671 }
1671 1672
1672 1673 /*
1673 1674 * Make copies of the cpuid table entries we depend on, in
1674 1675 * part for ease of parsing now, in part so that we have only
1675 1676 * one place to correct any of it, in part for ease of
1676 1677 * later export to userland, and in part so we can look at
1677 1678 * this stuff in a crash dump.
1678 1679 */
1679 1680
1680 1681 /*ARGSUSED*/
1681 1682 void
1682 1683 cpuid_pass2(cpu_t *cpu)
1683 1684 {
1684 1685 uint_t n, nmax;
1685 1686 int i;
1686 1687 struct cpuid_regs *cp;
1687 1688 uint8_t *dp;
1688 1689 uint32_t *iptr;
1689 1690 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1690 1691
1691 1692 ASSERT(cpi->cpi_pass == 1);
1692 1693
1693 1694 if (cpi->cpi_maxeax < 1)
1694 1695 goto pass2_done;
1695 1696
1696 1697 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1697 1698 nmax = NMAX_CPI_STD;
1698 1699 /*
1699 1700 * (We already handled n == 0 and n == 1 in pass 1)
1700 1701 */
1701 1702 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1702 1703 cp->cp_eax = n;
1703 1704
1704 1705 /*
1705 1706 * CPUID function 4 expects %ecx to be initialized
1706 1707 * with an index which indicates which cache to return
1707 1708 * information about. The OS is expected to call function 4
1708 1709 * with %ecx set to 0, 1, 2, ... until it returns with
1709 1710 * EAX[4:0] set to 0, which indicates there are no more
1710 1711 * caches.
1711 1712 *
1712 1713 * Here, populate cpi_std[4] with the information returned by
1713 1714 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1714 1715 * when dynamic memory allocation becomes available.
1715 1716 *
1716 1717 * Note: we need to explicitly initialize %ecx here, since
1717 1718 * function 4 may have been previously invoked.
1718 1719 */
1719 1720 if (n == 4)
1720 1721 cp->cp_ecx = 0;
1721 1722
1722 1723 (void) __cpuid_insn(cp);
1723 1724 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1724 1725 switch (n) {
1725 1726 case 2:
1726 1727 /*
1727 1728 * "the lower 8 bits of the %eax register
1728 1729 * contain a value that identifies the number
1729 1730 * of times the cpuid [instruction] has to be
1730 1731 * executed to obtain a complete image of the
1731 1732 * processor's caching systems."
1732 1733 *
1733 1734 * How *do* they make this stuff up?
1734 1735 */
1735 1736 cpi->cpi_ncache = sizeof (*cp) *
1736 1737 BITX(cp->cp_eax, 7, 0);
1737 1738 if (cpi->cpi_ncache == 0)
1738 1739 break;
1739 1740 cpi->cpi_ncache--; /* skip count byte */
1740 1741
1741 1742 /*
1742 1743 * Well, for now, rather than attempt to implement
1743 1744 * this slightly dubious algorithm, we just look
1744 1745 * at the first 15 ..
1745 1746 */
1746 1747 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1747 1748 cpi->cpi_ncache = sizeof (*cp) - 1;
1748 1749
1749 1750 dp = cpi->cpi_cacheinfo;
1750 1751 if (BITX(cp->cp_eax, 31, 31) == 0) {
1751 1752 uint8_t *p = (void *)&cp->cp_eax;
1752 1753 for (i = 1; i < 4; i++)
1753 1754 if (p[i] != 0)
1754 1755 *dp++ = p[i];
1755 1756 }
1756 1757 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1757 1758 uint8_t *p = (void *)&cp->cp_ebx;
1758 1759 for (i = 0; i < 4; i++)
1759 1760 if (p[i] != 0)
1760 1761 *dp++ = p[i];
1761 1762 }
1762 1763 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1763 1764 uint8_t *p = (void *)&cp->cp_ecx;
1764 1765 for (i = 0; i < 4; i++)
1765 1766 if (p[i] != 0)
1766 1767 *dp++ = p[i];
1767 1768 }
1768 1769 if (BITX(cp->cp_edx, 31, 31) == 0) {
1769 1770 uint8_t *p = (void *)&cp->cp_edx;
1770 1771 for (i = 0; i < 4; i++)
1771 1772 if (p[i] != 0)
1772 1773 *dp++ = p[i];
1773 1774 }
1774 1775 break;
1775 1776
1776 1777 case 3: /* Processor serial number, if PSN supported */
1777 1778 break;
1778 1779
1779 1780 case 4: /* Deterministic cache parameters */
1780 1781 break;
1781 1782
1782 1783 case 5: /* Monitor/Mwait parameters */
1783 1784 {
1784 1785 size_t mwait_size;
1785 1786
1786 1787 /*
1787 1788 * check cpi_mwait.support which was set in cpuid_pass1
1788 1789 */
1789 1790 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1790 1791 break;
1791 1792
1792 1793 /*
1793 1794 * Protect ourselves from an insane mwait line size.
1794 1795 * Workaround for incomplete hardware emulator(s).
1795 1796 */
1796 1797 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1797 1798 if (mwait_size < sizeof (uint32_t) ||
1798 1799 !ISP2(mwait_size)) {
1799 1800 #if DEBUG
1800 1801 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1801 1802 "size %ld", cpu->cpu_id, (long)mwait_size);
1802 1803 #endif
1803 1804 break;
1804 1805 }
1805 1806
1806 1807 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1807 1808 cpi->cpi_mwait.mon_max = mwait_size;
1808 1809 if (MWAIT_EXTENSION(cpi)) {
1809 1810 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1810 1811 if (MWAIT_INT_ENABLE(cpi))
1811 1812 cpi->cpi_mwait.support |=
1812 1813 MWAIT_ECX_INT_ENABLE;
1813 1814 }
1814 1815 break;
1815 1816 }
1816 1817 default:
1817 1818 break;
1818 1819 }
1819 1820 }
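
The function 4 sub-leaf protocol described in the comment above can be
exercised from user space on Intel hardware. A hedged sketch follows;
cpuid2() is a helper written for this example (not a kernel or libc
routine), and the later sketches in this review reuse it:

	#include <stdio.h>
	#include <stdint.h>

	static void
	cpuid2(uint32_t leaf, uint32_t subleaf, uint32_t r[4])
	{
		__asm__ __volatile__("cpuid"
		    : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
		    : "a" (leaf), "c" (subleaf));
	}

	int
	main(void)
	{
		uint32_t r[4], i;

		/* Walk leaf 4 with %ecx = 0, 1, ... until EAX[4:0] is 0. */
		for (i = 0; ; i++) {
			cpuid2(4, i, r);
			if ((r[0] & 0x1f) == 0)	/* cache type: none */
				break;
			(void) printf("cache %u: type %u, level %u\n",
			    i, r[0] & 0x1f, (r[0] >> 5) & 0x7);
		}
		return (0);
	}
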
1820 1821
1821 1822 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1822 1823 struct cpuid_regs regs;
1823 1824
1824 1825 cp = &regs;
1825 1826 cp->cp_eax = 0xB;
1826 1827 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1827 1828
1828 1829 (void) __cpuid_insn(cp);
1829 1830
1830 1831 /*
1831 1832 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1832 1833 * indicates that the extended topology enumeration leaf is
1833 1834 * available.
1834 1835 */
1835 1836 if (cp->cp_ebx) {
1836 1837 uint32_t x2apic_id;
1837 1838 uint_t coreid_shift = 0;
1838 1839 uint_t ncpu_per_core = 1;
1839 1840 uint_t chipid_shift = 0;
1840 1841 uint_t ncpu_per_chip = 1;
1841 1842 uint_t i;
1842 1843 uint_t level;
1843 1844
1844 1845 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1845 1846 cp->cp_eax = 0xB;
1846 1847 cp->cp_ecx = i;
1847 1848
1848 1849 (void) __cpuid_insn(cp);
1849 1850 level = CPI_CPU_LEVEL_TYPE(cp);
1850 1851
1851 1852 if (level == 1) {
1852 1853 x2apic_id = cp->cp_edx;
1853 1854 coreid_shift = BITX(cp->cp_eax, 4, 0);
1854 1855 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1855 1856 } else if (level == 2) {
1856 1857 x2apic_id = cp->cp_edx;
1857 1858 chipid_shift = BITX(cp->cp_eax, 4, 0);
1858 1859 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1859 1860 }
1860 1861 }
1861 1862
1862 1863 cpi->cpi_apicid = x2apic_id;
1863 1864 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1864 1865 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1865 1866 ncpu_per_core;
1866 1867 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1867 1868 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1868 1869 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1869 1870 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1870 1871 }
1871 1872
1872 1873 /* Make cp NULL so that we don't stumble on others */
1873 1874 cp = NULL;
1874 1875 }
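
To make the shift arithmetic above concrete, a worked example with
hypothetical leaf 0xB results: coreid_shift = 1 (SMT level) and
chipid_shift = 4 (core level):

	uint32_t x2apic_id = 0x15;			/* 0b10101, hypothetical */
	uint_t chipid = x2apic_id >> 4;			/* 0x1 */
	uint_t clogid = x2apic_id & ((1 << 4) - 1);	/* 0x5, logical id in chip */
	uint_t coreid = x2apic_id >> 1;			/* 0xa, system-wide core id */
	uint_t pkgcoreid = clogid >> 1;			/* 0x2, core id within chip */
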
1875 1876
1876 1877 /*
1877 1878 * XSAVE enumeration
1878 1879 */
1879 1880 if (cpi->cpi_maxeax >= 0xD) {
1880 1881 struct cpuid_regs regs;
1881 1882 boolean_t cpuid_d_valid = B_TRUE;
1882 1883
1883 1884 cp = &regs;
1884 1885 cp->cp_eax = 0xD;
1885 1886 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1886 1887
1887 1888 (void) __cpuid_insn(cp);
1888 1889
1889 1890 /*
1890 1891 * Sanity checks for debug
1891 1892 */
1892 1893 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1893 1894 (cp->cp_eax & XFEATURE_SSE) == 0) {
1894 1895 cpuid_d_valid = B_FALSE;
1895 1896 }
1896 1897
1897 1898 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1898 1899 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1899 1900 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1900 1901
1901 1902 /*
1902 1903 * If the hw supports AVX, get the size and offset in the save
1903 1904 * area for the ymm state.
1904 1905 */
1905 1906 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1906 1907 cp->cp_eax = 0xD;
1907 1908 cp->cp_ecx = 2;
1908 1909 cp->cp_edx = cp->cp_ebx = 0;
1909 1910
1910 1911 (void) __cpuid_insn(cp);
1911 1912
1912 1913 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1913 1914 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1914 1915 cpuid_d_valid = B_FALSE;
1915 1916 }
1916 1917
1917 1918 cpi->cpi_xsave.ymm_size = cp->cp_eax;
1918 1919 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1919 1920 }
1920 1921
1921 1922 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1922 1923 xsave_state_size = 0;
1923 1924 } else if (cpuid_d_valid) {
1924 1925 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1925 1926 } else {
1926 1927 /* Broken CPUID 0xD, probably in HVM */
1927 1928 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1928 1929 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1929 1930 ", ymm_size = %d, ymm_offset = %d\n",
1930 1931 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1931 1932 cpi->cpi_xsave.xsav_hw_features_high,
1932 1933 (int)cpi->cpi_xsave.xsav_max_size,
1933 1934 (int)cpi->cpi_xsave.ymm_size,
1934 1935 (int)cpi->cpi_xsave.ymm_offset);
1935 1936
1936 1937 if (xsave_state_size != 0) {
1937 1938 /*
1938 1939 * This must be a non-boot CPU. We cannot
1939 1940 * continue, because boot cpu has already
1940 1941 * enabled XSAVE.
1941 1942 */
1942 1943 ASSERT(cpu->cpu_id != 0);
1943 1944 cmn_err(CE_PANIC, "cpu%d: we have already "
1944 1945 "enabled XSAVE on boot cpu, cannot "
1945 1946 "continue.", cpu->cpu_id);
1946 1947 } else {
1947 1948 /*
1948 1949 * Must be from boot CPU, OK to disable XSAVE.
1949 1950 */
1950 1951 ASSERT(cpu->cpu_id == 0);
1951 1952 remove_x86_feature(x86_featureset,
1952 1953 X86FSET_XSAVE);
1953 1954 remove_x86_feature(x86_featureset, X86FSET_AVX);
1954 1955 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1955 1956 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1956 1957 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_F16C;
1957 1958 xsave_force_disable = B_TRUE;
1958 1959 }
1959 1960 }
1960 1961 }
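
The same leaf 0xD probing can be sketched in user space, reusing the
cpuid2() helper from the earlier sketch. The expected size and offset are
the architectural values the kernel checks against
(CPUID_LEAFD_2_YMM_SIZE and CPUID_LEAFD_2_YMM_OFFSET):

	uint32_t r[4];

	cpuid2(0xd, 0, r);	/* r[0]/r[3]: feature mask, r[2]: max save size */
	if (r[0] & 0x04) {	/* XFEATURE_AVX, bit 2 */
		cpuid2(0xd, 2, r);
		/* expect r[0] (ymm size) == 256, r[1] (ymm offset) == 576 */
		(void) printf("ymm: %u bytes at offset %u\n", r[0], r[1]);
	}
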
1961 1962
1962 1963
1963 1964 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1964 1965 goto pass2_done;
1965 1966
1966 1967 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1967 1968 nmax = NMAX_CPI_EXTD;
1968 1969 /*
1969 1970 * Copy the extended properties, fixing them as we go.
1970 1971 * (We already handled n == 0 and n == 1 in pass 1)
1971 1972 */
1972 1973 iptr = (void *)cpi->cpi_brandstr;
1973 1974 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1974 1975 cp->cp_eax = 0x80000000 + n;
1975 1976 (void) __cpuid_insn(cp);
1976 1977 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1977 1978 switch (n) {
1978 1979 case 2:
1979 1980 case 3:
1980 1981 case 4:
1981 1982 /*
1982 1983 * Extract the brand string
1983 1984 */
1984 1985 *iptr++ = cp->cp_eax;
1985 1986 *iptr++ = cp->cp_ebx;
1986 1987 *iptr++ = cp->cp_ecx;
1987 1988 *iptr++ = cp->cp_edx;
1988 1989 break;
1989 1990 case 5:
1990 1991 switch (cpi->cpi_vendor) {
1991 1992 case X86_VENDOR_AMD:
1992 1993 /*
1993 1994 * The Athlon and Duron were the first
1994 1995 * parts to report the sizes of the
1995 1996 * TLB for large pages. Before then,
1996 1997 * we don't trust the data.
1997 1998 */
1998 1999 if (cpi->cpi_family < 6 ||
1999 2000 (cpi->cpi_family == 6 &&
2000 2001 cpi->cpi_model < 1))
2001 2002 cp->cp_eax = 0;
2002 2003 break;
2003 2004 default:
2004 2005 break;
2005 2006 }
2006 2007 break;
2007 2008 case 6:
2008 2009 switch (cpi->cpi_vendor) {
2009 2010 case X86_VENDOR_AMD:
2010 2011 /*
2011 2012 * The Athlon and Duron were the first
2012 2013 * AMD parts with L2 TLB's.
2013 2014 * Before then, don't trust the data.
2014 2015 */
2015 2016 if (cpi->cpi_family < 6 ||
2016 2017 cpi->cpi_family == 6 &&
2017 2018 cpi->cpi_model < 1)
2018 2019 cp->cp_eax = cp->cp_ebx = 0;
2019 2020 /*
2020 2021 * AMD Duron rev A0 reports L2
2021 2022 * cache size incorrectly as 1K
2022 2023 * when it is really 64K
2023 2024 */
2024 2025 if (cpi->cpi_family == 6 &&
2025 2026 cpi->cpi_model == 3 &&
2026 2027 cpi->cpi_step == 0) {
2027 2028 cp->cp_ecx &= 0xffff;
2028 2029 cp->cp_ecx |= 0x400000;
2029 2030 }
2030 2031 break;
2031 2032 case X86_VENDOR_Cyrix: /* VIA C3 */
2032 2033 /*
2033 2034 * VIA C3 processors are a bit messed
2034 2035 * up w.r.t. encoding cache sizes in %ecx
2035 2036 */
2036 2037 if (cpi->cpi_family != 6)
2037 2038 break;
2038 2039 /*
2039 2040 * model 7 and 8 were incorrectly encoded
2040 2041 *
2041 2042 * xxx is model 8 really broken?
2042 2043 */
2043 2044 if (cpi->cpi_model == 7 ||
2044 2045 cpi->cpi_model == 8)
2045 2046 cp->cp_ecx =
2046 2047 BITX(cp->cp_ecx, 31, 24) << 16 |
2047 2048 BITX(cp->cp_ecx, 23, 16) << 12 |
2048 2049 BITX(cp->cp_ecx, 15, 8) << 8 |
2049 2050 BITX(cp->cp_ecx, 7, 0);
2050 2051 /*
2051 2052 * model 9 stepping 1 has wrong associativity
2052 2053 */
2053 2054 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
2054 2055 cp->cp_ecx |= 8 << 12;
2055 2056 break;
2056 2057 case X86_VENDOR_Intel:
2057 2058 /*
2058 2059 * Extended L2 Cache features function.
2059 2060 * First appeared on Prescott.
2060 2061 */
2061 2062 default:
2062 2063 break;
2063 2064 }
2064 2065 break;
2065 2066 default:
2066 2067 break;
2067 2068 }
2068 2069 }
2069 2070
2070 2071 pass2_done:
2071 2072 cpi->cpi_pass = 2;
2072 2073 }
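
The brand-string extraction in the loop above has a direct user-space
analogue. A hedged sketch, reusing cpuid2() and assuming leaf 0x80000000
has already confirmed that leaves 0x80000002..0x80000004 exist:

	int
	main(void)
	{
		char brand[49];
		uint32_t *ip = (uint32_t *)brand;
		uint32_t n, r[4];

		/* Each of the three leaves yields 16 bytes of the string. */
		for (n = 2; n <= 4; n++) {
			cpuid2(0x80000000 + n, 0, r);
			*ip++ = r[0]; *ip++ = r[1]; *ip++ = r[2]; *ip++ = r[3];
		}
		brand[48] = '\0';	/* the hardware NUL-pads, but be safe */
		(void) printf("%s\n", brand);
		return (0);
	}
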
2073 2074
2074 2075 static const char *
2075 2076 intel_cpubrand(const struct cpuid_info *cpi)
2076 2077 {
2077 2078 int i;
2078 2079
2079 2080 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2080 2081 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2081 2082 return ("i486");
2082 2083
2083 2084 switch (cpi->cpi_family) {
2084 2085 case 5:
2085 2086 return ("Intel Pentium(r)");
2086 2087 case 6:
2087 2088 switch (cpi->cpi_model) {
2088 2089 uint_t celeron, xeon;
2089 2090 const struct cpuid_regs *cp;
2090 2091 case 0:
2091 2092 case 1:
2092 2093 case 2:
2093 2094 return ("Intel Pentium(r) Pro");
2094 2095 case 3:
2095 2096 case 4:
2096 2097 return ("Intel Pentium(r) II");
2097 2098 case 6:
2098 2099 return ("Intel Celeron(r)");
2099 2100 case 5:
2100 2101 case 7:
2101 2102 celeron = xeon = 0;
2102 2103 cp = &cpi->cpi_std[2]; /* cache info */
2103 2104
2104 2105 for (i = 1; i < 4; i++) {
2105 2106 uint_t tmp;
2106 2107
2107 2108 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2108 2109 if (tmp == 0x40)
2109 2110 celeron++;
2110 2111 if (tmp >= 0x44 && tmp <= 0x45)
2111 2112 xeon++;
2112 2113 }
2113 2114
2114 2115 for (i = 0; i < 2; i++) {
2115 2116 uint_t tmp;
2116 2117
2117 2118 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2118 2119 if (tmp == 0x40)
2119 2120 celeron++;
2120 2121 else if (tmp >= 0x44 && tmp <= 0x45)
2121 2122 xeon++;
2122 2123 }
2123 2124
2124 2125 for (i = 0; i < 4; i++) {
2125 2126 uint_t tmp;
2126 2127
2127 2128 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2128 2129 if (tmp == 0x40)
2129 2130 celeron++;
2130 2131 else if (tmp >= 0x44 && tmp <= 0x45)
2131 2132 xeon++;
2132 2133 }
2133 2134
2134 2135 for (i = 0; i < 4; i++) {
2135 2136 uint_t tmp;
2136 2137
2137 2138 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2138 2139 if (tmp == 0x40)
2139 2140 celeron++;
2140 2141 else if (tmp >= 0x44 && tmp <= 0x45)
2141 2142 xeon++;
2142 2143 }
2143 2144
2144 2145 if (celeron)
2145 2146 return ("Intel Celeron(r)");
2146 2147 if (xeon)
2147 2148 return (cpi->cpi_model == 5 ?
2148 2149 "Intel Pentium(r) II Xeon(tm)" :
2149 2150 "Intel Pentium(r) III Xeon(tm)");
2150 2151 return (cpi->cpi_model == 5 ?
2151 2152 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2152 2153 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2153 2154 default:
2154 2155 break;
2155 2156 }
2156 2157 default:
2157 2158 break;
2158 2159 }
2159 2160
2160 2161 /* BrandID is present if the field is nonzero */
2161 2162 if (cpi->cpi_brandid != 0) {
2162 2163 static const struct {
2163 2164 uint_t bt_bid;
2164 2165 const char *bt_str;
2165 2166 } brand_tbl[] = {
2166 2167 { 0x1, "Intel(r) Celeron(r)" },
2167 2168 { 0x2, "Intel(r) Pentium(r) III" },
2168 2169 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2169 2170 { 0x4, "Intel(r) Pentium(r) III" },
2170 2171 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2171 2172 { 0x7, "Mobile Intel(r) Celeron(r)" },
2172 2173 { 0x8, "Intel(r) Pentium(r) 4" },
2173 2174 { 0x9, "Intel(r) Pentium(r) 4" },
2174 2175 { 0xa, "Intel(r) Celeron(r)" },
2175 2176 { 0xb, "Intel(r) Xeon(tm)" },
2176 2177 { 0xc, "Intel(r) Xeon(tm) MP" },
2177 2178 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2178 2179 { 0xf, "Mobile Intel(r) Celeron(r)" },
2179 2180 { 0x11, "Mobile Genuine Intel(r)" },
2180 2181 { 0x12, "Intel(r) Celeron(r) M" },
2181 2182 { 0x13, "Mobile Intel(r) Celeron(r)" },
2182 2183 { 0x14, "Intel(r) Celeron(r)" },
2183 2184 { 0x15, "Mobile Genuine Intel(r)" },
2184 2185 { 0x16, "Intel(r) Pentium(r) M" },
2185 2186 { 0x17, "Mobile Intel(r) Celeron(r)" }
2186 2187 };
2187 2188 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2188 2189 uint_t sgn;
2189 2190
2190 2191 sgn = (cpi->cpi_family << 8) |
2191 2192 (cpi->cpi_model << 4) | cpi->cpi_step;
2192 2193
2193 2194 for (i = 0; i < btblmax; i++)
2194 2195 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2195 2196 break;
2196 2197 if (i < btblmax) {
2197 2198 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2198 2199 return ("Intel(r) Celeron(r)");
2199 2200 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2200 2201 return ("Intel(r) Xeon(tm) MP");
2201 2202 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2202 2203 return ("Intel(r) Xeon(tm)");
2203 2204 return (brand_tbl[i].bt_str);
2204 2205 }
2205 2206 }
2206 2207
2207 2208 return (NULL);
2208 2209 }
2209 2210
2210 2211 static const char *
2211 2212 amd_cpubrand(const struct cpuid_info *cpi)
2212 2213 {
2213 2214 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2214 2215 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2215 2216 return ("i486 compatible");
2216 2217
2217 2218 switch (cpi->cpi_family) {
2218 2219 case 5:
2219 2220 switch (cpi->cpi_model) {
2220 2221 case 0:
2221 2222 case 1:
2222 2223 case 2:
2223 2224 case 3:
2224 2225 case 4:
2225 2226 case 5:
2226 2227 return ("AMD-K5(r)");
2227 2228 case 6:
2228 2229 case 7:
2229 2230 return ("AMD-K6(r)");
2230 2231 case 8:
2231 2232 return ("AMD-K6(r)-2");
2232 2233 case 9:
2233 2234 return ("AMD-K6(r)-III");
2234 2235 default:
2235 2236 return ("AMD (family 5)");
2236 2237 }
2237 2238 case 6:
2238 2239 switch (cpi->cpi_model) {
2239 2240 case 1:
2240 2241 return ("AMD-K7(tm)");
2241 2242 case 0:
2242 2243 case 2:
2243 2244 case 4:
2244 2245 return ("AMD Athlon(tm)");
2245 2246 case 3:
2246 2247 case 7:
2247 2248 return ("AMD Duron(tm)");
2248 2249 case 6:
2249 2250 case 8:
2250 2251 case 10:
2251 2252 /*
2252 2253 * Use the L2 cache size to distinguish
2253 2254 */
2254 2255 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2255 2256 "AMD Athlon(tm)" : "AMD Duron(tm)");
2256 2257 default:
2257 2258 return ("AMD (family 6)");
2258 2259 }
2259 2260 default:
2260 2261 break;
2261 2262 }
2262 2263
2263 2264 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2264 2265 cpi->cpi_brandid != 0) {
2265 2266 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2266 2267 case 3:
2267 2268 return ("AMD Opteron(tm) UP 1xx");
2268 2269 case 4:
2269 2270 return ("AMD Opteron(tm) DP 2xx");
2270 2271 case 5:
2271 2272 return ("AMD Opteron(tm) MP 8xx");
2272 2273 default:
2273 2274 return ("AMD Opteron(tm)");
2274 2275 }
2275 2276 }
2276 2277
2277 2278 return (NULL);
2278 2279 }
2279 2280
2280 2281 static const char *
2281 2282 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2282 2283 {
2283 2284 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2284 2285 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2285 2286 type == X86_TYPE_CYRIX_486)
2286 2287 return ("i486 compatible");
2287 2288
2288 2289 switch (type) {
2289 2290 case X86_TYPE_CYRIX_6x86:
2290 2291 return ("Cyrix 6x86");
2291 2292 case X86_TYPE_CYRIX_6x86L:
2292 2293 return ("Cyrix 6x86L");
2293 2294 case X86_TYPE_CYRIX_6x86MX:
2294 2295 return ("Cyrix 6x86MX");
2295 2296 case X86_TYPE_CYRIX_GXm:
2296 2297 return ("Cyrix GXm");
2297 2298 case X86_TYPE_CYRIX_MediaGX:
2298 2299 return ("Cyrix MediaGX");
2299 2300 case X86_TYPE_CYRIX_MII:
2300 2301 return ("Cyrix M2");
2301 2302 case X86_TYPE_VIA_CYRIX_III:
2302 2303 return ("VIA Cyrix M3");
2303 2304 default:
2304 2305 /*
2305 2306 * Have another wild guess ..
2306 2307 */
2307 2308 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2308 2309 return ("Cyrix 5x86");
2309 2310 else if (cpi->cpi_family == 5) {
2310 2311 switch (cpi->cpi_model) {
2311 2312 case 2:
2312 2313 return ("Cyrix 6x86"); /* Cyrix M1 */
2313 2314 case 4:
2314 2315 return ("Cyrix MediaGX");
2315 2316 default:
2316 2317 break;
2317 2318 }
2318 2319 } else if (cpi->cpi_family == 6) {
2319 2320 switch (cpi->cpi_model) {
2320 2321 case 0:
2321 2322 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2322 2323 case 5:
2323 2324 case 6:
2324 2325 case 7:
2325 2326 case 8:
2326 2327 case 9:
2327 2328 return ("VIA C3");
2328 2329 default:
2329 2330 break;
2330 2331 }
2331 2332 }
2332 2333 break;
2333 2334 }
2334 2335 return (NULL);
2335 2336 }
2336 2337
2337 2338 /*
2338 2339 * This only gets called when the CPU extended feature brand
2339 2340 * string leaves (0x80000002, 0x80000003, 0x80000004) aren't
2340 2341 * available, or contain null bytes for some reason.
2341 2342 */
2342 2343 static void
2343 2344 fabricate_brandstr(struct cpuid_info *cpi)
2344 2345 {
2345 2346 const char *brand = NULL;
2346 2347
2347 2348 switch (cpi->cpi_vendor) {
2348 2349 case X86_VENDOR_Intel:
2349 2350 brand = intel_cpubrand(cpi);
2350 2351 break;
2351 2352 case X86_VENDOR_AMD:
2352 2353 brand = amd_cpubrand(cpi);
2353 2354 break;
2354 2355 case X86_VENDOR_Cyrix:
2355 2356 brand = cyrix_cpubrand(cpi, x86_type);
2356 2357 break;
2357 2358 case X86_VENDOR_NexGen:
2358 2359 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2359 2360 brand = "NexGen Nx586";
2360 2361 break;
2361 2362 case X86_VENDOR_Centaur:
2362 2363 if (cpi->cpi_family == 5)
2363 2364 switch (cpi->cpi_model) {
2364 2365 case 4:
2365 2366 brand = "Centaur C6";
2366 2367 break;
2367 2368 case 8:
2368 2369 brand = "Centaur C2";
2369 2370 break;
2370 2371 case 9:
2371 2372 brand = "Centaur C3";
2372 2373 break;
2373 2374 default:
2374 2375 break;
2375 2376 }
2376 2377 break;
2377 2378 case X86_VENDOR_Rise:
2378 2379 if (cpi->cpi_family == 5 &&
2379 2380 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2380 2381 brand = "Rise mP6";
2381 2382 break;
2382 2383 case X86_VENDOR_SiS:
2383 2384 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2384 2385 brand = "SiS 55x";
2385 2386 break;
2386 2387 case X86_VENDOR_TM:
2387 2388 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2388 2389 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2389 2390 break;
2390 2391 case X86_VENDOR_NSC:
2391 2392 case X86_VENDOR_UMC:
2392 2393 default:
2393 2394 break;
2394 2395 }
2395 2396 if (brand) {
2396 2397 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2397 2398 return;
2398 2399 }
2399 2400
2400 2401 /*
2401 2402 * If all else fails ...
2402 2403 */
2403 2404 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2404 2405 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2405 2406 cpi->cpi_model, cpi->cpi_step);
2406 2407 }
2407 2408
2408 2409 /*
2409 2410 * This routine is called just after kernel memory allocation
2410 2411 * becomes available on cpu0, and as part of mp_startup() on
2411 2412 * the other cpus.
2412 2413 *
2413 2414 * Fixup the brand string, and collect any information from cpuid
2414 2415 * that requires dynamically allocated storage to represent.
2415 2416 */
2416 2417 /*ARGSUSED*/
2417 2418 void
2418 2419 cpuid_pass3(cpu_t *cpu)
2419 2420 {
2420 2421 int i, max, shft, level, size;
2421 2422 struct cpuid_regs regs;
2422 2423 struct cpuid_regs *cp;
2423 2424 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2424 2425
2425 2426 ASSERT(cpi->cpi_pass == 2);
2426 2427
2427 2428 /*
2428 2429 * Function 4: Deterministic cache parameters
2429 2430 *
2430 2431 * Take this opportunity to detect the number of threads
2431 2432 * sharing the last level cache, and construct a corresponding
2432 2433 * cache id. The respective cpuid_info members are initialized
2433 2434 * to the default case of "no last level cache sharing".
2434 2435 */
2435 2436 cpi->cpi_ncpu_shr_last_cache = 1;
2436 2437 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2437 2438
2438 2439 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2439 2440
2440 2441 /*
2441 2442 * Find the # of elements (size) returned by fn 4, and along
2442 2443 * the way detect last level cache sharing details.
2443 2444 */
2444 2445 bzero(&regs, sizeof (regs));
2445 2446 cp = &regs;
2446 2447 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2447 2448 cp->cp_eax = 4;
2448 2449 cp->cp_ecx = i;
2449 2450
2450 2451 (void) __cpuid_insn(cp);
2451 2452
2452 2453 if (CPI_CACHE_TYPE(cp) == 0)
2453 2454 break;
2454 2455 level = CPI_CACHE_LVL(cp);
2455 2456 if (level > max) {
2456 2457 max = level;
2457 2458 cpi->cpi_ncpu_shr_last_cache =
2458 2459 CPI_NTHR_SHR_CACHE(cp) + 1;
2459 2460 }
2460 2461 }
2461 2462 cpi->cpi_std_4_size = size = i;
2462 2463
2463 2464 /*
2464 2465 * Allocate the cpi_std_4 array. The first element
2465 2466 * references the regs for fn 4, %ecx == 0, which
2466 2467 * cpuid_pass2() stashed in cpi->cpi_std[4].
2467 2468 */
2468 2469 if (size > 0) {
2469 2470 cpi->cpi_std_4 =
2470 2471 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2471 2472 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2472 2473
2473 2474 /*
2474 2475 * Allocate storage to hold the additional regs
2475 2476 * for function 4, %ecx == 1 .. cpi_std_4_size.
2476 2477 *
2477 2478 * The regs for fn 4, %ecx == 0 has already
2478 2479 * been allocated as indicated above.
2479 2480 */
2480 2481 for (i = 1; i < size; i++) {
2481 2482 cp = cpi->cpi_std_4[i] =
2482 2483 kmem_zalloc(sizeof (regs), KM_SLEEP);
2483 2484 cp->cp_eax = 4;
2484 2485 cp->cp_ecx = i;
2485 2486
2486 2487 (void) __cpuid_insn(cp);
2487 2488 }
2488 2489 }
2489 2490 /*
2490 2491 * Determine the number of bits needed to represent
2491 2492 * the number of CPUs sharing the last level cache.
2492 2493 *
2493 2494 * Shift off that number of bits from the APIC id to
2494 2495 * derive the cache id.
2495 2496 */
2496 2497 shft = 0;
2497 2498 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2498 2499 shft++;
2499 2500 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2500 2501 }
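
A worked instance of the shift computation above, assuming eight CPUs
share the last-level cache (the APIC id is an illustrative value):

	uint_t ncpu_shr = 8, shft = 0, i;
	uint_t apicid = 11;			/* hypothetical */
	uint_t cacheid;

	for (i = 1; i < ncpu_shr; i <<= 1)	/* i = 1, 2, 4: shft becomes 3 */
		shft++;
	/* apicid 0..7 -> cacheid 0, apicid 8..15 -> cacheid 1, ... */
	cacheid = apicid >> shft;		/* 11 >> 3 = 1 */
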
2501 2502
2502 2503 /*
2503 2504 * Now fixup the brand string
2504 2505 */
2505 2506 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2506 2507 fabricate_brandstr(cpi);
2507 2508 } else {
2508 2509
2509 2510 /*
2510 2511 * If we successfully extracted a brand string from the cpuid
2511 2512 * instruction, clean it up by removing leading spaces and
2512 2513 * similar junk.
2513 2514 */
2514 2515 if (cpi->cpi_brandstr[0]) {
2515 2516 size_t maxlen = sizeof (cpi->cpi_brandstr);
2516 2517 char *src, *dst;
2517 2518
2518 2519 dst = src = (char *)cpi->cpi_brandstr;
2519 2520 src[maxlen - 1] = '\0';
2520 2521 /*
2521 2522 * strip leading spaces
2522 2523 */
2523 2524 while (*src == ' ')
2524 2525 src++;
2525 2526 /*
2526 2527 * Remove any 'Genuine' or "Authentic" prefixes
2527 2528 */
2528 2529 if (strncmp(src, "Genuine ", 8) == 0)
2529 2530 src += 8;
2530 2531 if (strncmp(src, "Authentic ", 10) == 0)
2531 2532 src += 10;
2532 2533
2533 2534 /*
2534 2535 * Now do an in-place copy.
2535 2536 * Map (R) to (r) and (TM) to (tm).
2536 2537 * The era of teletypes is long gone, and there's
2537 2538 * -really- no need to shout.
2538 2539 */
2539 2540 while (*src != '\0') {
2540 2541 if (src[0] == '(') {
2541 2542 if (strncmp(src + 1, "R)", 2) == 0) {
2542 2543 (void) strncpy(dst, "(r)", 3);
2543 2544 src += 3;
2544 2545 dst += 3;
2545 2546 continue;
2546 2547 }
2547 2548 if (strncmp(src + 1, "TM)", 3) == 0) {
2548 2549 (void) strncpy(dst, "(tm)", 4);
2549 2550 src += 4;
2550 2551 dst += 4;
2551 2552 continue;
2552 2553 }
2553 2554 }
2554 2555 *dst++ = *src++;
2555 2556 }
2556 2557 *dst = '\0';
2557 2558
2558 2559 /*
2559 2560 * Finally, remove any trailing spaces
2560 2561 */
2561 2562 while (--dst > cpi->cpi_brandstr)
2562 2563 if (*dst == ' ')
2563 2564 *dst = '\0';
2564 2565 else
2565 2566 break;
2566 2567 } else
2567 2568 fabricate_brandstr(cpi);
2568 2569 }
2569 2570 cpi->cpi_pass = 3;
2570 2571 }
2571 2572
2572 2573 /*
2573 2574 * This routine is called out of bind_hwcap() much later in the life
2574 2575 * of the kernel (post_startup()). The job of this routine is to resolve
2575 2576 * the hardware feature support and kernel support for those features into
2576 2577 * what we're actually going to tell applications via the aux vector.
2577 2578 */
2578 2579 void
2579 2580 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
2580 2581 {
2581 2582 struct cpuid_info *cpi;
2582 2583 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
2583 2584
2584 2585 if (cpu == NULL)
2585 2586 cpu = CPU;
2586 2587 cpi = cpu->cpu_m.mcpu_cpi;
2587 2588
2588 2589 ASSERT(cpi->cpi_pass == 3);
2589 2590
2590 2591 if (cpi->cpi_maxeax >= 1) {
2591 2592 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2592 2593 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2593 2594
2594 2595 *edx = CPI_FEATURES_EDX(cpi);
2595 2596 *ecx = CPI_FEATURES_ECX(cpi);
2596 2597
2597 2598 /*
2598 2599 * [these require explicit kernel support]
2599 2600 */
2600 2601 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2601 2602 *edx &= ~CPUID_INTC_EDX_SEP;
2602 2603
2603 2604 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2604 2605 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2605 2606 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2606 2607 *edx &= ~CPUID_INTC_EDX_SSE2;
2607 2608
2608 2609 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2609 2610 *edx &= ~CPUID_INTC_EDX_HTT;
2610 2611
2611 2612 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2612 2613 *ecx &= ~CPUID_INTC_ECX_SSE3;
2613 2614
2614 2615 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2615 2616 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2616 2617 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2617 2618 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2618 2619 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2619 2620 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2620 2621 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2621 2622 *ecx &= ~CPUID_INTC_ECX_AES;
2622 2623 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2623 2624 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2624 2625 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2625 2626 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2626 2627 CPUID_INTC_ECX_OSXSAVE);
2627 2628 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2628 2629 *ecx &= ~CPUID_INTC_ECX_AVX;
2629 2630 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
2630 2631 *ecx &= ~CPUID_INTC_ECX_F16C;
2631 2632
2632 2633 /*
2633 2634 * [no explicit support required beyond x87 fp context]
2634 2635 */
2635 2636 if (!fpu_exists)
2636 2637 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2637 2638
2638 2639 /*
2639 2640 * Now map the supported feature vector to things that we
2640 2641 * think userland will care about.
2641 2642 */
2642 2643 if (*edx & CPUID_INTC_EDX_SEP)
2643 2644 hwcap_flags |= AV_386_SEP;
2644 2645 if (*edx & CPUID_INTC_EDX_SSE)
2645 2646 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2646 2647 if (*edx & CPUID_INTC_EDX_SSE2)
2647 2648 hwcap_flags |= AV_386_SSE2;
2648 2649 if (*ecx & CPUID_INTC_ECX_SSE3)
2649 2650 hwcap_flags |= AV_386_SSE3;
2650 2651 if (*ecx & CPUID_INTC_ECX_SSSE3)
2651 2652 hwcap_flags |= AV_386_SSSE3;
2652 2653 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2653 2654 hwcap_flags |= AV_386_SSE4_1;
2654 2655 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2655 2656 hwcap_flags |= AV_386_SSE4_2;
2656 2657 if (*ecx & CPUID_INTC_ECX_MOVBE)
2657 2658 hwcap_flags |= AV_386_MOVBE;
2658 2659 if (*ecx & CPUID_INTC_ECX_AES)
2659 2660 hwcap_flags |= AV_386_AES;
2660 2661 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2661 2662 hwcap_flags |= AV_386_PCLMULQDQ;
2662 2663 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2663 2664 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2664 2665 hwcap_flags |= AV_386_XSAVE;
2665 2666
2666 2667 if (*ecx & CPUID_INTC_ECX_AVX) {
2667 2668 hwcap_flags |= AV_386_AVX;
2668 2669 if (*ecx & CPUID_INTC_ECX_F16C)
2669 2670 hwcap_flags_2 |= AV_386_2_F16C;
2670 2671 }
2671 2672 }
2672 2673 if (*ecx & CPUID_INTC_ECX_VMX)
2673 2674 hwcap_flags |= AV_386_VMX;
2674 2675 if (*ecx & CPUID_INTC_ECX_POPCNT)
2675 2676 hwcap_flags |= AV_386_POPCNT;
2676 2677 if (*edx & CPUID_INTC_EDX_FPU)
2677 2678 hwcap_flags |= AV_386_FPU;
2678 2679 if (*edx & CPUID_INTC_EDX_MMX)
2679 2680 hwcap_flags |= AV_386_MMX;
2680 2681
2681 2682 if (*edx & CPUID_INTC_EDX_TSC)
2682 2683 hwcap_flags |= AV_386_TSC;
2683 2684 if (*edx & CPUID_INTC_EDX_CX8)
2684 2685 hwcap_flags |= AV_386_CX8;
2685 2686 if (*edx & CPUID_INTC_EDX_CMOV)
2686 2687 hwcap_flags |= AV_386_CMOV;
2687 2688 if (*ecx & CPUID_INTC_ECX_CX16)
2688 2689 hwcap_flags |= AV_386_CX16;
2689 2690
2690 2691 if (*ecx & CPUID_INTC_ECX_RDRAND)
2691 2692 hwcap_flags_2 |= AV_386_2_RDRAND;
2692 2693 }
2693 2694
2694 2695 if (cpi->cpi_xmaxeax < 0x80000001)
2695 2696 goto pass4_done;
2696 2697
2697 2698 switch (cpi->cpi_vendor) {
2698 2699 struct cpuid_regs cp;
2699 2700 uint32_t *edx, *ecx;
2700 2701
2701 2702 case X86_VENDOR_Intel:
2702 2703 /*
2703 2704 * Seems like Intel duplicated what was necessary
2704 2705 * here to make the initial crop of 64-bit OSes work.
2705 2706 * Hopefully, those are the only "extended" bits
2706 2707 * they'll add.
2707 2708 */
2708 2709 /*FALLTHROUGH*/
2709 2710
2710 2711 case X86_VENDOR_AMD:
2711 2712 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2712 2713 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2713 2714
2714 2715 *edx = CPI_FEATURES_XTD_EDX(cpi);
2715 2716 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2716 2717
2717 2718 /*
2718 2719 * [these features require explicit kernel support]
2719 2720 */
2720 2721 switch (cpi->cpi_vendor) {
2721 2722 case X86_VENDOR_Intel:
2722 2723 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2723 2724 *edx &= ~CPUID_AMD_EDX_TSCP;
2724 2725 break;
2725 2726
2726 2727 case X86_VENDOR_AMD:
2727 2728 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2728 2729 *edx &= ~CPUID_AMD_EDX_TSCP;
2729 2730 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2730 2731 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2731 2732 break;
2732 2733
2733 2734 default:
2734 2735 break;
2735 2736 }
2736 2737
2737 2738 /*
2738 2739 * [no explicit support required beyond
2739 2740 * x87 fp context and exception handlers]
2740 2741 */
2741 2742 if (!fpu_exists)
2742 2743 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2743 2744 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2744 2745
2745 2746 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2746 2747 *edx &= ~CPUID_AMD_EDX_NX;
2747 2748 #if !defined(__amd64)
2748 2749 *edx &= ~CPUID_AMD_EDX_LM;
2749 2750 #endif
2750 2751 /*
2751 2752 * Now map the supported feature vector to
2752 2753 * things that we think userland will care about.
2753 2754 */
2754 2755 #if defined(__amd64)
2755 2756 if (*edx & CPUID_AMD_EDX_SYSC)
2756 2757 hwcap_flags |= AV_386_AMD_SYSC;
2757 2758 #endif
2758 2759 if (*edx & CPUID_AMD_EDX_MMXamd)
2759 2760 hwcap_flags |= AV_386_AMD_MMX;
2760 2761 if (*edx & CPUID_AMD_EDX_3DNow)
2761 2762 hwcap_flags |= AV_386_AMD_3DNow;
2762 2763 if (*edx & CPUID_AMD_EDX_3DNowx)
2763 2764 hwcap_flags |= AV_386_AMD_3DNowx;
2764 2765 if (*ecx & CPUID_AMD_ECX_SVM)
2765 2766 hwcap_flags |= AV_386_AMD_SVM;
2766 2767
2767 2768 switch (cpi->cpi_vendor) {
2768 2769 case X86_VENDOR_AMD:
2769 2770 if (*edx & CPUID_AMD_EDX_TSCP)
2770 2771 hwcap_flags |= AV_386_TSCP;
2771 2772 if (*ecx & CPUID_AMD_ECX_AHF64)
2772 2773 hwcap_flags |= AV_386_AHF;
2773 2774 if (*ecx & CPUID_AMD_ECX_SSE4A)
2774 2775 hwcap_flags |= AV_386_AMD_SSE4A;
2775 2776 if (*ecx & CPUID_AMD_ECX_LZCNT)
2776 2777 hwcap_flags |= AV_386_AMD_LZCNT;
2777 2778 break;
2778 2779
2779 2780 case X86_VENDOR_Intel:
2780 2781 if (*edx & CPUID_AMD_EDX_TSCP)
2781 2782 hwcap_flags |= AV_386_TSCP;
2782 2783 /*
2783 2784 * Aarrgh.
2784 2785 * Intel uses a different bit in the same word.
2785 2786 */
2786 2787 if (*ecx & CPUID_INTC_ECX_AHF64)
2787 2788 hwcap_flags |= AV_386_AHF;
2788 2789 break;
2789 2790
2790 2791 default:
2791 2792 break;
2792 2793 }
2793 2794 break;
2794 2795
2795 2796 case X86_VENDOR_TM:
2796 2797 cp.cp_eax = 0x80860001;
2797 2798 (void) __cpuid_insn(&cp);
2798 2799 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2799 2800 break;
2800 2801
2801 2802 default:
2802 2803 break;
2803 2804 }
2804 2805
2805 2806 pass4_done:
2806 2807 cpi->cpi_pass = 4;
2807 2808 if (hwcap_out != NULL) {
2808 2809 hwcap_out[0] = hwcap_flags;
2809 2810 hwcap_out[1] = hwcap_flags_2;
2810 2811 }
2811 2812 }
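
Userland consumes the hwcap words built here through the aux vector. A
minimal consumer sketch using getisax(3C), assuming the usual Solaris
libc interface and the AV_386_* masks seen above:

	#include <sys/auxv.h>
	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t av[2] = { 0, 0 };

		(void) getisax(av, 2);	/* av[0]: AV_386_*, av[1]: AV_386_2_* */
		if (av[0] & AV_386_SSE2)
			(void) printf("SSE2\n");
		if (av[1] & AV_386_2_RDRAND)
			(void) printf("RDRAND\n");
		return (0);
	}
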
2812 2813
2813 2814
2814 2815 /*
2815 2816 * Simulate the cpuid instruction using the data we previously
2816 2817 * captured about this CPU. We try our best to return the truth
2817 2818 * about the hardware, independently of kernel support.
2818 2819 */
2819 2820 uint32_t
2820 2821 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2821 2822 {
2822 2823 struct cpuid_info *cpi;
2823 2824 struct cpuid_regs *xcp;
2824 2825
2825 2826 if (cpu == NULL)
2826 2827 cpu = CPU;
2827 2828 cpi = cpu->cpu_m.mcpu_cpi;
2828 2829
2829 2830 ASSERT(cpuid_checkpass(cpu, 3));
2830 2831
2831 2832 /*
2832 2833 * CPUID data is cached in two separate places: cpi_std for standard
2833 2834 * CPUID functions, and cpi_extd for extended CPUID functions.
2834 2835 */
2835 2836 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2836 2837 xcp = &cpi->cpi_std[cp->cp_eax];
2837 2838 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2838 2839 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2839 2840 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2840 2841 else
2841 2842 /*
2842 2843 * The caller is asking for data from an input parameter which
2843 2844 * the kernel has not cached. In this case we go fetch from
2844 2845 * the hardware and return the data directly to the user.
2845 2846 */
2846 2847 return (__cpuid_insn(cp));
2847 2848
2848 2849 cp->cp_eax = xcp->cp_eax;
2849 2850 cp->cp_ebx = xcp->cp_ebx;
2850 2851 cp->cp_ecx = xcp->cp_ecx;
2851 2852 cp->cp_edx = xcp->cp_edx;
2852 2853 return (cp->cp_eax);
2853 2854 }
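
A hedged sketch of a kernel-side caller, valid in any pass-3-or-later
context:

	struct cpuid_regs cr;

	cr.cp_eax = 0x80000008;			/* cached in cpi_extd[8] by pass 2 */
	cr.cp_ebx = cr.cp_ecx = cr.cp_edx = 0;
	(void) cpuid_insn(NULL, &cr);		/* NULL selects the current CPU */
	/* e.g. physical address bits: BITX(cr.cp_eax, 7, 0) */
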
2854 2855
2855 2856 int
2856 2857 cpuid_checkpass(cpu_t *cpu, int pass)
2857 2858 {
2858 2859 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2859 2860 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2860 2861 }
2861 2862
2862 2863 int
2863 2864 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2864 2865 {
2865 2866 ASSERT(cpuid_checkpass(cpu, 3));
2866 2867
2867 2868 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2868 2869 }
2869 2870
2870 2871 int
2871 2872 cpuid_is_cmt(cpu_t *cpu)
2872 2873 {
2873 2874 if (cpu == NULL)
2874 2875 cpu = CPU;
2875 2876
2876 2877 ASSERT(cpuid_checkpass(cpu, 1));
2877 2878
2878 2879 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2879 2880 }
2880 2881
2881 2882 /*
2882 2883 * AMD and Intel both implement the 64-bit variant of the syscall
2883 2884 * instruction (syscallq), so if there's -any- support for syscall,
2884 2885 * cpuid currently says "yes, we support this".
2885 2886 *
2886 2887 * However, Intel decided to -not- implement the 32-bit variant of the
2887 2888 * syscall instruction, so we provide a predicate to allow our caller
2888 2889 * to test that subtlety here.
2889 2890 *
2890 2891 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2891 2892 * even in the case where the hardware would in fact support it.
2892 2893 */
2893 2894 /*ARGSUSED*/
2894 2895 int
2895 2896 cpuid_syscall32_insn(cpu_t *cpu)
2896 2897 {
2897 2898 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2898 2899
2899 2900 #if !defined(__xpv)
2900 2901 if (cpu == NULL)
2901 2902 cpu = CPU;
2902 2903
2903 2904 /*CSTYLED*/
2904 2905 {
2905 2906 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2906 2907
2907 2908 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2908 2909 cpi->cpi_xmaxeax >= 0x80000001 &&
2909 2910 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2910 2911 return (1);
2911 2912 }
2912 2913 #endif
2913 2914 return (0);
2914 2915 }
2915 2916
2916 2917 int
2917 2918 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2918 2919 {
2919 2920 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2920 2921
2921 2922 static const char fmt[] =
2922 2923 "x86 (%s %X family %d model %d step %d clock %d MHz)";
2923 2924 static const char fmt_ht[] =
2924 2925 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2925 2926
2926 2927 ASSERT(cpuid_checkpass(cpu, 1));
2927 2928
2928 2929 if (cpuid_is_cmt(cpu))
2929 2930 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2930 2931 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2931 2932 cpi->cpi_family, cpi->cpi_model,
2932 2933 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2933 2934 return (snprintf(s, n, fmt,
2934 2935 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2935 2936 cpi->cpi_family, cpi->cpi_model,
2936 2937 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2937 2938 }
2938 2939
2939 2940 const char *
2940 2941 cpuid_getvendorstr(cpu_t *cpu)
2941 2942 {
2942 2943 ASSERT(cpuid_checkpass(cpu, 1));
2943 2944 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2944 2945 }
2945 2946
2946 2947 uint_t
2947 2948 cpuid_getvendor(cpu_t *cpu)
2948 2949 {
2949 2950 ASSERT(cpuid_checkpass(cpu, 1));
2950 2951 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2951 2952 }
2952 2953
2953 2954 uint_t
2954 2955 cpuid_getfamily(cpu_t *cpu)
2955 2956 {
2956 2957 ASSERT(cpuid_checkpass(cpu, 1));
2957 2958 return (cpu->cpu_m.mcpu_cpi->cpi_family);
2958 2959 }
2959 2960
2960 2961 uint_t
2961 2962 cpuid_getmodel(cpu_t *cpu)
2962 2963 {
2963 2964 ASSERT(cpuid_checkpass(cpu, 1));
2964 2965 return (cpu->cpu_m.mcpu_cpi->cpi_model);
2965 2966 }
2966 2967
2967 2968 uint_t
2968 2969 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2969 2970 {
2970 2971 ASSERT(cpuid_checkpass(cpu, 1));
2971 2972 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2972 2973 }
2973 2974
2974 2975 uint_t
2975 2976 cpuid_get_ncore_per_chip(cpu_t *cpu)
2976 2977 {
2977 2978 ASSERT(cpuid_checkpass(cpu, 1));
2978 2979 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2979 2980 }
2980 2981
2981 2982 uint_t
2982 2983 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2983 2984 {
2984 2985 ASSERT(cpuid_checkpass(cpu, 2));
2985 2986 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2986 2987 }
2987 2988
2988 2989 id_t
2989 2990 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2990 2991 {
2991 2992 ASSERT(cpuid_checkpass(cpu, 2));
2992 2993 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2993 2994 }
2994 2995
2995 2996 uint_t
2996 2997 cpuid_getstep(cpu_t *cpu)
2997 2998 {
2998 2999 ASSERT(cpuid_checkpass(cpu, 1));
2999 3000 return (cpu->cpu_m.mcpu_cpi->cpi_step);
3000 3001 }
3001 3002
3002 3003 uint_t
3003 3004 cpuid_getsig(struct cpu *cpu)
3004 3005 {
3005 3006 ASSERT(cpuid_checkpass(cpu, 1));
3006 3007 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
3007 3008 }
3008 3009
3009 3010 uint32_t
3010 3011 cpuid_getchiprev(struct cpu *cpu)
3011 3012 {
3012 3013 ASSERT(cpuid_checkpass(cpu, 1));
3013 3014 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
3014 3015 }
3015 3016
3016 3017 const char *
3017 3018 cpuid_getchiprevstr(struct cpu *cpu)
3018 3019 {
3019 3020 ASSERT(cpuid_checkpass(cpu, 1));
3020 3021 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
3021 3022 }
3022 3023
3023 3024 uint32_t
3024 3025 cpuid_getsockettype(struct cpu *cpu)
3025 3026 {
3026 3027 ASSERT(cpuid_checkpass(cpu, 1));
3027 3028 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
3028 3029 }
3029 3030
3030 3031 const char *
3031 3032 cpuid_getsocketstr(cpu_t *cpu)
3032 3033 {
3033 3034 static const char *socketstr = NULL;
3034 3035 struct cpuid_info *cpi;
3035 3036
3036 3037 ASSERT(cpuid_checkpass(cpu, 1));
3037 3038 cpi = cpu->cpu_m.mcpu_cpi;
3038 3039
3039 3040 /* Assume that socket types are the same across the system */
3040 3041 if (socketstr == NULL)
3041 3042 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
3042 3043 cpi->cpi_model, cpi->cpi_step);
3043 3044
3044 3045
3045 3046 return (socketstr);
3046 3047 }
3047 3048
3048 3049 int
3049 3050 cpuid_get_chipid(cpu_t *cpu)
3050 3051 {
3051 3052 ASSERT(cpuid_checkpass(cpu, 1));
3052 3053
3053 3054 if (cpuid_is_cmt(cpu))
3054 3055 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
3055 3056 return (cpu->cpu_id);
3056 3057 }
3057 3058
3058 3059 id_t
3059 3060 cpuid_get_coreid(cpu_t *cpu)
3060 3061 {
3061 3062 ASSERT(cpuid_checkpass(cpu, 1));
3062 3063 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
3063 3064 }
3064 3065
3065 3066 int
3066 3067 cpuid_get_pkgcoreid(cpu_t *cpu)
3067 3068 {
3068 3069 ASSERT(cpuid_checkpass(cpu, 1));
3069 3070 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3070 3071 }
3071 3072
3072 3073 int
3073 3074 cpuid_get_clogid(cpu_t *cpu)
3074 3075 {
3075 3076 ASSERT(cpuid_checkpass(cpu, 1));
3076 3077 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3077 3078 }
3078 3079
3079 3080 int
3080 3081 cpuid_get_cacheid(cpu_t *cpu)
3081 3082 {
3082 3083 ASSERT(cpuid_checkpass(cpu, 1));
3083 3084 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3084 3085 }
3085 3086
3086 3087 uint_t
3087 3088 cpuid_get_procnodeid(cpu_t *cpu)
3088 3089 {
3089 3090 ASSERT(cpuid_checkpass(cpu, 1));
3090 3091 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3091 3092 }
3092 3093
3093 3094 uint_t
3094 3095 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3095 3096 {
3096 3097 ASSERT(cpuid_checkpass(cpu, 1));
3097 3098 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3098 3099 }
3099 3100
3100 3101 uint_t
3101 3102 cpuid_get_compunitid(cpu_t *cpu)
3102 3103 {
3103 3104 ASSERT(cpuid_checkpass(cpu, 1));
3104 3105 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3105 3106 }
3106 3107
3107 3108 uint_t
3108 3109 cpuid_get_cores_per_compunit(cpu_t *cpu)
3109 3110 {
3110 3111 ASSERT(cpuid_checkpass(cpu, 1));
3111 3112 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3112 3113 }
3113 3114
3114 3115 /*ARGSUSED*/
3115 3116 int
3116 3117 cpuid_have_cr8access(cpu_t *cpu)
3117 3118 {
3118 3119 #if defined(__amd64)
3119 3120 return (1);
3120 3121 #else
3121 3122 struct cpuid_info *cpi;
3122 3123
3123 3124 ASSERT(cpu != NULL);
3124 3125 cpi = cpu->cpu_m.mcpu_cpi;
3125 3126 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3126 3127 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3127 3128 return (1);
3128 3129 return (0);
3129 3130 #endif
3130 3131 }
3131 3132
3132 3133 uint32_t
3133 3134 cpuid_get_apicid(cpu_t *cpu)
3134 3135 {
3135 3136 ASSERT(cpuid_checkpass(cpu, 1));
3136 3137 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3137 3138 return (UINT32_MAX);
3138 3139 } else {
3139 3140 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3140 3141 }
3141 3142 }
3142 3143
3143 3144 void
3144 3145 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3145 3146 {
3146 3147 struct cpuid_info *cpi;
3147 3148
3148 3149 if (cpu == NULL)
3149 3150 cpu = CPU;
3150 3151 cpi = cpu->cpu_m.mcpu_cpi;
3151 3152
3152 3153 ASSERT(cpuid_checkpass(cpu, 1));
3153 3154
3154 3155 if (pabits)
3155 3156 *pabits = cpi->cpi_pabits;
3156 3157 if (vabits)
3157 3158 *vabits = cpi->cpi_vabits;
3158 3159 }
3159 3160
3160 3161 /*
3161 3162 * Returns the number of data TLB entries for a corresponding
3162 3163 * pagesize. If it can't be computed, or isn't known, the
3163 3164 * routine returns zero. If you ask about an architecturally
3164 3165 * impossible pagesize, the routine will panic (so that the
3165 3166 * hat implementor knows that things are inconsistent).
3166 3167 */
3167 3168 uint_t
3168 3169 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3169 3170 {
3170 3171 struct cpuid_info *cpi;
3171 3172 uint_t dtlb_nent = 0;
3172 3173
3173 3174 if (cpu == NULL)
3174 3175 cpu = CPU;
3175 3176 cpi = cpu->cpu_m.mcpu_cpi;
3176 3177
3177 3178 ASSERT(cpuid_checkpass(cpu, 1));
3178 3179
3179 3180 /*
3180 3181 * Check the L2 TLB info
3181 3182 */
3182 3183 if (cpi->cpi_xmaxeax >= 0x80000006) {
3183 3184 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3184 3185
3185 3186 switch (pagesize) {
3186 3187
3187 3188 case 4 * 1024:
3188 3189 /*
3189 3190 * All zero in the top 16 bits of the register
3190 3191 * indicates a unified TLB. Size is in low 16 bits.
3191 3192 */
3192 3193 if ((cp->cp_ebx & 0xffff0000) == 0)
3193 3194 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3194 3195 else
3195 3196 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3196 3197 break;
3197 3198
3198 3199 case 2 * 1024 * 1024:
3199 3200 if ((cp->cp_eax & 0xffff0000) == 0)
3200 3201 dtlb_nent = cp->cp_eax & 0x0000ffff;
3201 3202 else
3202 3203 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3203 3204 break;
3204 3205
3205 3206 default:
3206 3207 panic("unknown L2 pagesize");
3207 3208 /*NOTREACHED*/
3208 3209 }
3209 3210 }
3210 3211
3211 3212 if (dtlb_nent != 0)
3212 3213 return (dtlb_nent);
3213 3214
3214 3215 /*
3215 3216 * No L2 TLB support for this size, try L1.
3216 3217 */
3217 3218 if (cpi->cpi_xmaxeax >= 0x80000005) {
3218 3219 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3219 3220
3220 3221 switch (pagesize) {
3221 3222 case 4 * 1024:
3222 3223 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3223 3224 break;
3224 3225 case 2 * 1024 * 1024:
3225 3226 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3226 3227 break;
3227 3228 default:
3228 3229 panic("unknown L1 d-TLB pagesize");
3229 3230 /*NOTREACHED*/
3230 3231 }
3231 3232 }
3232 3233
3233 3234 return (dtlb_nent);
3234 3235 }
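
A worked decode of the leaf 0x80000006 %ebx convention described above,
with an illustrative value and the BITX() stand-in sketched earlier:

	uint32_t ebx = 0x42004200;		/* hypothetical L2 TLB descriptor */
	uint_t dtlb4k;

	if ((ebx & 0xffff0000) == 0)
		dtlb4k = ebx & 0xffff;		/* all-zero top half: unified TLB */
	else
		dtlb4k = BITX(ebx, 27, 16);	/* here 0x200 = 512 4K d-TLB entries */
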
3235 3236
3236 3237 /*
3237 3238 * Return 0 if the erratum is not present or not applicable, positive
3238 3239 * if it is, and negative if the status of the erratum is unknown.
3239 3240 *
3240 3241 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3241 3242 * Processors" #25759, Rev 3.57, August 2005
3242 3243 */
3243 3244 int
3244 3245 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3245 3246 {
3246 3247 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3247 3248 uint_t eax;
3248 3249
3249 3250 /*
3250 3251 * Bail out if this CPU isn't an AMD CPU, or if it's
3251 3252 * a legacy (32-bit) AMD CPU.
3252 3253 */
3253 3254 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3254 3255 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3255 3256 cpi->cpi_family == 6)
3256 3257
3257 3258 return (0);
3258 3259
3259 3260 eax = cpi->cpi_std[1].cp_eax;
3260 3261
3261 3262 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3262 3263 #define SH_B3(eax) (eax == 0xf51)
3263 3264 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3264 3265
3265 3266 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3266 3267
3267 3268 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3268 3269 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3269 3270 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3270 3271 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3271 3272
3272 3273 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3273 3274 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3274 3275 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3275 3276 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3276 3277
3277 3278 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3278 3279 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3279 3280 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3280 3281 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3281 3282 #define BH_E4(eax) (eax == 0x20fb1)
3282 3283 #define SH_E5(eax) (eax == 0x20f42)
3283 3284 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3284 3285 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3285 3286 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3286 3287 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3287 3288 DH_E6(eax) || JH_E6(eax))
3288 3289
3289 3290 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3290 3291 #define DR_B0(eax) (eax == 0x100f20)
3291 3292 #define DR_B1(eax) (eax == 0x100f21)
3292 3293 #define DR_BA(eax) (eax == 0x100f2a)
3293 3294 #define DR_B2(eax) (eax == 0x100f22)
3294 3295 #define DR_B3(eax) (eax == 0x100f23)
3295 3296 #define RB_C0(eax) (eax == 0x100f40)
3296 3297
3297 3298 switch (erratum) {
3298 3299 case 1:
3299 3300 return (cpi->cpi_family < 0x10);
3300 3301 case 51: /* what does the asterisk mean? */
3301 3302 return (B(eax) || SH_C0(eax) || CG(eax));
3302 3303 case 52:
3303 3304 return (B(eax));
3304 3305 case 57:
3305 3306 return (cpi->cpi_family <= 0x11);
3306 3307 case 58:
3307 3308 return (B(eax));
3308 3309 case 60:
3309 3310 return (cpi->cpi_family <= 0x11);
3310 3311 case 61:
3311 3312 case 62:
3312 3313 case 63:
3313 3314 case 64:
3314 3315 case 65:
3315 3316 case 66:
3316 3317 case 68:
3317 3318 case 69:
3318 3319 case 70:
3319 3320 case 71:
3320 3321 return (B(eax));
3321 3322 case 72:
3322 3323 return (SH_B0(eax));
3323 3324 case 74:
3324 3325 return (B(eax));
3325 3326 case 75:
3326 3327 return (cpi->cpi_family < 0x10);
3327 3328 case 76:
3328 3329 return (B(eax));
3329 3330 case 77:
3330 3331 return (cpi->cpi_family <= 0x11);
3331 3332 case 78:
3332 3333 return (B(eax) || SH_C0(eax));
3333 3334 case 79:
3334 3335 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3335 3336 case 80:
3336 3337 case 81:
3337 3338 case 82:
3338 3339 return (B(eax));
3339 3340 case 83:
3340 3341 return (B(eax) || SH_C0(eax) || CG(eax));
3341 3342 case 85:
3342 3343 return (cpi->cpi_family < 0x10);
3343 3344 case 86:
3344 3345 return (SH_C0(eax) || CG(eax));
3345 3346 case 88:
3346 3347 #if !defined(__amd64)
3347 3348 return (0);
3348 3349 #else
3349 3350 return (B(eax) || SH_C0(eax));
3350 3351 #endif
3351 3352 case 89:
3352 3353 return (cpi->cpi_family < 0x10);
3353 3354 case 90:
3354 3355 return (B(eax) || SH_C0(eax) || CG(eax));
3355 3356 case 91:
3356 3357 case 92:
3357 3358 return (B(eax) || SH_C0(eax));
3358 3359 case 93:
3359 3360 return (SH_C0(eax));
3360 3361 case 94:
3361 3362 return (B(eax) || SH_C0(eax) || CG(eax));
3362 3363 case 95:
3363 3364 #if !defined(__amd64)
3364 3365 return (0);
3365 3366 #else
3366 3367 return (B(eax) || SH_C0(eax));
3367 3368 #endif
3368 3369 case 96:
3369 3370 return (B(eax) || SH_C0(eax) || CG(eax));
3370 3371 case 97:
3371 3372 case 98:
3372 3373 return (SH_C0(eax) || CG(eax));
3373 3374 case 99:
3374 3375 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3375 3376 case 100:
3376 3377 return (B(eax) || SH_C0(eax));
3377 3378 case 101:
3378 3379 case 103:
3379 3380 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3380 3381 case 104:
3381 3382 return (SH_C0(eax) || CG(eax) || D0(eax));
3382 3383 case 105:
3383 3384 case 106:
3384 3385 case 107:
3385 3386 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3386 3387 case 108:
3387 3388 return (DH_CG(eax));
3388 3389 case 109:
3389 3390 return (SH_C0(eax) || CG(eax) || D0(eax));
3390 3391 case 110:
3391 3392 return (D0(eax) || EX(eax));
3392 3393 case 111:
3393 3394 return (CG(eax));
3394 3395 case 112:
3395 3396 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3396 3397 case 113:
3397 3398 return (eax == 0x20fc0);
3398 3399 case 114:
3399 3400 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3400 3401 case 115:
3401 3402 return (SH_E0(eax) || JH_E1(eax));
3402 3403 case 116:
3403 3404 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3404 3405 case 117:
3405 3406 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3406 3407 case 118:
3407 3408 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3408 3409 JH_E6(eax));
3409 3410 case 121:
3410 3411 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3411 3412 case 122:
3412 3413 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3413 3414 case 123:
3414 3415 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3415 3416 case 131:
3416 3417 return (cpi->cpi_family < 0x10);
3417 3418 case 6336786:
3418 3419 /*
3419 3420 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3420 3421 * if this is a K8 family or newer processor
3421 3422 */
3422 3423 if (CPI_FAMILY(cpi) == 0xf) {
3423 3424 struct cpuid_regs regs;
3424 3425 regs.cp_eax = 0x80000007;
3425 3426 (void) __cpuid_insn(&regs);
3426 3427 return (!(regs.cp_edx & 0x100));
3427 3428 }
3428 3429 return (0);
3429 3430 case 6323525:
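/*
 * The expression below reassembles the display family
 * (base + extended) into bits 15:8 and the display model
 * (extended:base) into bits 7:0 of a compare value, so the
 * test is true for family 0xf parts below model 0x40 (and
 * for any earlier family).
 */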
3430 3431 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3431 3432 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3432 3433
3433 3434 case 6671130:
3434 3435 /*
3435 3436 * Check for processors (pre-Shanghai) that do not provide
3436 3437 * optimal management of 1gb ptes in their tlb.
3437 3438 */
3438 3439 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3439 3440
3440 3441 case 298:
3441 3442 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3442 3443 DR_B2(eax) || RB_C0(eax));
3443 3444
3444 3445 case 721:
3445 3446 #if defined(__amd64)
3446 3447 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3447 3448 #else
3448 3449 return (0);
3449 3450 #endif
3450 3451
3451 3452 default:
3452 3453 return (-1);
3453 3454
3454 3455 }
3455 3456 }
3456 3457
3457 3458 /*
3458 3459 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3459 3460 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3460 3461 */
3461 3462 int
3462 3463 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3463 3464 {
3464 3465 struct cpuid_info *cpi;
3465 3466 uint_t osvwid;
3466 3467 static int osvwfeature = -1;
3467 3468 uint64_t osvwlength;
3468 3469
3469 3470
3470 3471 cpi = cpu->cpu_m.mcpu_cpi;
3471 3472
3472 3473 /* confirm OSVW supported */
3473 3474 if (osvwfeature == -1) {
3474 3475 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3475 3476 } else {
3476 3477 /* assert that osvw feature setting is consistent on all cpus */
3477 3478 ASSERT(osvwfeature ==
3478 3479 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3479 3480 }
3480 3481 if (!osvwfeature)
3481 3482 return (-1);
3482 3483
3483 3484 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3484 3485
3485 3486 switch (erratum) {
3486 3487 case 298: /* osvwid is 0 */
3487 3488 osvwid = 0;
3488 3489 if (osvwlength <= (uint64_t)osvwid) {
3489 3490 /* osvwid 0 is unknown */
3490 3491 return (-1);
3491 3492 }
3492 3493
3493 3494 /*
3494 3495 * Check the OSVW STATUS MSR to determine the state
3495 3496 * of the erratum where:
3496 3497 * 0 - fixed by HW
3497 3498 * 1 - BIOS has applied the workaround when BIOS
3498 3499 * workaround is available. (Or for other errata,
3499 3500 * OS workaround is required.)
3500 3501 * For a value of 1, caller will confirm that the
3501 3502 * erratum 298 workaround has indeed been applied by BIOS.
3502 3503 *
3503 3504 * A 1 may be set in cpus that have a HW fix
3504 3505 * in a mixed cpu system. Regarding erratum 298:
3505 3506 * In a multiprocessor platform, the workaround above
3506 3507 * should be applied to all processors regardless of
3507 3508 * silicon revision when an affected processor is
3508 3509 * present.
3509 3510 */
3510 3511
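/*
 * Status bits are packed OSVW_ID_CNT_PER_MSR per MSR:
 * pick the MSR with (osvwid / OSVW_ID_CNT_PER_MSR) and
 * test bit (osvwid % OSVW_ID_CNT_PER_MSR) within it.
 */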
3511 3512 return (rdmsr(MSR_AMD_OSVW_STATUS +
3512 3513 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3513 3514 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3514 3515
3515 3516 default:
3516 3517 return (-1);
3517 3518 }
3518 3519 }
3519 3520
3520 3521 static const char assoc_str[] = "associativity";
3521 3522 static const char line_str[] = "line-size";
3522 3523 static const char size_str[] = "size";
3523 3524
3524 3525 static void
3525 3526 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3526 3527 uint32_t val)
3527 3528 {
3528 3529 char buf[128];
3529 3530
3530 3531 /*
3531 3532 * ndi_prop_update_int() is used because it is desirable for
3532 3533 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3533 3534 */
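/* only create the property if the composed name was not truncated */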
3534 3535 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3535 3536 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3536 3537 }
3537 3538
3538 3539 /*
3539 3540 * Intel-style cache/tlb description
3540 3541 *
3541 3542 * Standard cpuid level 2 gives a randomly ordered
3542 3543 * selection of tags that index into a table that describes
3543 3544 * cache and tlb properties.
3544 3545 */
3545 3546
3546 3547 static const char l1_icache_str[] = "l1-icache";
3547 3548 static const char l1_dcache_str[] = "l1-dcache";
3548 3549 static const char l2_cache_str[] = "l2-cache";
3549 3550 static const char l3_cache_str[] = "l3-cache";
3550 3551 static const char itlb4k_str[] = "itlb-4K";
3551 3552 static const char dtlb4k_str[] = "dtlb-4K";
3552 3553 static const char itlb2M_str[] = "itlb-2M";
3553 3554 static const char itlb4M_str[] = "itlb-4M";
3554 3555 static const char dtlb4M_str[] = "dtlb-4M";
3555 3556 static const char dtlb24_str[] = "dtlb0-2M-4M";
3556 3557 static const char itlb424_str[] = "itlb-4K-2M-4M";
3557 3558 static const char itlb24_str[] = "itlb-2M-4M";
3558 3559 static const char dtlb44_str[] = "dtlb-4K-4M";
3559 3560 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3560 3561 static const char sl2_cache_str[] = "sectored-l2-cache";
3561 3562 static const char itrace_str[] = "itrace-cache";
3562 3563 static const char sl3_cache_str[] = "sectored-l3-cache";
3563 3564 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3564 3565
3565 3566 static const struct cachetab {
3566 3567 uint8_t ct_code;
3567 3568 uint8_t ct_assoc;
3568 3569 uint16_t ct_line_size;
3569 3570 size_t ct_size;
3570 3571 const char *ct_label;
3571 3572 } intel_ctab[] = {
3572 3573 /*
3573 3574 * maintain descending order!
3574 3575 *
3575 3576 * Codes ignored - Reason
3576 3577 * ----------------------
3577 3578 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3578 3579 * f0H/f1H - Currently we do not interpret prefetch size by design
3579 3580 */
3580 3581 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3581 3582 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3582 3583 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3583 3584 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3584 3585 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3585 3586 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3586 3587 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3587 3588 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3588 3589 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3589 3590 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3590 3591 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3591 3592 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3592 3593 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3593 3594 { 0xc0, 4, 0, 8, dtlb44_str },
3594 3595 { 0xba, 4, 0, 64, dtlb4k_str },
3595 3596 { 0xb4, 4, 0, 256, dtlb4k_str },
3596 3597 { 0xb3, 4, 0, 128, dtlb4k_str },
3597 3598 { 0xb2, 4, 0, 64, itlb4k_str },
3598 3599 { 0xb0, 4, 0, 128, itlb4k_str },
3599 3600 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3600 3601 { 0x86, 4, 64, 512*1024, l2_cache_str},
3601 3602 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3602 3603 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3603 3604 { 0x83, 8, 32, 512*1024, l2_cache_str},
3604 3605 { 0x82, 8, 32, 256*1024, l2_cache_str},
3605 3606 { 0x80, 8, 64, 512*1024, l2_cache_str},
3606 3607 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3607 3608 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3608 3609 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3609 3610 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3610 3611 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3611 3612 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3612 3613 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3613 3614 { 0x73, 8, 0, 64*1024, itrace_str},
3614 3615 { 0x72, 8, 0, 32*1024, itrace_str},
3615 3616 { 0x71, 8, 0, 16*1024, itrace_str},
3616 3617 { 0x70, 8, 0, 12*1024, itrace_str},
3617 3618 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3618 3619 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3619 3620 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3620 3621 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3621 3622 { 0x5d, 0, 0, 256, dtlb44_str},
3622 3623 { 0x5c, 0, 0, 128, dtlb44_str},
3623 3624 { 0x5b, 0, 0, 64, dtlb44_str},
3624 3625 { 0x5a, 4, 0, 32, dtlb24_str},
3625 3626 { 0x59, 0, 0, 16, dtlb4k_str},
3626 3627 { 0x57, 4, 0, 16, dtlb4k_str},
3627 3628 { 0x56, 4, 0, 16, dtlb4M_str},
3628 3629 { 0x55, 0, 0, 7, itlb24_str},
3629 3630 { 0x52, 0, 0, 256, itlb424_str},
3630 3631 { 0x51, 0, 0, 128, itlb424_str},
3631 3632 { 0x50, 0, 0, 64, itlb424_str},
3632 3633 { 0x4f, 0, 0, 32, itlb4k_str},
3633 3634 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3634 3635 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3635 3636 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3636 3637 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3637 3638 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3638 3639 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3639 3640 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3640 3641 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3641 3642 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3642 3643 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3643 3644 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3644 3645 { 0x43, 4, 32, 512*1024, l2_cache_str},
3645 3646 { 0x42, 4, 32, 256*1024, l2_cache_str},
3646 3647 { 0x41, 4, 32, 128*1024, l2_cache_str},
3647 3648 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3648 3649 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3649 3650 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3650 3651 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3651 3652 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3652 3653 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3653 3654 { 0x30, 8, 64, 32*1024, l1_icache_str},
3654 3655 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3655 3656 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3656 3657 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3657 3658 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3658 3659 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3659 3660 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3660 3661 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3661 3662 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3662 3663 { 0x0b, 4, 0, 4, itlb4M_str},
3663 3664 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3664 3665 { 0x08, 4, 32, 16*1024, l1_icache_str},
3665 3666 { 0x06, 4, 32, 8*1024, l1_icache_str},
3666 3667 { 0x05, 4, 0, 32, dtlb4M_str},
3667 3668 { 0x04, 4, 0, 8, dtlb4M_str},
3668 3669 { 0x03, 4, 0, 64, dtlb4k_str},
3669 3670 { 0x02, 4, 0, 2, itlb4M_str},
3670 3671 { 0x01, 4, 0, 32, itlb4k_str},
3671 3672 { 0 }
3672 3673 };
3673 3674
3674 3675 static const struct cachetab cyrix_ctab[] = {
3675 3676 { 0x70, 4, 0, 32, "tlb-4K" },
3676 3677 { 0x80, 4, 16, 16*1024, "l1-cache" },
3677 3678 { 0 }
3678 3679 };
3679 3680
3680 3681 /*
3681 3682 * Search a cache table for a matching entry
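* (assumes the table is sorted in descending ct_code order, as
* intel_ctab is, so the scan can stop at the first entry <= code)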
3682 3683 */
3683 3684 static const struct cachetab *
3684 3685 find_cacheent(const struct cachetab *ct, uint_t code)
3685 3686 {
3686 3687 if (code != 0) {
3687 3688 for (; ct->ct_code != 0; ct++)
3688 3689 if (ct->ct_code <= code)
3689 3690 break;
3690 3691 if (ct->ct_code == code)
3691 3692 return (ct);
3692 3693 }
3693 3694 return (NULL);
3694 3695 }
3695 3696
3696 3697 /*
3697 3698 * Populate cachetab entry with L2 or L3 cache-information using
3698 3699 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3699 3700 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3700 3701 * information is found.
3701 3702 */
3702 3703 static int
3703 3704 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3704 3705 {
3705 3706 uint32_t level, i;
3706 3707 int ret = 0;
3707 3708
3708 3709 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3709 3710 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3710 3711
3711 3712 if (level == 2 || level == 3) {
3712 3713 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3713 3714 ct->ct_line_size =
3714 3715 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
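/*
 * cache size = ways * partitions * line size * sets;
 * leaf 4 reports each field minus one, and %ecx holds
 * (number of sets - 1).
 */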
3715 3716 ct->ct_size = ct->ct_assoc *
3716 3717 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3717 3718 ct->ct_line_size *
3718 3719 (cpi->cpi_std_4[i]->cp_ecx + 1);
3719 3720
3720 3721 if (level == 2) {
3721 3722 ct->ct_label = l2_cache_str;
3722 3723 } else if (level == 3) {
3723 3724 ct->ct_label = l3_cache_str;
3724 3725 }
3725 3726 ret = 1;
3726 3727 }
3727 3728 }
3728 3729
3729 3730 return (ret);
3730 3731 }
3731 3732
3732 3733 /*
3733 3734 * Walk the cacheinfo descriptor, applying 'func' to every valid element
3734 3735 * The walk is terminated if the walker returns non-zero.
3735 3736 */
3736 3737 static void
3737 3738 intel_walk_cacheinfo(struct cpuid_info *cpi,
3738 3739 void *arg, int (*func)(void *, const struct cachetab *))
3739 3740 {
3740 3741 const struct cachetab *ct;
3741 3742 struct cachetab des_49_ct, des_b1_ct;
3742 3743 uint8_t *dp;
3743 3744 int i;
3744 3745
3745 3746 if ((dp = cpi->cpi_cacheinfo) == NULL)
3746 3747 return;
3747 3748 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3748 3749 /*
3749 3750 * For overloaded descriptor 0x49 we use cpuid function 4
3750 3751 * if supported by the current processor, to create
3751 3752 * cache information.
3752 3753 * For overloaded descriptor 0xb1 we use X86_PAE flag
3753 3754 * to disambiguate the cache information.
3754 3755 */
3755 3756 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3756 3757 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3757 3758 ct = &des_49_ct;
3758 3759 } else if (*dp == 0xb1) {
3759 3760 des_b1_ct.ct_code = 0xb1;
3760 3761 des_b1_ct.ct_assoc = 4;
3761 3762 des_b1_ct.ct_line_size = 0;
3762 3763 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3763 3764 des_b1_ct.ct_size = 8;
3764 3765 des_b1_ct.ct_label = itlb2M_str;
3765 3766 } else {
3766 3767 des_b1_ct.ct_size = 4;
3767 3768 des_b1_ct.ct_label = itlb4M_str;
3768 3769 }
3769 3770 ct = &des_b1_ct;
3770 3771 } else {
3771 3772 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3772 3773 continue;
3773 3774 }
3774 3775 }
3775 3776
3776 3777 if (func(arg, ct) != 0) {
3777 3778 break;
3778 3779 }
3779 3780 }
3780 3781 }
3781 3782
3782 3783 /*
3783 3784 * (Like the Intel one, except for Cyrix CPUs)
3784 3785 */
3785 3786 static void
3786 3787 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3787 3788 void *arg, int (*func)(void *, const struct cachetab *))
3788 3789 {
3789 3790 const struct cachetab *ct;
3790 3791 uint8_t *dp;
3791 3792 int i;
3792 3793
3793 3794 if ((dp = cpi->cpi_cacheinfo) == NULL)
3794 3795 return;
3795 3796 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3796 3797 /*
3797 3798 * Search Cyrix-specific descriptor table first ..
3798 3799 */
3799 3800 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3800 3801 if (func(arg, ct) != 0)
3801 3802 break;
3802 3803 continue;
3803 3804 }
3804 3805 /*
3805 3806 * .. else fall back to the Intel one
3806 3807 */
3807 3808 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3808 3809 if (func(arg, ct) != 0)
3809 3810 break;
3810 3811 continue;
3811 3812 }
3812 3813 }
3813 3814 }
3814 3815
3815 3816 /*
3816 3817 * A cacheinfo walker that adds associativity, line-size, and size properties
3817 3818 * to the devinfo node it is passed as an argument.
3818 3819 */
3819 3820 static int
3820 3821 add_cacheent_props(void *arg, const struct cachetab *ct)
3821 3822 {
3822 3823 dev_info_t *devi = arg;
3823 3824
3824 3825 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3825 3826 if (ct->ct_line_size != 0)
3826 3827 add_cache_prop(devi, ct->ct_label, line_str,
3827 3828 ct->ct_line_size);
3828 3829 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3829 3830 return (0);
3830 3831 }
3831 3832
3832 3833
3833 3834 static const char fully_assoc[] = "fully-associative?";
3834 3835
3835 3836 /*
3836 3837 * AMD style cache/tlb description
3837 3838 *
3838 3839 * Extended functions 5 and 6 directly describe properties of
3839 3840 * tlbs and various cache levels.
3840 3841 */
3841 3842 static void
3842 3843 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3843 3844 {
3844 3845 switch (assoc) {
3845 3846 case 0: /* reserved; ignore */
3846 3847 break;
3847 3848 default:
3848 3849 add_cache_prop(devi, label, assoc_str, assoc);
3849 3850 break;
3850 3851 case 0xff:
3851 3852 add_cache_prop(devi, label, fully_assoc, 1);
3852 3853 break;
3853 3854 }
3854 3855 }
3855 3856
3856 3857 static void
3857 3858 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3858 3859 {
3859 3860 if (size == 0)
3860 3861 return;
3861 3862 add_cache_prop(devi, label, size_str, size);
3862 3863 add_amd_assoc(devi, label, assoc);
3863 3864 }
3864 3865
3865 3866 static void
3866 3867 add_amd_cache(dev_info_t *devi, const char *label,
3867 3868 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3868 3869 {
3869 3870 if (size == 0 || line_size == 0)
3870 3871 return;
3871 3872 add_amd_assoc(devi, label, assoc);
3872 3873 /*
3873 3874 * Most AMD parts have a sectored cache. Multiple cache lines are
3874 3875 * associated with each tag. A sector consists of all cache lines
3875 3876 * associated with a tag. For example, the AMD K6-III has a sector
3876 3877 * size of 2 cache lines per tag.
3877 3878 */
3878 3879 if (lines_per_tag != 0)
3879 3880 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3880 3881 add_cache_prop(devi, label, line_str, line_size);
3881 3882 add_cache_prop(devi, label, size_str, size * 1024);
3882 3883 }
3883 3884
3884 3885 static void
3885 3886 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3886 3887 {
3887 3888 switch (assoc) {
3888 3889 case 0: /* off */
3889 3890 break;
3890 3891 case 1:
3891 3892 case 2:
3892 3893 case 4:
3893 3894 add_cache_prop(devi, label, assoc_str, assoc);
3894 3895 break;
3895 3896 case 6:
3896 3897 add_cache_prop(devi, label, assoc_str, 8);
3897 3898 break;
3898 3899 case 8:
3899 3900 add_cache_prop(devi, label, assoc_str, 16);
3900 3901 break;
3901 3902 case 0xf:
3902 3903 add_cache_prop(devi, label, fully_assoc, 1);
3903 3904 break;
3904 3905 default: /* reserved; ignore */
3905 3906 break;
3906 3907 }
3907 3908 }
3908 3909
3909 3910 static void
3910 3911 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3911 3912 {
3912 3913 if (size == 0 || assoc == 0)
3913 3914 return;
3914 3915 add_amd_l2_assoc(devi, label, assoc);
3915 3916 add_cache_prop(devi, label, size_str, size);
3916 3917 }
3917 3918
3918 3919 static void
3919 3920 add_amd_l2_cache(dev_info_t *devi, const char *label,
3920 3921 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3921 3922 {
3922 3923 if (size == 0 || assoc == 0 || line_size == 0)
3923 3924 return;
3924 3925 add_amd_l2_assoc(devi, label, assoc);
3925 3926 if (lines_per_tag != 0)
3926 3927 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3927 3928 add_cache_prop(devi, label, line_str, line_size);
3928 3929 add_cache_prop(devi, label, size_str, size * 1024);
3929 3930 }
3930 3931
3931 3932 static void
3932 3933 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3933 3934 {
3934 3935 struct cpuid_regs *cp;
3935 3936
3936 3937 if (cpi->cpi_xmaxeax < 0x80000005)
3937 3938 return;
3938 3939 cp = &cpi->cpi_extd[5];
3939 3940
3940 3941 /*
3941 3942 * 4M/2M L1 TLB configuration
3942 3943 *
3943 3944 * We report the size for 2M pages because AMD uses two
3944 3945 * TLB entries for one 4M page.
3945 3946 */
3946 3947 add_amd_tlb(devi, "dtlb-2M",
3947 3948 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3948 3949 add_amd_tlb(devi, "itlb-2M",
3949 3950 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3950 3951
3951 3952 /*
3952 3953 * 4K L1 TLB configuration
3953 3954 */
3954 3955
3955 3956 switch (cpi->cpi_vendor) {
3956 3957 uint_t nentries;
3957 3958 case X86_VENDOR_TM:
3958 3959 if (cpi->cpi_family >= 5) {
3959 3960 /*
3960 3961 * Crusoe processors have 256 TLB entries, but
3961 3962 * the cpuid data format constrains them to
3962 3963 * reporting only 255.
3963 3964 */
3964 3965 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3965 3966 nentries = 256;
3966 3967 /*
3967 3968 * Crusoe processors also have a unified TLB
3968 3969 */
3969 3970 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3970 3971 nentries);
3971 3972 break;
3972 3973 }
3973 3974 /*FALLTHROUGH*/
3974 3975 default:
3975 3976 add_amd_tlb(devi, itlb4k_str,
3976 3977 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3977 3978 add_amd_tlb(devi, dtlb4k_str,
3978 3979 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3979 3980 break;
3980 3981 }
3981 3982
3982 3983 /*
3983 3984 * data L1 cache configuration
3984 3985 */
3985 3986
3986 3987 add_amd_cache(devi, l1_dcache_str,
3987 3988 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3988 3989 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3989 3990
3990 3991 /*
3991 3992 * code L1 cache configuration
3992 3993 */
3993 3994
3994 3995 add_amd_cache(devi, l1_icache_str,
3995 3996 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3996 3997 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3997 3998
3998 3999 if (cpi->cpi_xmaxeax < 0x80000006)
3999 4000 return;
4000 4001 cp = &cpi->cpi_extd[6];
4001 4002
4002 4003 /* Check for a unified L2 TLB for large pages */
4003 4004
4004 4005 if (BITX(cp->cp_eax, 31, 16) == 0)
4005 4006 add_amd_l2_tlb(devi, "l2-tlb-2M",
4006 4007 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4007 4008 else {
4008 4009 add_amd_l2_tlb(devi, "l2-dtlb-2M",
4009 4010 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4010 4011 add_amd_l2_tlb(devi, "l2-itlb-2M",
4011 4012 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4012 4013 }
4013 4014
4014 4015 /* Check for a unified L2 TLB for 4K pages */
4015 4016
4016 4017 if (BITX(cp->cp_ebx, 31, 16) == 0) {
4017 4018 add_amd_l2_tlb(devi, "l2-tlb-4K",
4018 4019 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
4019 4020 } else {
4020 4021 add_amd_l2_tlb(devi, "l2-dtlb-4K",
4021 4022 BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
4022 4023 add_amd_l2_tlb(devi, "l2-itlb-4K",
4023 4024 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
4024 4025 }
4025 4026
4026 4027 add_amd_l2_cache(devi, l2_cache_str,
4027 4028 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
4028 4029 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
4029 4030 }
4030 4031
4031 4032 /*
4032 4033 * There are two basic ways that the x86 world describes its cache
4033 4034 * and tlb architecture - Intel's way and AMD's way.
4034 4035 *
4035 4036 * Return which flavor of cache architecture we should use
4036 4037 */
4037 4038 static int
4038 4039 x86_which_cacheinfo(struct cpuid_info *cpi)
4039 4040 {
4040 4041 switch (cpi->cpi_vendor) {
4041 4042 case X86_VENDOR_Intel:
4042 4043 if (cpi->cpi_maxeax >= 2)
4043 4044 return (X86_VENDOR_Intel);
4044 4045 break;
4045 4046 case X86_VENDOR_AMD:
4046 4047 /*
4047 4048 * The K5 model 1 was the first part from AMD that reported
4048 4049 * cache sizes via extended cpuid functions.
4049 4050 */
4050 4051 if (cpi->cpi_family > 5 ||
4051 4052 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4052 4053 return (X86_VENDOR_AMD);
4053 4054 break;
4054 4055 case X86_VENDOR_TM:
4055 4056 if (cpi->cpi_family >= 5)
4056 4057 return (X86_VENDOR_AMD);
4057 4058 /*FALLTHROUGH*/
4058 4059 default:
4059 4060 /*
4060 4061 * If they have extended CPU data for 0x80000005
4061 4062 * then we assume they have AMD-format cache
4062 4063 * information.
4063 4064 *
4064 4065 * If not, and the vendor happens to be Cyrix,
4065 4066 * then try our Cyrix-specific handler.
4066 4067 *
4067 4068 * If we're not Cyrix, then assume we're using Intel's
4068 4069 * table-driven format instead.
4069 4070 */
4070 4071 if (cpi->cpi_xmaxeax >= 0x80000005)
4071 4072 return (X86_VENDOR_AMD);
4072 4073 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4073 4074 return (X86_VENDOR_Cyrix);
4074 4075 else if (cpi->cpi_maxeax >= 2)
4075 4076 return (X86_VENDOR_Intel);
4076 4077 break;
4077 4078 }
4078 4079 return (-1);
4079 4080 }
4080 4081
4081 4082 void
4082 4083 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4083 4084 struct cpuid_info *cpi)
4084 4085 {
4085 4086 dev_info_t *cpu_devi;
4086 4087 int create;
4087 4088
4088 4089 cpu_devi = (dev_info_t *)dip;
4089 4090
4090 4091 /* device_type */
4091 4092 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4092 4093 "device_type", "cpu");
4093 4094
4094 4095 /* reg */
4095 4096 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4096 4097 "reg", cpu_id);
4097 4098
4098 4099 /* cpu-mhz, and clock-frequency */
4099 4100 if (cpu_freq > 0) {
4100 4101 long long mul;
4101 4102
4102 4103 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4103 4104 "cpu-mhz", cpu_freq);
4104 4105 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4105 4106 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4106 4107 "clock-frequency", (int)mul);
4107 4108 }
4108 4109
4109 4110 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4110 4111 return;
4111 4112 }
4112 4113
4113 4114 /* vendor-id */
4114 4115 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4115 4116 "vendor-id", cpi->cpi_vendorstr);
4116 4117
4117 4118 if (cpi->cpi_maxeax == 0) {
4118 4119 return;
4119 4120 }
4120 4121
4121 4122 /*
4122 4123 * family, model, and step
4123 4124 */
4124 4125 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4125 4126 "family", CPI_FAMILY(cpi));
4126 4127 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4127 4128 "cpu-model", CPI_MODEL(cpi));
4128 4129 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4129 4130 "stepping-id", CPI_STEP(cpi));
4130 4131
4131 4132 /* type */
4132 4133 switch (cpi->cpi_vendor) {
4133 4134 case X86_VENDOR_Intel:
4134 4135 create = 1;
4135 4136 break;
4136 4137 default:
4137 4138 create = 0;
4138 4139 break;
4139 4140 }
4140 4141 if (create)
4141 4142 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4142 4143 "type", CPI_TYPE(cpi));
4143 4144
4144 4145 /* ext-family */
4145 4146 switch (cpi->cpi_vendor) {
4146 4147 case X86_VENDOR_Intel:
4147 4148 case X86_VENDOR_AMD:
4148 4149 create = cpi->cpi_family >= 0xf;
4149 4150 break;
4150 4151 default:
4151 4152 create = 0;
4152 4153 break;
4153 4154 }
4154 4155 if (create)
4155 4156 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4156 4157 "ext-family", CPI_FAMILY_XTD(cpi));
4157 4158
4158 4159 /* ext-model */
4159 4160 switch (cpi->cpi_vendor) {
4160 4161 case X86_VENDOR_Intel:
4161 4162 create = IS_EXTENDED_MODEL_INTEL(cpi);
4162 4163 break;
4163 4164 case X86_VENDOR_AMD:
4164 4165 create = CPI_FAMILY(cpi) == 0xf;
4165 4166 break;
4166 4167 default:
4167 4168 create = 0;
4168 4169 break;
4169 4170 }
4170 4171 if (create)
4171 4172 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4172 4173 "ext-model", CPI_MODEL_XTD(cpi));
4173 4174
4174 4175 /* generation */
4175 4176 switch (cpi->cpi_vendor) {
4176 4177 case X86_VENDOR_AMD:
4177 4178 /*
4178 4179 * AMD K5 model 1 was the first part to support this
4179 4180 */
4180 4181 create = cpi->cpi_xmaxeax >= 0x80000001;
4181 4182 break;
4182 4183 default:
4183 4184 create = 0;
4184 4185 break;
4185 4186 }
4186 4187 if (create)
4187 4188 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4188 4189 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4189 4190
4190 4191 /* brand-id */
4191 4192 switch (cpi->cpi_vendor) {
4192 4193 case X86_VENDOR_Intel:
4193 4194 /*
4194 4195 * brand id first appeared on Pentium III Xeon model 8 and
4195 4196 * Celeron model 8 processors, and on Opteron
4196 4197 */
4197 4198 create = cpi->cpi_family > 6 ||
4198 4199 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4199 4200 break;
4200 4201 case X86_VENDOR_AMD:
4201 4202 create = cpi->cpi_family >= 0xf;
4202 4203 break;
4203 4204 default:
4204 4205 create = 0;
4205 4206 break;
4206 4207 }
4207 4208 if (create && cpi->cpi_brandid != 0) {
4208 4209 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4209 4210 "brand-id", cpi->cpi_brandid);
4210 4211 }
4211 4212
4212 4213 /* chunks, and apic-id */
4213 4214 switch (cpi->cpi_vendor) {
4214 4215 /*
4215 4216 * first available on Pentium IV and Opteron (K8)
4216 4217 */
4217 4218 case X86_VENDOR_Intel:
4218 4219 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4219 4220 break;
4220 4221 case X86_VENDOR_AMD:
4221 4222 create = cpi->cpi_family >= 0xf;
4222 4223 break;
4223 4224 default:
4224 4225 create = 0;
4225 4226 break;
4226 4227 }
4227 4228 if (create) {
4228 4229 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4229 4230 "chunks", CPI_CHUNKS(cpi));
4230 4231 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4231 4232 "apic-id", cpi->cpi_apicid);
4232 4233 if (cpi->cpi_chipid >= 0) {
4233 4234 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4234 4235 "chip#", cpi->cpi_chipid);
4235 4236 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4236 4237 "clog#", cpi->cpi_clogid);
4237 4238 }
4238 4239 }
4239 4240
4240 4241 /* cpuid-features */
4241 4242 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4242 4243 "cpuid-features", CPI_FEATURES_EDX(cpi));
4243 4244
4244 4245
4245 4246 /* cpuid-features-ecx */
4246 4247 switch (cpi->cpi_vendor) {
4247 4248 case X86_VENDOR_Intel:
4248 4249 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4249 4250 break;
4250 4251 case X86_VENDOR_AMD:
4251 4252 create = cpi->cpi_family >= 0xf;
4252 4253 break;
4253 4254 default:
4254 4255 create = 0;
4255 4256 break;
4256 4257 }
4257 4258 if (create)
4258 4259 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4259 4260 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4260 4261
4261 4262 /* ext-cpuid-features */
4262 4263 switch (cpi->cpi_vendor) {
4263 4264 case X86_VENDOR_Intel:
4264 4265 case X86_VENDOR_AMD:
4265 4266 case X86_VENDOR_Cyrix:
4266 4267 case X86_VENDOR_TM:
4267 4268 case X86_VENDOR_Centaur:
4268 4269 create = cpi->cpi_xmaxeax >= 0x80000001;
4269 4270 break;
4270 4271 default:
4271 4272 create = 0;
4272 4273 break;
4273 4274 }
4274 4275 if (create) {
4275 4276 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4276 4277 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4277 4278 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4278 4279 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4279 4280 }
4280 4281
4281 4282 /*
4282 4283 * Brand String first appeared in Intel Pentium IV, AMD K5
4283 4284 * model 1, and Cyrix GXm. On earlier models we try to
4284 4285 * simulate something similar .. so this string should always
4285 4286 * say -something- about the processor, however lame.
4286 4287 */
4287 4288 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4288 4289 "brand-string", cpi->cpi_brandstr);
4289 4290
4290 4291 /*
4291 4292 * Finally, cache and tlb information
4292 4293 */
4293 4294 switch (x86_which_cacheinfo(cpi)) {
4294 4295 case X86_VENDOR_Intel:
4295 4296 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4296 4297 break;
4297 4298 case X86_VENDOR_Cyrix:
4298 4299 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4299 4300 break;
4300 4301 case X86_VENDOR_AMD:
4301 4302 amd_cache_info(cpi, cpu_devi);
4302 4303 break;
4303 4304 default:
4304 4305 break;
4305 4306 }
4306 4307 }
4307 4308
4308 4309 struct l2info {
4309 4310 int *l2i_csz;
4310 4311 int *l2i_lsz;
4311 4312 int *l2i_assoc;
4312 4313 int l2i_ret;
4313 4314 };
4314 4315
4315 4316 /*
4316 4317 * A cacheinfo walker that fetches the size, line-size and associativity
4317 4318 * of the L2 cache
4318 4319 */
4319 4320 static int
4320 4321 intel_l2cinfo(void *arg, const struct cachetab *ct)
4321 4322 {
4322 4323 struct l2info *l2i = arg;
4323 4324 int *ip;
4324 4325
4325 4326 if (ct->ct_label != l2_cache_str &&
4326 4327 ct->ct_label != sl2_cache_str)
4327 4328 return (0); /* not an L2 -- keep walking */
4328 4329
4329 4330 if ((ip = l2i->l2i_csz) != NULL)
4330 4331 *ip = ct->ct_size;
4331 4332 if ((ip = l2i->l2i_lsz) != NULL)
4332 4333 *ip = ct->ct_line_size;
4333 4334 if ((ip = l2i->l2i_assoc) != NULL)
4334 4335 *ip = ct->ct_assoc;
4335 4336 l2i->l2i_ret = ct->ct_size;
4336 4337 return (1); /* was an L2 -- terminate walk */
4337 4338 }
4338 4339
4339 4340 /*
4340 4341 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4341 4342 *
4342 4343 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4343 4344 * value is the associativity, the associativity for the L2 cache and
4344 4345 * tlb is encoded in the following table. The 4 bit L2 value serves as
4345 4346 * an index into the amd_afd[] array to determine the associativity.
4346 4347 * -1 is undefined. 0 is fully associative.
4347 4348 */
4348 4349
4349 4350 static int amd_afd[] =
4350 4351 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
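/* e.g. an encoded field value of 6 indexes amd_afd[6], i.e. 8-way */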
4351 4352
4352 4353 static void
4353 4354 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4354 4355 {
4355 4356 struct cpuid_regs *cp;
4356 4357 uint_t size, assoc;
4357 4358 int i;
4358 4359 int *ip;
4359 4360
4360 4361 if (cpi->cpi_xmaxeax < 0x80000006)
4361 4362 return;
4362 4363 cp = &cpi->cpi_extd[6];
4363 4364
4364 4365 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4365 4366 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4366 4367 uint_t cachesz = size * 1024;
4367 4368 assoc = amd_afd[i];
4368 4369
4369 4370 ASSERT(assoc != -1);
4370 4371
4371 4372 if ((ip = l2i->l2i_csz) != NULL)
4372 4373 *ip = cachesz;
4373 4374 if ((ip = l2i->l2i_lsz) != NULL)
4374 4375 *ip = BITX(cp->cp_ecx, 7, 0);
4375 4376 if ((ip = l2i->l2i_assoc) != NULL)
4376 4377 *ip = assoc;
4377 4378 l2i->l2i_ret = cachesz;
4378 4379 }
4379 4380 }
4380 4381
4381 4382 int
4382 4383 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4383 4384 {
4384 4385 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4385 4386 struct l2info __l2info, *l2i = &__l2info;
4386 4387
4387 4388 l2i->l2i_csz = csz;
4388 4389 l2i->l2i_lsz = lsz;
4389 4390 l2i->l2i_assoc = assoc;
4390 4391 l2i->l2i_ret = -1;
4391 4392
4392 4393 switch (x86_which_cacheinfo(cpi)) {
4393 4394 case X86_VENDOR_Intel:
4394 4395 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4395 4396 break;
4396 4397 case X86_VENDOR_Cyrix:
4397 4398 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4398 4399 break;
4399 4400 case X86_VENDOR_AMD:
4400 4401 amd_l2cacheinfo(cpi, l2i);
4401 4402 break;
4402 4403 default:
4403 4404 break;
4404 4405 }
4405 4406 return (l2i->l2i_ret);
4406 4407 }
4407 4408
4408 4409 #if !defined(__xpv)
4409 4410
4410 4411 uint32_t *
4411 4412 cpuid_mwait_alloc(cpu_t *cpu)
4412 4413 {
4413 4414 uint32_t *ret;
4414 4415 size_t mwait_size;
4415 4416
4416 4417 ASSERT(cpuid_checkpass(CPU, 2));
4417 4418
4418 4419 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4419 4420 if (mwait_size == 0)
4420 4421 return (NULL);
4421 4422
4422 4423 /*
4423 4424 * kmem_alloc() returns cache line size aligned data for mwait_size
4424 4425 * allocations. mwait_size is currently cache line sized. Neither
4425 4426 * of these implementation details are guaranteed to be true in the
4426 4427 * future.
4427 4428 *
4428 4429 * First try allocating mwait_size as kmem_alloc() currently returns
4429 4430 * correctly aligned memory. If kmem_alloc() does not return
4430 4431 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4431 4432 *
4432 4433 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4433 4434 * decide to free this memory.
4434 4435 */
4435 4436 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4436 4437 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4437 4438 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4438 4439 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4439 4440 *ret = MWAIT_RUNNING;
4440 4441 return (ret);
4441 4442 } else {
4442 4443 kmem_free(ret, mwait_size);
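/*
 * mwait_size is expected to be a power of two (P2ROUNDUP
 * relies on this), so a 2x-sized allocation is guaranteed to
 * contain an mwait_size-aligned block of mwait_size bytes.
 */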
4443 4444 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4444 4445 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4445 4446 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4446 4447 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4447 4448 *ret = MWAIT_RUNNING;
4448 4449 return (ret);
4449 4450 }
4450 4451 }
4451 4452
4452 4453 void
4453 4454 cpuid_mwait_free(cpu_t *cpu)
4454 4455 {
4455 4456 if (cpu->cpu_m.mcpu_cpi == NULL) {
4456 4457 return;
4457 4458 }
4458 4459
4459 4460 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4460 4461 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4461 4462 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4462 4463 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4463 4464 }
4464 4465
4465 4466 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4466 4467 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4467 4468 }
4468 4469
4469 4470 void
4470 4471 patch_tsc_read(int flag)
4471 4472 {
4472 4473 size_t cnt;
4473 4474
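/*
 * Overwrite tsc_read() with the variant selected by the flag;
 * each variant's code is delimited by _start/_end labels.
 */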
4474 4475 switch (flag) {
4475 4476 case X86_NO_TSC:
4476 4477 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4477 4478 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4478 4479 break;
4479 4480 case X86_HAVE_TSCP:
4480 4481 cnt = &_tscp_end - &_tscp_start;
4481 4482 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4482 4483 break;
4483 4484 case X86_TSC_MFENCE:
4484 4485 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4485 4486 (void) memcpy((void *)tsc_read,
4486 4487 (void *)&_tsc_mfence_start, cnt);
4487 4488 break;
4488 4489 case X86_TSC_LFENCE:
4489 4490 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4490 4491 (void) memcpy((void *)tsc_read,
4491 4492 (void *)&_tsc_lfence_start, cnt);
4492 4493 break;
4493 4494 default:
4494 4495 break;
4495 4496 }
4496 4497 }
4497 4498
4498 4499 int
4499 4500 cpuid_deep_cstates_supported(void)
4500 4501 {
4501 4502 struct cpuid_info *cpi;
4502 4503 struct cpuid_regs regs;
4503 4504
4504 4505 ASSERT(cpuid_checkpass(CPU, 1));
4505 4506
4506 4507 cpi = CPU->cpu_m.mcpu_cpi;
4507 4508
4508 4509 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4509 4510 return (0);
4510 4511
4511 4512 switch (cpi->cpi_vendor) {
4512 4513 case X86_VENDOR_Intel:
4513 4514 if (cpi->cpi_xmaxeax < 0x80000007)
4514 4515 return (0);
4515 4516
4516 4517 /*
4517 4518 * Does the TSC run at a constant rate in all ACPI C-states?
4518 4519 */
4519 4520 regs.cp_eax = 0x80000007;
4520 4521 (void) __cpuid_insn(&regs);
4521 4522 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4522 4523
4523 4524 default:
4524 4525 return (0);
4525 4526 }
4526 4527 }
4527 4528
4528 4529 #endif /* !__xpv */
4529 4530
4530 4531 void
4531 4532 post_startup_cpu_fixups(void)
4532 4533 {
4533 4534 #ifndef __xpv
4534 4535 /*
4535 4536 * Some AMD processors support C1E state. Entering this state will
4536 4537 * cause the local APIC timer to stop, which we can't deal with at
4537 4538 * this time.
4538 4539 */
4539 4540 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4540 4541 on_trap_data_t otd;
4541 4542 uint64_t reg;
4542 4543
4543 4544 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4544 4545 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4545 4546 /* Disable C1E state if it is enabled by BIOS */
4546 4547 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4547 4548 AMD_ACTONCMPHALT_MASK) {
4548 4549 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4549 4550 AMD_ACTONCMPHALT_SHIFT);
4550 4551 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4551 4552 }
4552 4553 }
4553 4554 no_trap();
4554 4555 }
4555 4556 #endif /* !__xpv */
4556 4557 }
4557 4558
4558 4559 /*
4559 4560 * Setup necessary registers to enable XSAVE feature on this processor.
4560 4561 * This function needs to be called early enough, so that no xsave/xrstor
4561 4562 * ops will execute on the processor before the MSRs are properly set up.
4562 4563 *
4563 4564 * Current implementation has the following assumption:
4564 4565 * - cpuid_pass1() is done, so that X86 features are known.
4565 4566 * - fpu_probe() is done, so that fp_save_mech is chosen.
4566 4567 */
4567 4568 void
4568 4569 xsave_setup_msr(cpu_t *cpu)
4569 4570 {
4570 4571 ASSERT(fp_save_mech == FP_XSAVE);
4571 4572 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4572 4573
4573 4574 /* Enable OSXSAVE in CR4. */
4574 4575 setcr4(getcr4() | CR4_OSXSAVE);
4575 4576 /*
4576 4577 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4577 4578 * correct value.
4578 4579 */
4579 4580 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4580 4581 setup_xfem();
4581 4582 }
4582 4583
4583 4584 /*
4584 4585 * Starting with the Westmere processor the local
4585 4586 * APIC timer will continue running in all C-states,
4586 4587 * including the deepest C-states.
4587 4588 */
4588 4589 int
4589 4590 cpuid_arat_supported(void)
4590 4591 {
4591 4592 struct cpuid_info *cpi;
4592 4593 struct cpuid_regs regs;
4593 4594
4594 4595 ASSERT(cpuid_checkpass(CPU, 1));
4595 4596 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4596 4597
4597 4598 cpi = CPU->cpu_m.mcpu_cpi;
4598 4599
4599 4600 switch (cpi->cpi_vendor) {
4600 4601 case X86_VENDOR_Intel:
4601 4602 /*
4602 4603 * Always-running Local APIC Timer is
4603 4604 * indicated by CPUID.6.EAX[2].
4604 4605 */
4605 4606 if (cpi->cpi_maxeax >= 6) {
4606 4607 regs.cp_eax = 6;
4607 4608 (void) cpuid_insn(NULL, &regs);
4608 4609 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4609 4610 } else {
4610 4611 return (0);
4611 4612 }
4612 4613 default:
4613 4614 return (0);
4614 4615 }
4615 4616 }
4616 4617
4617 4618 /*
4618 4619 * Check support for Intel ENERGY_PERF_BIAS feature
4619 4620 */
4620 4621 int
4621 4622 cpuid_iepb_supported(struct cpu *cp)
4622 4623 {
4623 4624 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4624 4625 struct cpuid_regs regs;
4625 4626
4626 4627 ASSERT(cpuid_checkpass(cp, 1));
4627 4628
4628 4629 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4629 4630 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4630 4631 return (0);
4631 4632 }
4632 4633
4633 4634 /*
4634 4635 * Intel ENERGY_PERF_BIAS MSR is indicated by
4635 4636 * capability bit CPUID.6.ECX.3
4636 4637 */
4637 4638 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4638 4639 return (0);
4639 4640
4640 4641 regs.cp_eax = 0x6;
4641 4642 (void) cpuid_insn(NULL, &regs);
4642 4643 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4643 4644 }
4644 4645
4645 4646 /*
4646 4647 * Check support for TSC deadline timer
4647 4648 *
4648 4649 * TSC deadline timer provides a superior software programming
4649 4650 * model over the local APIC timer that eliminates "time drifts".
4650 4651 * Instead of specifying a relative time, software specifies an
4651 4652 * absolute time as the target at which the processor should
4652 4653 * generate a timer event.
4653 4654 */
4654 4655 int
4655 4656 cpuid_deadline_tsc_supported(void)
4656 4657 {
4657 4658 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4658 4659 struct cpuid_regs regs;
4659 4660
4660 4661 ASSERT(cpuid_checkpass(CPU, 1));
4661 4662 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4662 4663
4663 4664 switch (cpi->cpi_vendor) {
4664 4665 case X86_VENDOR_Intel:
4665 4666 if (cpi->cpi_maxeax >= 1) {
4666 4667 regs.cp_eax = 1;
4667 4668 (void) cpuid_insn(NULL, &regs);
4668 4669 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4669 4670 } else {
4670 4671 return (0);
4671 4672 }
4672 4673 default:
4673 4674 return (0);
4674 4675 }
4675 4676 }
4676 4677
4677 4678 #if defined(__amd64) && !defined(__xpv)
4678 4679 /*
4679 4680 * Patch in versions of bcopy for high performance Intel Nhm processors
4680 4681 * and later...
4681 4682 */
4682 4683 void
4683 4684 patch_memops(uint_t vendor)
4684 4685 {
4685 4686 size_t cnt, i;
4686 4687 caddr_t to, from;
4687 4688
4688 4689 if ((vendor == X86_VENDOR_Intel) &&
4689 4690 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4690 4691 cnt = &bcopy_patch_end - &bcopy_patch_start;
4691 4692 to = &bcopy_ck_size;
4692 4693 from = &bcopy_patch_start;
4693 4694 for (i = 0; i < cnt; i++) {
4694 4695 *to++ = *from++;
4695 4696 }
4696 4697 }
4697 4698 }
4698 4699 #endif /* __amd64 && !__xpv */
4699 4700
4700 4701 /*
4701 4702 * This function finds the number of bits to represent the number of cores per
4702 4703 * chip and the number of strands per core for the Intel platforms.
4703 4704 * It reuses the x2APIC cpuid code from cpuid_pass2().
4704 4705 */
4705 4706 void
4706 4707 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4707 4708 {
4708 4709 struct cpuid_regs regs;
4709 4710 struct cpuid_regs *cp = &regs;
4710 4711
4711 4712 if (vendor != X86_VENDOR_Intel) {
4712 4713 return;
4713 4714 }
4714 4715
4715 4716 /* if the max cpuid level is at least 0xB, extended topo is available. */
4716 4717 cp->cp_eax = 0;
4717 4718 if (__cpuid_insn(cp) >= 0xB) {
4718 4719
4719 4720 cp->cp_eax = 0xB;
4720 4721 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4721 4722 (void) __cpuid_insn(cp);
4722 4723
4723 4724 /*
4724 4725 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4725 4726 * indicates that the extended topology enumeration leaf is
4726 4727 * available.
4727 4728 */
4728 4729 if (cp->cp_ebx) {
4729 4730 uint_t coreid_shift = 0;
4730 4731 uint_t chipid_shift = 0;
4731 4732 uint_t i;
4732 4733 uint_t level;
4733 4734
4734 4735 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4735 4736 cp->cp_eax = 0xB;
4736 4737 cp->cp_ecx = i;
4737 4738
4738 4739 (void) __cpuid_insn(cp);
4739 4740 level = CPI_CPU_LEVEL_TYPE(cp);
4740 4741
4741 4742 if (level == 1) {
4742 4743 /*
4743 4744 * Thread level processor topology
4744 4745 * Number of bits shift right APIC ID
4745 4746 * to get the coreid.
4746 4747 */
4747 4748 coreid_shift = BITX(cp->cp_eax, 4, 0);
4748 4749 } else if (level == 2) {
4749 4750 /*
4750 4751 * Core level processor topology
4751 4752 * Number of bits shift right APIC ID
4752 4753 * to get the chipid.
4753 4754 */
4754 4755 chipid_shift = BITX(cp->cp_eax, 4, 0);
4755 4756 }
4756 4757 }
4757 4758
4758 4759 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4759 4760 *strand_nbits = coreid_shift;
4760 4761 *core_nbits = chipid_shift - coreid_shift;
4761 4762 }
4762 4763 }
4763 4764 }
4764 4765 }
3437 lines elided