XXXX define x2apic feature flag
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 + * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
25 26 */
26 27 /*
27 28 * Copyright (c) 2010, Intel Corporation.
28 29 * All rights reserved.
29 30 */
30 31 /*
31 32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
32 33 */
33 34 /*
34 35 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
35 36 */
36 37 /*
37 38 * Various routines to handle identification
38 39 * and classification of x86 processors.
39 40 */
40 41
41 42 #include <sys/types.h>
42 43 #include <sys/archsystm.h>
43 44 #include <sys/x86_archext.h>
44 45 #include <sys/kmem.h>
45 46 #include <sys/systm.h>
46 47 #include <sys/cmn_err.h>
47 48 #include <sys/sunddi.h>
48 49 #include <sys/sunndi.h>
49 50 #include <sys/cpuvar.h>
50 51 #include <sys/processor.h>
51 52 #include <sys/sysmacros.h>
52 53 #include <sys/pg.h>
53 54 #include <sys/fp.h>
54 55 #include <sys/controlregs.h>
55 56 #include <sys/bitmap.h>
56 57 #include <sys/auxv_386.h>
57 58 #include <sys/memnode.h>
58 59 #include <sys/pci_cfgspace.h>
59 60
60 61 #ifdef __xpv
61 62 #include <sys/hypervisor.h>
62 63 #else
63 64 #include <sys/ontrap.h>
64 65 #endif
65 66
66 67 /*
67 68 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
68 69 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
69 70 * them accordingly. For most modern processors, feature detection occurs here
70 71 * in pass 1.
71 72 *
72 73 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
73 74 * for the boot CPU and does the basic analysis that the early kernel needs.
74 75 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
75 76 * CPU.
76 77 *
77 78 * Pass 1 includes:
78 79 *
79 80 * o Determining vendor/model/family/stepping and setting x86_type and
80 81 * x86_vendor accordingly.
81 82 * o Processing the feature flags returned by the cpuid instruction while
82 83 * applying any workarounds or tricks for the specific processor.
83 84 * o Mapping the feature flags into Solaris feature bits (X86_*).
84 85 * o Processing extended feature flags if supported by the processor,
85 86 * again while applying specific processor knowledge.
86 87 * o Determining the CMT characteristics of the system.
87 88 *
88 89 * Pass 1 is done on non-boot CPUs during their initialization and the results
89 90 * are used only as a meager attempt at ensuring that all processors within the
90 91 * system support the same features.
91 92 *
92 93 * Pass 2 of cpuid feature analysis happens just at the beginning
93 94 * of startup(). It just copies in and corrects the remainder
94 95 * of the cpuid data we depend on: standard cpuid functions that we didn't
95 96 * need for pass1 feature analysis, and extended cpuid functions beyond the
96 97 * simple feature processing done in pass1.
97 98 *
98 99 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
99 100 * particular kernel memory allocation has been made available. It creates a
100 101 * readable brand string based on the data collected in the first two passes.
101 102 *
102 103 * Pass 4 of cpuid analysis is invoked after post_startup() when all
103 104 * the support infrastructure for various hardware features has been
104 105 * initialized. It determines which processor features will be reported
105 106 * to userland via the aux vector.
106 107 *
107 108 * All passes are executed on all CPUs, but only the boot CPU determines what
108 109 * features the kernel will use.
109 110 *
110 111 * Much of the worst junk in this file is for the support of processors
111 112 * that didn't really implement the cpuid instruction properly.
112 113 *
113 114 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
114 115 * the pass numbers. Accordingly, changes to the pass code may require changes
115 116 * to the accessor code.
116 117 */
117 118
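The pass-number contract described in the note above is easiest to see as a sketch: accessors record which pass fills in the data they return and ASSERT on cpi_pass before handing it out. The helper below is purely illustrative (the real cpuid_get* accessors live elsewhere in this file), but it shows the pattern the note refers to:

    /* Illustrative only; not a function defined by this file. */
    static uint_t
    cpuid_getfamily_sketch(cpu_t *cpu)
    {
        struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

        ASSERT(cpi->cpi_pass >= 1);     /* cpi_family is filled in by pass 1 */
        return (cpi->cpi_family);
    }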
118 119 uint_t x86_vendor = X86_VENDOR_IntelClone;
119 120 uint_t x86_type = X86_TYPE_OTHER;
120 121 uint_t x86_clflush_size = 0;
121 122
122 123 uint_t pentiumpro_bug4046376;
123 124
124 125 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
125 126
126 127 static char *x86_feature_names[NUM_X86_FEATURES] = {
127 128 "lgpg",
128 129 "tsc",
129 130 "msr",
130 131 "mtrr",
131 132 "pge",
132 133 "de",
133 134 "cmov",
134 135 "mmx",
135 136 "mca",
136 137 "pae",
137 138 "cv8",
138 139 "pat",
139 140 "sep",
140 141 "sse",
141 142 "sse2",
142 143 "htt",
143 144 "asysc",
144 145 "nx",
145 146 "sse3",
146 147 "cx16",
147 148 "cmp",
148 149 "tscp",
149 150 "mwait",
150 151 "sse4a",
151 152 "cpuid",
152 153 "ssse3",
153 154 "sse4_1",
154 155 "sse4_2",
155 156 "1gpg",
156 157 "clfsh",
157 158 "64",
158 159 "aes",
159 160 "pclmulqdq",
160 161 "xsave",
161 162 "avx",
162 163 "vmx",
163 164 "svm",
164 165 "topoext",
165 166 "f16c",
166 - "rdrand"
167 + "rdrand",
168 + "x2apic",
167 169 };
168 170
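The string table above is indexed by the X86FSET_* constants, so the new "x2apic" entry only lines up if a matching X86FSET_X2APIC value, presumably added to x86_archext.h by this change but not visible in this file, occupies the slot right after X86FSET_RDRAND. One illustrative way to state that invariant:

    /* Illustrative check only; not part of the change. */
    ASSERT(strcmp(x86_feature_names[X86FSET_X2APIC], "x2apic") == 0);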
169 171 boolean_t
170 172 is_x86_feature(void *featureset, uint_t feature)
171 173 {
172 174 ASSERT(feature < NUM_X86_FEATURES);
173 175 return (BT_TEST((ulong_t *)featureset, feature));
174 176 }
175 177
176 178 void
177 179 add_x86_feature(void *featureset, uint_t feature)
178 180 {
179 181 ASSERT(feature < NUM_X86_FEATURES);
180 182 BT_SET((ulong_t *)featureset, feature);
181 183 }
182 184
183 185 void
184 186 remove_x86_feature(void *featureset, uint_t feature)
185 187 {
186 188 ASSERT(feature < NUM_X86_FEATURES);
187 189 BT_CLEAR((ulong_t *)featureset, feature);
188 190 }
189 191
190 192 boolean_t
191 193 compare_x86_featureset(void *setA, void *setB)
192 194 {
193 195 /*
194 196 * We assume that the unused bits of the bitmap are always zero.
195 197 */
196 198 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
197 199 return (B_TRUE);
198 200 } else {
199 201 return (B_FALSE);
200 202 }
201 203 }
202 204
203 205 void
204 206 print_x86_featureset(void *featureset)
205 207 {
206 208 uint_t i;
207 209
208 210 for (i = 0; i < NUM_X86_FEATURES; i++) {
209 211 if (is_x86_feature(featureset, i)) {
210 212 cmn_err(CE_CONT, "?x86_feature: %s\n",
211 213 x86_feature_names[i]);
212 214 }
213 215 }
214 216 }
215 217
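Taken together, these helpers are what allows pass 1 results on an application processor to be checked against the boot CPU, as the block comment at the top of the file describes. A hypothetical caller (the AP-side set name and warning text are illustrative) might look like:

    uchar_t ap_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

    cpuid_pass1(cpu, ap_featureset);
    if (!compare_x86_featureset(x86_featureset, ap_featureset)) {
        print_x86_featureset(ap_featureset);
        cmn_err(CE_WARN, "cpu%d features differ from the boot cpu",
            cpu->cpu_id);
    }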
216 218 static size_t xsave_state_size = 0;
217 219 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
218 220 boolean_t xsave_force_disable = B_FALSE;
219 221
220 222 /*
221 223 * This is set to platform type we are running on.
222 224 */
223 225 static int platform_type = -1;
224 226
225 227 #if !defined(__xpv)
226 228 /*
227 229 * Variable to patch if hypervisor platform detection needs to be
228 230 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
229 231 */
230 232 int enable_platform_detection = 1;
231 233 #endif
232 234
233 235 /*
234 236 * monitor/mwait info.
235 237 *
236 238 * size_actual and buf_actual are the real address and size allocated to get
237 239 * proper mwait_buf alignment. buf_actual and size_actual should be passed
238 240 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
239 241 * processor cache-line alignment, but this is not guaranteed in the future.
240 242 */
241 243 struct mwait_info {
242 244 size_t mon_min; /* min size to avoid missed wakeups */
243 245 size_t mon_max; /* size to avoid false wakeups */
244 246 size_t size_actual; /* size actually allocated */
245 247 void *buf_actual; /* memory actually allocated */
246 248 uint32_t support; /* processor support of monitor/mwait */
247 249 };
248 250
249 251 /*
250 252 * xsave/xrestor info.
251 253 *
252 254 * This structure contains HW feature bits and size of the xsave save area.
253 255 * Note: the kernel will use the maximum size required for all hardware
254 256 * features. It is not optimized for potential memory savings if features at
255 257 * the end of the save area are not enabled.
256 258 */
257 259 struct xsave_info {
258 260 uint32_t xsav_hw_features_low; /* Supported HW features */
259 261 uint32_t xsav_hw_features_high; /* Supported HW features */
260 262 size_t xsav_max_size; /* max size save area for HW features */
261 263 size_t ymm_size; /* AVX: size of ymm save area */
262 264 size_t ymm_offset; /* AVX: offset for ymm save area */
263 265 };
264 266
265 267
266 268 /*
267 269 * These constants determine how many of the elements of the
268 270 * cpuid we cache in the cpuid_info data structure; the
269 271 * remaining elements are accessible via the cpuid instruction.
270 272 */
271 273
272 274 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */
273 275 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
274 276
275 277 /*
276 278 * Some terminology needs to be explained:
277 279 * - Socket: Something that can be plugged into a motherboard.
278 280 * - Package: Same as socket
279 281 * - Chip: Same as socket. Note that AMD's documentation uses term "chip"
280 282 * differently: there, chip is the same as processor node (below)
281 283 * - Processor node: Some AMD processors have more than one
282 284 * "subprocessor" embedded in a package. These subprocessors (nodes)
283 285 * are fully-functional processors themselves with cores, caches,
284 286 * memory controllers, PCI configuration spaces. They are connected
285 287 * inside the package with Hypertransport links. On single-node
286 288 * processors, processor node is equivalent to chip/socket/package.
287 289 * - Compute Unit: Some AMD processors pair cores in "compute units" that
288 290 * share the FPU and the I$ and L2 caches.
289 291 */
290 292
291 293 struct cpuid_info {
292 294 uint_t cpi_pass; /* last pass completed */
293 295 /*
294 296 * standard function information
295 297 */
296 298 uint_t cpi_maxeax; /* fn 0: %eax */
297 299 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
298 300 uint_t cpi_vendor; /* enum of cpi_vendorstr */
299 301
300 302 uint_t cpi_family; /* fn 1: extended family */
301 303 uint_t cpi_model; /* fn 1: extended model */
302 304 uint_t cpi_step; /* fn 1: stepping */
303 305 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
304 306 /* AMD: package/socket # */
305 307 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
306 308 int cpi_clogid; /* fn 1: %ebx: thread # */
307 309 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
308 310 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
309 311 uint_t cpi_ncache; /* fn 2: number of elements */
310 312 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
311 313 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
312 314 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
313 315 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
314 316 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */
315 317 /*
316 318 * extended function information
317 319 */
318 320 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
319 321 char cpi_brandstr[49]; /* fn 0x8000000[234] */
320 322 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
321 323 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
322 324 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
323 325
324 326 id_t cpi_coreid; /* same coreid => strands share core */
325 327 int cpi_pkgcoreid; /* core number within single package */
326 328 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
327 329 /* Intel: fn 4: %eax[31-26] */
328 330 /*
329 331 * supported feature information
330 332 */
331 333 uint32_t cpi_support[5];
332 334 #define STD_EDX_FEATURES 0
333 335 #define AMD_EDX_FEATURES 1
334 336 #define TM_EDX_FEATURES 2
335 337 #define STD_ECX_FEATURES 3
336 338 #define AMD_ECX_FEATURES 4
337 339 /*
338 340 * Synthesized information, where known.
339 341 */
340 342 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
341 343 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
342 344 uint32_t cpi_socket; /* Chip package/socket type */
343 345
344 346 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
345 347 uint32_t cpi_apicid;
346 348 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
347 349 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
348 350 /* Intel: 1 */
349 351 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
350 352 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
351 353
352 354 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
353 355 };
354 356
355 357
356 358 static struct cpuid_info cpuid_info0;
357 359
358 360 /*
359 361 * These bit fields are defined by the Intel Application Note AP-485
360 362 * "Intel Processor Identification and the CPUID Instruction"
361 363 */
362 364 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
363 365 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
364 366 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
365 367 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
366 368 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
367 369 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
368 370
369 371 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
370 372 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
371 373 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
372 374 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
373 375
374 376 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
375 377 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
376 378 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
377 379 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
378 380
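As a concrete reading of the leaf-1 macros above, take a hypothetical %eax value of 0x000306a9 (the numbers are only an illustration of the bit slicing, not a statement about any particular part):

    uint32_t eax = 0x000306a9;          /* illustrative cpi_std[1].cp_eax */
    uint_t family = BITX(eax, 11, 8);   /* 0x6 */
    uint_t model  = BITX(eax, 7, 4);    /* 0xa */
    uint_t xmodel = BITX(eax, 19, 16);  /* 0x3 */
    uint_t step   = BITX(eax, 3, 0);    /* 0x9 */
    /* cpuid_pass1() later folds these into cpi_model = (0x3 << 4) | 0xa = 0x3a */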
379 381 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
380 382 #define CPI_XMAXEAX_MAX 0x80000100
381 383 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
382 384 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
383 385
384 386 /*
385 387 * Function 4 (Deterministic Cache Parameters) macros
386 388 * Defined by Intel Application Note AP-485
387 389 */
388 390 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
389 391 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
390 392 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
391 393 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
392 394 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
393 395 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
394 396 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
395 397
396 398 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
397 399 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
398 400 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
399 401
400 402 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
401 403
402 404 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
403 405
404 406
405 407 /*
406 408 * A couple of shorthand macros to identify "later" P6-family chips
407 409 * like the Pentium M and Core. First, the "older" P6-based stuff
408 410 * (loosely defined as "pre-Pentium-4"):
409 411 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
410 412 */
411 413
412 414 #define IS_LEGACY_P6(cpi) ( \
413 415 cpi->cpi_family == 6 && \
414 416 (cpi->cpi_model == 1 || \
415 417 cpi->cpi_model == 3 || \
416 418 cpi->cpi_model == 5 || \
417 419 cpi->cpi_model == 6 || \
418 420 cpi->cpi_model == 7 || \
419 421 cpi->cpi_model == 8 || \
420 422 cpi->cpi_model == 0xA || \
421 423 cpi->cpi_model == 0xB) \
422 424 )
423 425
424 426 /* A "new F6" is everything with family 6 that's not the above */
425 427 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
426 428
427 429 /* Extended family/model support */
428 430 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
429 431 cpi->cpi_family >= 0xf)
430 432
431 433 /*
432 434 * Info for monitor/mwait idle loop.
433 435 *
434 436 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
435 437 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
436 438 * 2006.
437 439 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
438 440 * Documentation Updates" #33633, Rev 2.05, December 2006.
439 441 */
440 442 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
441 443 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */
442 444 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
443 445 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
444 446 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
445 447 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
446 448 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
447 449 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
448 450 /*
449 451 * Number of sub-cstates for a given c-state.
450 452 */
451 453 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
452 454 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
453 455
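A worked reading of leaf 5 through the macros above, using made-up register values and assuming cpi points at a populated cpuid_info:

    cpi->cpi_std[5].cp_eax = 0x40;          /* smallest monitor line: 64 bytes */
    cpi->cpi_std[5].cp_ebx = 0x40;          /* largest monitor line: 64 bytes */
    cpi->cpi_std[5].cp_edx = 0x00002220;    /* per-C-state sub-state nibbles */
    ASSERT(MWAIT_SIZE_MIN(cpi) == 0x40);
    ASSERT(MWAIT_SIZE_MAX(cpi) == 0x40);
    ASSERT(MWAIT_NUM_SUBC_STATES(cpi, 4) == 2); /* nibble starting at bit 4 */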
454 456 /*
455 457 * XSAVE leaf 0xD enumeration
456 458 */
457 459 #define CPUID_LEAFD_2_YMM_OFFSET 576
458 460 #define CPUID_LEAFD_2_YMM_SIZE 256
459 461
460 462 /*
461 463 * Functions we consume from cpuid_subr.c; don't publish these in a header
462 464 * file to try and keep people using the expected cpuid_* interfaces.
463 465 */
464 466 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
465 467 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
466 468 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
467 469 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
468 470 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
469 471
470 472 /*
471 473 * Apply various platform-dependent restrictions where the
472 474 * underlying platform restrictions mean the CPU can be marked
473 475 * as less capable than its cpuid instruction would imply.
474 476 */
475 477 #if defined(__xpv)
476 478 static void
477 479 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
478 480 {
479 481 switch (eax) {
480 482 case 1: {
481 483 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
482 484 0 : CPUID_INTC_EDX_MCA;
483 485 cp->cp_edx &=
484 486 ~(mcamask |
485 487 CPUID_INTC_EDX_PSE |
486 488 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
487 489 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
488 490 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
489 491 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
490 492 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
491 493 break;
492 494 }
493 495
494 496 case 0x80000001:
495 497 cp->cp_edx &=
496 498 ~(CPUID_AMD_EDX_PSE |
497 499 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
498 500 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
499 501 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
500 502 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
501 503 CPUID_AMD_EDX_TSCP);
502 504 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
503 505 break;
504 506 default:
505 507 break;
506 508 }
507 509
508 510 switch (vendor) {
509 511 case X86_VENDOR_Intel:
510 512 switch (eax) {
511 513 case 4:
512 514 /*
513 515 * Zero out the (ncores-per-chip - 1) field
514 516 */
515 517 cp->cp_eax &= 0x03fffffff;
516 518 break;
517 519 default:
518 520 break;
519 521 }
520 522 break;
521 523 case X86_VENDOR_AMD:
522 524 switch (eax) {
523 525
524 526 case 0x80000001:
525 527 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
526 528 break;
527 529
528 530 case 0x80000008:
529 531 /*
530 532 * Zero out the (ncores-per-chip - 1) field
531 533 */
532 534 cp->cp_ecx &= 0xffffff00;
533 535 break;
534 536 default:
535 537 break;
536 538 }
537 539 break;
538 540 default:
539 541 break;
540 542 }
541 543 }
542 544 #else
543 545 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
544 546 #endif
545 547
546 548 /*
547 549 * Some undocumented ways of patching the results of the cpuid
548 550 * instruction to permit running Solaris 10 on future cpus that
549 551 * we don't currently support. Could be set to non-zero values
550 552 * via settings in eeprom.
551 553 */
552 554
553 555 uint32_t cpuid_feature_ecx_include;
554 556 uint32_t cpuid_feature_ecx_exclude;
555 557 uint32_t cpuid_feature_edx_include;
556 558 uint32_t cpuid_feature_edx_exclude;
557 559
558 560 /*
559 561 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
560 562 */
561 563 void
562 564 cpuid_alloc_space(cpu_t *cpu)
563 565 {
564 566 /*
565 567 * By convention, cpu0 is the boot cpu, which is set up
566 568 * before memory allocation is available. All other cpus get
567 569 * their cpuid_info struct allocated here.
568 570 */
569 571 ASSERT(cpu->cpu_id != 0);
570 572 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
571 573 cpu->cpu_m.mcpu_cpi =
572 574 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
573 575 }
574 576
575 577 void
576 578 cpuid_free_space(cpu_t *cpu)
577 579 {
578 580 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
579 581 int i;
580 582
581 583 ASSERT(cpi != NULL);
582 584 ASSERT(cpi != &cpuid_info0);
583 585
584 586 /*
585 587 * Free up any function 4 related dynamic storage
586 588 */
587 589 for (i = 1; i < cpi->cpi_std_4_size; i++)
588 590 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
589 591 if (cpi->cpi_std_4_size > 0)
590 592 kmem_free(cpi->cpi_std_4,
591 593 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
592 594
593 595 kmem_free(cpi, sizeof (*cpi));
594 596 cpu->cpu_m.mcpu_cpi = NULL;
595 597 }
596 598
597 599 #if !defined(__xpv)
598 600 /*
599 601 * Determine the type of the underlying platform. This is used to customize
600 602 * initialization of various subsystems (e.g. TSC). determine_platform() must
601 603 * only ever be called once to prevent two processors from seeing different
602 604 * values of platform_type. Must be called before cpuid_pass1(), the earliest
603 605 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
604 606 */
605 607 void
606 608 determine_platform(void)
607 609 {
608 610 struct cpuid_regs cp;
609 611 uint32_t base;
610 612 uint32_t regs[4];
611 613 char *hvstr = (char *)regs;
612 614
613 615 ASSERT(platform_type == -1);
614 616
615 617 platform_type = HW_NATIVE;
616 618
617 619 if (!enable_platform_detection)
618 620 return;
619 621
620 622 /*
621 623 * If Hypervisor CPUID bit is set, try to determine hypervisor
622 624 * vendor signature, and set platform type accordingly.
623 625 *
624 626 * References:
625 627 * http://lkml.org/lkml/2008/10/1/246
626 628 * http://kb.vmware.com/kb/1009458
627 629 */
628 630 cp.cp_eax = 0x1;
629 631 (void) __cpuid_insn(&cp);
630 632 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
631 633 cp.cp_eax = 0x40000000;
632 634 (void) __cpuid_insn(&cp);
633 635 regs[0] = cp.cp_ebx;
634 636 regs[1] = cp.cp_ecx;
635 637 regs[2] = cp.cp_edx;
636 638 regs[3] = 0;
637 639 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
638 640 platform_type = HW_XEN_HVM;
639 641 return;
640 642 }
641 643 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
642 644 platform_type = HW_VMWARE;
643 645 return;
644 646 }
645 647 if (strcmp(hvstr, HVSIG_KVM) == 0) {
646 648 platform_type = HW_KVM;
647 649 return;
648 650 }
649 651 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
650 652 platform_type = HW_MICROSOFT;
651 653 } else {
652 654 /*
653 655 * Check older VMware hardware versions. VMware hypervisor is
654 656 * detected by performing an IN operation to VMware hypervisor
655 657 * port and checking that value returned in %ebx is VMware
656 658 * hypervisor magic value.
657 659 *
658 660 * References: http://kb.vmware.com/kb/1009458
659 661 */
660 662 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
661 663 if (regs[1] == VMWARE_HVMAGIC) {
662 664 platform_type = HW_VMWARE;
663 665 return;
664 666 }
665 667 }
666 668
667 669 /*
668 670 * Check Xen hypervisor. In a fully virtualized domain,
669 671 * Xen's pseudo-cpuid function returns a string representing the
670 672 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
671 673 * supported cpuid function. We need at least a (base + 2) leaf value
672 674 * to do what we want to do. Try different base values, since the
673 675 * hypervisor might use a different one depending on whether Hyper-V
674 676 * emulation is switched on by default or not.
675 677 */
676 678 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
677 679 cp.cp_eax = base;
678 680 (void) __cpuid_insn(&cp);
679 681 regs[0] = cp.cp_ebx;
680 682 regs[1] = cp.cp_ecx;
681 683 regs[2] = cp.cp_edx;
682 684 regs[3] = 0;
683 685 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
684 686 cp.cp_eax >= (base + 2)) {
685 687 platform_type &= ~HW_NATIVE;
686 688 platform_type |= HW_XEN_HVM;
687 689 return;
688 690 }
689 691 }
690 692 }
691 693
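For reference, the signature check above works because the three registers are copied into consecutive uint32_t slots and regs[3] = 0 supplies the NUL terminator, so hvstr reads as a 12-character vendor string. A hypothetical KVM guest, for example, would see:

    regs[0] = 0x4b4d564b;   /* "KVMK" from %ebx */
    regs[1] = 0x564b4d56;   /* "VMKV" from %ecx */
    regs[2] = 0x0000004d;   /* "M\0\0\0" from %edx */
    regs[3] = 0;
    /* hvstr now reads "KVMKVMKVM", the string HVSIG_KVM is expected to match */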
692 694 int
693 695 get_hwenv(void)
694 696 {
695 697 ASSERT(platform_type != -1);
696 698 return (platform_type);
697 699 }
698 700
699 701 int
700 702 is_controldom(void)
701 703 {
702 704 return (0);
703 705 }
704 706
705 707 #else
706 708
707 709 int
708 710 get_hwenv(void)
709 711 {
710 712 return (HW_XEN_PV);
711 713 }
712 714
713 715 int
714 716 is_controldom(void)
715 717 {
716 718 return (DOMAIN_IS_INITDOMAIN(xen_info));
717 719 }
718 720
719 721 #endif /* __xpv */
720 722
721 723 static void
722 724 cpuid_intel_getids(cpu_t *cpu, void *feature)
723 725 {
724 726 uint_t i;
725 727 uint_t chipid_shift = 0;
726 728 uint_t coreid_shift = 0;
727 729 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
728 730
729 731 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
730 732 chipid_shift++;
731 733
732 734 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
733 735 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
734 736
735 737 if (is_x86_feature(feature, X86FSET_CMP)) {
736 738 /*
737 739 * Multi-core (and possibly multi-threaded)
738 740 * processors.
739 741 */
740 742 uint_t ncpu_per_core;
741 743 if (cpi->cpi_ncore_per_chip == 1)
742 744 ncpu_per_core = cpi->cpi_ncpu_per_chip;
743 745 else if (cpi->cpi_ncore_per_chip > 1)
744 746 ncpu_per_core = cpi->cpi_ncpu_per_chip /
745 747 cpi->cpi_ncore_per_chip;
746 748 /*
747 749 * 8bit APIC IDs on dual core Pentiums
748 750 * look like this:
749 751 *
750 752 * +-----------------------+------+------+
751 753 * | Physical Package ID | MC | HT |
752 754 * +-----------------------+------+------+
753 755 * <------- chipid -------->
754 756 * <------- coreid --------------->
755 757 * <--- clogid -->
756 758 * <------>
757 759 * pkgcoreid
758 760 *
759 761 * Where the number of bits necessary to
760 762 * represent MC and HT fields together equals
761 763 * to the minimum number of bits necessary to
762 764 * store the value of cpi->cpi_ncpu_per_chip.
763 765 * Of those bits, the MC part uses the number
764 766 * of bits necessary to store the value of
765 767 * cpi->cpi_ncore_per_chip.
766 768 */
767 769 for (i = 1; i < ncpu_per_core; i <<= 1)
768 770 coreid_shift++;
769 771 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
770 772 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
771 773 } else if (is_x86_feature(feature, X86FSET_HTT)) {
772 774 /*
773 775 * Single-core multi-threaded processors.
774 776 */
775 777 cpi->cpi_coreid = cpi->cpi_chipid;
776 778 cpi->cpi_pkgcoreid = 0;
777 779 }
778 780 cpi->cpi_procnodeid = cpi->cpi_chipid;
779 781 cpi->cpi_compunitid = cpi->cpi_coreid;
780 782 }
781 783
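To make the shift arithmetic in cpuid_intel_getids() concrete, consider a hypothetical package with 4 logical CPUs arranged as 2 cores of 2 threads, so chipid_shift works out to 2 and coreid_shift to 1:

    /*
     * Illustrative decomposition of APIC ID 0x7 under the diagram above:
     *  cpi_chipid    = 0x7 >> 2  = 1   (second package)
     *  cpi_clogid    = 0x7 & 0x3 = 3   (fourth logical cpu in the package)
     *  cpi_coreid    = 0x7 >> 1  = 3
     *  cpi_pkgcoreid = 3 >> 1    = 1   (second core within the package)
     */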
782 784 static void
783 785 cpuid_amd_getids(cpu_t *cpu)
784 786 {
785 787 int i, first_half, coreidsz;
786 788 uint32_t nb_caps_reg;
787 789 uint_t node2_1;
788 790 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
789 791 struct cpuid_regs *cp;
790 792
791 793 /*
792 794 * AMD CMP chips currently have a single thread per core.
793 795 *
794 796 * Since no two cpus share a core we must assign a distinct coreid
795 797 * per cpu, and we do this by using the cpu_id. This scheme does not,
796 798 * however, guarantee that sibling cores of a chip will have sequential
797 799 * coreids starting at a multiple of the number of cores per chip -
798 800 * that is usually the case, but if the ACPI MADT table is presented
799 801 * in a different order then we need to perform a few more gymnastics
800 802 * for the pkgcoreid.
801 803 *
802 804 * All processors in the system have the same number of enabled
803 805 * cores. Cores within a processor are always numbered sequentially
804 806 * from 0 regardless of how many or which are disabled, and there
805 807 * is no way for operating system to discover the real core id when some
806 808 * is no way for the operating system to discover the real core id when some
807 809 *
808 810 * In family 0x15, the cores come in pairs called compute units. They
809 811 * share I$ and L2 caches and the FPU. Enumeration of this feature is
810 812 * simplified by the new topology extensions CPUID leaf, indicated by
811 813 * the X86 feature X86FSET_TOPOEXT.
812 814 */
813 815
814 816 cpi->cpi_coreid = cpu->cpu_id;
815 817 cpi->cpi_compunitid = cpu->cpu_id;
816 818
817 819 if (cpi->cpi_xmaxeax >= 0x80000008) {
818 820
819 821 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
820 822
821 823 /*
822 824 * In AMD parlance chip is really a node while Solaris
823 825 * sees chip as equivalent to socket/package.
824 826 */
825 827 cpi->cpi_ncore_per_chip =
826 828 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
827 829 if (coreidsz == 0) {
828 830 /* Use legacy method */
829 831 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
830 832 coreidsz++;
831 833 if (coreidsz == 0)
832 834 coreidsz = 1;
833 835 }
834 836 } else {
835 837 /* Assume single-core part */
836 838 cpi->cpi_ncore_per_chip = 1;
837 839 coreidsz = 1;
838 840 }
839 841
840 842 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
841 843 cpi->cpi_apicid & ((1<<coreidsz) - 1);
842 844 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
843 845
844 846 /* Get node ID, compute unit ID */
845 847 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
846 848 cpi->cpi_xmaxeax >= 0x8000001e) {
847 849 cp = &cpi->cpi_extd[0x1e];
848 850 cp->cp_eax = 0x8000001e;
849 851 (void) __cpuid_insn(cp);
850 852
851 853 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
852 854 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
853 855 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
854 856 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
855 857 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
856 858 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
857 859 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
858 860 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
859 861 } else if (cpi->cpi_family == 0x10) {
860 862 /*
861 863 * See if we are a multi-node processor.
862 864 * All processors in the system have the same number of nodes
863 865 */
864 866 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
865 867 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
866 868 /* Single-node */
867 869 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
868 870 coreidsz);
869 871 } else {
870 872
871 873 /*
872 874 * Multi-node revision D (2 nodes per package
873 875 * are supported)
874 876 */
875 877 cpi->cpi_procnodes_per_pkg = 2;
876 878
877 879 first_half = (cpi->cpi_pkgcoreid <=
878 880 (cpi->cpi_ncore_per_chip/2 - 1));
879 881
880 882 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
881 883 /* We are BSP */
882 884 cpi->cpi_procnodeid = (first_half ? 0 : 1);
883 885 } else {
884 886
885 887 /* We are AP */
886 888 /* NodeId[2:1] bits to use for reading F3xe8 */
887 889 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
888 890
889 891 nb_caps_reg =
890 892 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
891 893
892 894 /*
893 895 * Check IntNodeNum bit (31:30, but bit 31 is
894 896 * always 0 on dual-node processors)
895 897 */
896 898 if (BITX(nb_caps_reg, 30, 30) == 0)
897 899 cpi->cpi_procnodeid = node2_1 +
898 900 !first_half;
899 901 else
900 902 cpi->cpi_procnodeid = node2_1 +
901 903 first_half;
902 904 }
903 905 }
904 906 } else {
905 907 cpi->cpi_procnodeid = 0;
906 908 }
907 909
908 910 cpi->cpi_chipid =
909 911 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
910 912 }
911 913
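A small worked example for the pre-TOPOEXT path in cpuid_amd_getids(), with hypothetical values: if leaf 0x80000008 reports coreidsz = 4 and the APIC ID is 0x12, then:

    /*
     *  cpi_clogid = cpi_pkgcoreid = 0x12 & 0xf = 2
     *  cpi_procnodeid (family 0xf or >= 0x11)  = (0x12 >> 4) & 7 = 1
     */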
912 914 /*
913 915 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
914 916 */
915 917 void
916 918 setup_xfem(void)
917 919 {
918 920 uint64_t flags = XFEATURE_LEGACY_FP;
919 921
920 922 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
921 923
922 924 if (is_x86_feature(x86_featureset, X86FSET_SSE))
923 925 flags |= XFEATURE_SSE;
924 926
925 927 if (is_x86_feature(x86_featureset, X86FSET_AVX))
926 928 flags |= XFEATURE_AVX;
927 929
928 930 set_xcr(XFEATURE_ENABLED_MASK, flags);
929 931
930 932 xsave_bv_all = flags;
931 933 }
932 934
933 935 void
934 936 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
935 937 {
936 938 uint32_t mask_ecx, mask_edx;
937 939 struct cpuid_info *cpi;
938 940 struct cpuid_regs *cp;
939 941 int xcpuid;
940 942 #if !defined(__xpv)
941 943 extern int idle_cpu_prefer_mwait;
942 944 #endif
943 945
944 946 /*
945 947 * Space statically allocated for BSP, ensure pointer is set
946 948 */
947 949 if (cpu->cpu_id == 0) {
948 950 if (cpu->cpu_m.mcpu_cpi == NULL)
949 951 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
950 952 }
951 953
952 954 add_x86_feature(featureset, X86FSET_CPUID);
953 955
954 956 cpi = cpu->cpu_m.mcpu_cpi;
955 957 ASSERT(cpi != NULL);
956 958 cp = &cpi->cpi_std[0];
957 959 cp->cp_eax = 0;
958 960 cpi->cpi_maxeax = __cpuid_insn(cp);
959 961 {
960 962 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
961 963 *iptr++ = cp->cp_ebx;
962 964 *iptr++ = cp->cp_edx;
963 965 *iptr++ = cp->cp_ecx;
964 966 *(char *)&cpi->cpi_vendorstr[12] = '\0';
965 967 }
966 968
967 969 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
968 970 x86_vendor = cpi->cpi_vendor; /* for compatibility */
969 971
970 972 /*
971 973 * Limit the range in case of weird hardware
972 974 */
973 975 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
974 976 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
975 977 if (cpi->cpi_maxeax < 1)
976 978 goto pass1_done;
977 979
978 980 cp = &cpi->cpi_std[1];
979 981 cp->cp_eax = 1;
980 982 (void) __cpuid_insn(cp);
981 983
982 984 /*
983 985 * Extract identifying constants for easy access.
984 986 */
985 987 cpi->cpi_model = CPI_MODEL(cpi);
986 988 cpi->cpi_family = CPI_FAMILY(cpi);
987 989
988 990 if (cpi->cpi_family == 0xf)
989 991 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
990 992
991 993 /*
992 994 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
993 995 * Intel, and presumably everyone else, uses model == 0xf, as
994 996 * one would expect (max value means possible overflow). Sigh.
995 997 */
996 998
997 999 switch (cpi->cpi_vendor) {
998 1000 case X86_VENDOR_Intel:
999 1001 if (IS_EXTENDED_MODEL_INTEL(cpi))
1000 1002 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1001 1003 break;
1002 1004 case X86_VENDOR_AMD:
1003 1005 if (CPI_FAMILY(cpi) == 0xf)
1004 1006 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1005 1007 break;
1006 1008 default:
1007 1009 if (cpi->cpi_model == 0xf)
1008 1010 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
1009 1011 break;
1010 1012 }
1011 1013
1012 1014 cpi->cpi_step = CPI_STEP(cpi);
1013 1015 cpi->cpi_brandid = CPI_BRANDID(cpi);
1014 1016
1015 1017 /*
1016 1018 * *default* assumptions:
1017 1019 * - believe %edx feature word
1018 1020 * - ignore %ecx feature word
1019 1021 * - 32-bit virtual and physical addressing
1020 1022 */
1021 1023 mask_edx = 0xffffffff;
1022 1024 mask_ecx = 0;
1023 1025
1024 1026 cpi->cpi_pabits = cpi->cpi_vabits = 32;
1025 1027
1026 1028 switch (cpi->cpi_vendor) {
1027 1029 case X86_VENDOR_Intel:
1028 1030 if (cpi->cpi_family == 5)
1029 1031 x86_type = X86_TYPE_P5;
1030 1032 else if (IS_LEGACY_P6(cpi)) {
1031 1033 x86_type = X86_TYPE_P6;
1032 1034 pentiumpro_bug4046376 = 1;
1033 1035 /*
1034 1036 * Clear the SEP bit when it was set erroneously
1035 1037 */
1036 1038 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
1037 1039 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
1038 1040 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
1039 1041 x86_type = X86_TYPE_P4;
1040 1042 /*
1041 1043 * We don't currently depend on any of the %ecx
1042 1044 * features until Prescott, so we'll only check
1043 1045 * this from P4 onwards. We might want to revisit
1044 1046 * that idea later.
1045 1047 */
1046 1048 mask_ecx = 0xffffffff;
1047 1049 } else if (cpi->cpi_family > 0xf)
1048 1050 mask_ecx = 0xffffffff;
1049 1051 /*
1050 1052 * We don't support MONITOR/MWAIT if leaf 5 is not available
1051 1053 * to obtain the monitor linesize.
1052 1054 */
1053 1055 if (cpi->cpi_maxeax < 5)
1054 1056 mask_ecx &= ~CPUID_INTC_ECX_MON;
1055 1057 break;
1056 1058 case X86_VENDOR_IntelClone:
1057 1059 default:
1058 1060 break;
1059 1061 case X86_VENDOR_AMD:
1060 1062 #if defined(OPTERON_ERRATUM_108)
1061 1063 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1062 1064 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1063 1065 cpi->cpi_model = 0xc;
1064 1066 } else
1065 1067 #endif
1066 1068 if (cpi->cpi_family == 5) {
1067 1069 /*
1068 1070 * AMD K5 and K6
1069 1071 *
1070 1072 * These CPUs have an incomplete implementation
1071 1073 * of MCA/MCE which we mask away.
1072 1074 */
1073 1075 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1074 1076
1075 1077 /*
1076 1078 * Model 0 uses the wrong (APIC) bit
1077 1079 * to indicate PGE. Fix it here.
1078 1080 */
1079 1081 if (cpi->cpi_model == 0) {
1080 1082 if (cp->cp_edx & 0x200) {
1081 1083 cp->cp_edx &= ~0x200;
1082 1084 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1083 1085 }
1084 1086 }
1085 1087
1086 1088 /*
1087 1089 * Early models had problems w/ MMX; disable.
1088 1090 */
1089 1091 if (cpi->cpi_model < 6)
1090 1092 mask_edx &= ~CPUID_INTC_EDX_MMX;
1091 1093 }
1092 1094
1093 1095 /*
1094 1096 * For newer families, SSE3 and CX16, at least, are valid;
1095 1097 * enable all
1096 1098 */
1097 1099 if (cpi->cpi_family >= 0xf)
1098 1100 mask_ecx = 0xffffffff;
1099 1101 /*
1100 1102 * We don't support MONITOR/MWAIT if leaf 5 is not available
1101 1103 * to obtain the monitor linesize.
1102 1104 */
1103 1105 if (cpi->cpi_maxeax < 5)
1104 1106 mask_ecx &= ~CPUID_INTC_ECX_MON;
1105 1107
1106 1108 #if !defined(__xpv)
1107 1109 /*
1108 1110 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1109 1111 * processors. AMD does not intend MWAIT to be used in the cpu
1110 1112 * idle loop on current and future processors. 10h and future
1111 1113 * AMD processors use more power in MWAIT than HLT.
1112 1114 * Pre-family-10h Opterons do not have the MWAIT instruction.
1113 1115 */
1114 1116 idle_cpu_prefer_mwait = 0;
1115 1117 #endif
1116 1118
1117 1119 break;
1118 1120 case X86_VENDOR_TM:
1119 1121 /*
1120 1122 * workaround the NT workaround in CMS 4.1
1121 1123 */
1122 1124 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1123 1125 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1124 1126 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1125 1127 break;
1126 1128 case X86_VENDOR_Centaur:
1127 1129 /*
1128 1130 * workaround the NT workarounds again
1129 1131 */
1130 1132 if (cpi->cpi_family == 6)
1131 1133 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1132 1134 break;
1133 1135 case X86_VENDOR_Cyrix:
1134 1136 /*
1135 1137 * We rely heavily on the probing in locore
1136 1138 * to actually figure out what parts, if any,
1137 1139 * of the Cyrix cpuid instruction to believe.
1138 1140 */
1139 1141 switch (x86_type) {
1140 1142 case X86_TYPE_CYRIX_486:
1141 1143 mask_edx = 0;
1142 1144 break;
1143 1145 case X86_TYPE_CYRIX_6x86:
1144 1146 mask_edx = 0;
1145 1147 break;
1146 1148 case X86_TYPE_CYRIX_6x86L:
1147 1149 mask_edx =
1148 1150 CPUID_INTC_EDX_DE |
1149 1151 CPUID_INTC_EDX_CX8;
1150 1152 break;
1151 1153 case X86_TYPE_CYRIX_6x86MX:
1152 1154 mask_edx =
1153 1155 CPUID_INTC_EDX_DE |
1154 1156 CPUID_INTC_EDX_MSR |
1155 1157 CPUID_INTC_EDX_CX8 |
1156 1158 CPUID_INTC_EDX_PGE |
1157 1159 CPUID_INTC_EDX_CMOV |
1158 1160 CPUID_INTC_EDX_MMX;
1159 1161 break;
1160 1162 case X86_TYPE_CYRIX_GXm:
1161 1163 mask_edx =
1162 1164 CPUID_INTC_EDX_MSR |
1163 1165 CPUID_INTC_EDX_CX8 |
1164 1166 CPUID_INTC_EDX_CMOV |
1165 1167 CPUID_INTC_EDX_MMX;
1166 1168 break;
1167 1169 case X86_TYPE_CYRIX_MediaGX:
1168 1170 break;
1169 1171 case X86_TYPE_CYRIX_MII:
1170 1172 case X86_TYPE_VIA_CYRIX_III:
1171 1173 mask_edx =
1172 1174 CPUID_INTC_EDX_DE |
1173 1175 CPUID_INTC_EDX_TSC |
1174 1176 CPUID_INTC_EDX_MSR |
1175 1177 CPUID_INTC_EDX_CX8 |
1176 1178 CPUID_INTC_EDX_PGE |
1177 1179 CPUID_INTC_EDX_CMOV |
1178 1180 CPUID_INTC_EDX_MMX;
1179 1181 break;
1180 1182 default:
1181 1183 break;
1182 1184 }
1183 1185 break;
1184 1186 }
1185 1187
1186 1188 #if defined(__xpv)
1187 1189 /*
1188 1190 * Do not support MONITOR/MWAIT under a hypervisor
1189 1191 */
1190 1192 mask_ecx &= ~CPUID_INTC_ECX_MON;
1191 1193 /*
1192 1194 * Do not support XSAVE under a hypervisor for now
1193 1195 */
1194 1196 xsave_force_disable = B_TRUE;
1195 1197
1196 1198 #endif /* __xpv */
1197 1199
1198 1200 if (xsave_force_disable) {
1199 1201 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1200 1202 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1201 1203 mask_ecx &= ~CPUID_INTC_ECX_F16C;
1202 1204 }
1203 1205
1204 1206 /*
1205 1207 * Now we've figured out the masks that determine
1206 1208 * which bits we choose to believe, apply the masks
1207 1209 * to the feature words, then map the kernel's view
1208 1210 * of these feature words into its feature word.
1209 1211 */
1210 1212 cp->cp_edx &= mask_edx;
1211 1213 cp->cp_ecx &= mask_ecx;
1212 1214
1213 1215 /*
1214 1216 * apply any platform restrictions (we don't call this
1215 1217 * immediately after __cpuid_insn here, because we need the
1216 1218 * workarounds applied above first)
1217 1219 */
1218 1220 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1219 1221
1220 1222 /*
1221 1223 * fold in overrides from the "eeprom" mechanism
1222 1224 */
1223 1225 cp->cp_edx |= cpuid_feature_edx_include;
1224 1226 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1225 1227
1226 1228 cp->cp_ecx |= cpuid_feature_ecx_include;
1227 1229 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1228 1230
1229 1231 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1230 1232 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1231 1233 }
1232 1234 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1233 1235 add_x86_feature(featureset, X86FSET_TSC);
1234 1236 }
1235 1237 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1236 1238 add_x86_feature(featureset, X86FSET_MSR);
1237 1239 }
1238 1240 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1239 1241 add_x86_feature(featureset, X86FSET_MTRR);
1240 1242 }
1241 1243 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1242 1244 add_x86_feature(featureset, X86FSET_PGE);
1243 1245 }
1244 1246 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1245 1247 add_x86_feature(featureset, X86FSET_CMOV);
1246 1248 }
1247 1249 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1248 1250 add_x86_feature(featureset, X86FSET_MMX);
1249 1251 }
1250 1252 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1251 1253 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1252 1254 add_x86_feature(featureset, X86FSET_MCA);
1253 1255 }
1254 1256 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1255 1257 add_x86_feature(featureset, X86FSET_PAE);
1256 1258 }
1257 1259 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1258 1260 add_x86_feature(featureset, X86FSET_CX8);
1259 1261 }
1260 1262 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1261 1263 add_x86_feature(featureset, X86FSET_CX16);
1262 1264 }
1263 1265 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1264 1266 add_x86_feature(featureset, X86FSET_PAT);
1265 1267 }
1266 1268 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1267 1269 add_x86_feature(featureset, X86FSET_SEP);
1268 1270 }
1269 1271 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1270 1272 /*
1271 1273 * In our implementation, fxsave/fxrstor
1272 1274 * are prerequisites before we'll even
1273 1275 * try and do SSE things.
1274 1276 */
1275 1277 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1276 1278 add_x86_feature(featureset, X86FSET_SSE);
1277 1279 }
1278 1280 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1279 1281 add_x86_feature(featureset, X86FSET_SSE2);
1280 1282 }
1281 1283 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1282 1284 add_x86_feature(featureset, X86FSET_SSE3);
1283 1285 }
1284 1286 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1285 1287 add_x86_feature(featureset, X86FSET_SSSE3);
1286 1288 }
1287 1289 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1288 1290 add_x86_feature(featureset, X86FSET_SSE4_1);
1289 1291 }
1290 1292 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1291 1293 add_x86_feature(featureset, X86FSET_SSE4_2);
1292 1294 }
1293 1295 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1294 1296 add_x86_feature(featureset, X86FSET_AES);
1295 1297 }
1296 1298 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1297 1299 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1298 1300 }
1299 1301
1300 1302 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1301 1303 add_x86_feature(featureset, X86FSET_XSAVE);
1302 1304
1303 1305 /* We only test AVX when there is XSAVE */
↓ open down ↓ |
1127 lines elided |
↑ open up ↑ |
1304 1306 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1305 1307 add_x86_feature(featureset,
1306 1308 X86FSET_AVX);
1307 1309
1308 1310 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
1309 1311 add_x86_feature(featureset,
1310 1312 X86FSET_F16C);
1311 1313 }
1312 1314 }
1313 1315 }
1316 + if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
1317 + add_x86_feature(featureset, X86FSET_X2APIC);
1318 + }
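    /*
     * For illustration only, not part of this change: with the bit
     * synthesized here, later code can test the cached feature set
     * instead of re-issuing cpuid, e.g. a hypothetical consumer in the
     * APIC setup path could do
     *
     *  if (is_x86_feature(x86_featureset, X86FSET_X2APIC))
     *      (void) enable_x2apic_mode();    (illustrative name)
     */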
1314 1319 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1315 1320 add_x86_feature(featureset, X86FSET_DE);
1316 1321 }
1317 1322 #if !defined(__xpv)
1318 1323 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1319 1324
1320 1325 /*
1321 1326 * We require the CLFLUSH instruction for erratum workaround
1322 1327 * to use MONITOR/MWAIT.
1323 1328 */
1324 1329 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1325 1330 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1326 1331 add_x86_feature(featureset, X86FSET_MWAIT);
1327 1332 } else {
1328 1333 extern int idle_cpu_assert_cflush_monitor;
1329 1334
1330 1335 /*
1331 1336 * All processors we are aware of which have
1332 1337 * MONITOR/MWAIT also have CLFLUSH.
1333 1338 */
1334 1339 if (idle_cpu_assert_cflush_monitor) {
1335 1340 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1336 1341 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1337 1342 }
1338 1343 }
1339 1344 }
1340 1345 #endif /* __xpv */
1341 1346
1342 1347 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1343 1348 add_x86_feature(featureset, X86FSET_VMX);
1344 1349 }
1345 1350
1346 1351 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
1347 1352 add_x86_feature(featureset, X86FSET_RDRAND);
1348 1353
1349 1354 /*
1350 1355 * Only need this the first time; the rest of the CPUs follow suit.
1351 1356 * We only capture this for the boot CPU.
1352 1357 */
1353 1358 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1354 1359 add_x86_feature(featureset, X86FSET_CLFSH);
1355 1360 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1356 1361 }
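    /*
     * CPUID.1:EBX bits 15:8 report the CLFLUSH line size in 8-byte units,
     * so the typical reported value of 8 yields the usual 64-byte
     * x86_clflush_size computed above.
     */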
1357 1362 if (is_x86_feature(featureset, X86FSET_PAE))
1358 1363 cpi->cpi_pabits = 36;
1359 1364
1360 1365 /*
1361 1366 * Hyperthreading configuration is slightly tricky on Intel
1362 1367 * and pure clones, and even trickier on AMD.
1363 1368 *
1364 1369 * (AMD chose to set the HTT bit on their CMP processors,
1365 1370 * even though they're not actually hyperthreaded. Thus it
1366 1371 * takes a bit more work to figure out what's really going
1367 1372 * on ... see the handling of the CMP_LGCY bit below)
1368 1373 */
1369 1374 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1370 1375 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1371 1376 if (cpi->cpi_ncpu_per_chip > 1)
1372 1377 add_x86_feature(featureset, X86FSET_HTT);
1373 1378 } else {
1374 1379 cpi->cpi_ncpu_per_chip = 1;
1375 1380 }
1376 1381
1377 1382 /*
1378 1383 * Work on the "extended" feature information, doing
1379 1384 * some basic initialization for cpuid_pass2()
1380 1385 */
1381 1386 xcpuid = 0;
1382 1387 switch (cpi->cpi_vendor) {
1383 1388 case X86_VENDOR_Intel:
1384 1389 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1385 1390 xcpuid++;
1386 1391 break;
1387 1392 case X86_VENDOR_AMD:
1388 1393 if (cpi->cpi_family > 5 ||
1389 1394 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1390 1395 xcpuid++;
1391 1396 break;
1392 1397 case X86_VENDOR_Cyrix:
1393 1398 /*
1394 1399 * Only these Cyrix CPUs are -known- to support
1395 1400 * extended cpuid operations.
1396 1401 */
1397 1402 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1398 1403 x86_type == X86_TYPE_CYRIX_GXm)
1399 1404 xcpuid++;
1400 1405 break;
1401 1406 case X86_VENDOR_Centaur:
1402 1407 case X86_VENDOR_TM:
1403 1408 default:
1404 1409 xcpuid++;
1405 1410 break;
1406 1411 }
1407 1412
1408 1413 if (xcpuid) {
1409 1414 cp = &cpi->cpi_extd[0];
1410 1415 cp->cp_eax = 0x80000000;
1411 1416 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1412 1417 }
1413 1418
1414 1419 if (cpi->cpi_xmaxeax & 0x80000000) {
1415 1420
1416 1421 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1417 1422 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1418 1423
1419 1424 switch (cpi->cpi_vendor) {
1420 1425 case X86_VENDOR_Intel:
1421 1426 case X86_VENDOR_AMD:
1422 1427 if (cpi->cpi_xmaxeax < 0x80000001)
1423 1428 break;
1424 1429 cp = &cpi->cpi_extd[1];
1425 1430 cp->cp_eax = 0x80000001;
1426 1431 (void) __cpuid_insn(cp);
1427 1432
1428 1433 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1429 1434 cpi->cpi_family == 5 &&
1430 1435 cpi->cpi_model == 6 &&
1431 1436 cpi->cpi_step == 6) {
1432 1437 /*
1433 1438 * K6 model 6 uses bit 10 to indicate SYSC
1434 1439 * Later models use bit 11. Fix it here.
1435 1440 */
1436 1441 if (cp->cp_edx & 0x400) {
1437 1442 cp->cp_edx &= ~0x400;
1438 1443 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1439 1444 }
1440 1445 }
1441 1446
1442 1447 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1443 1448
1444 1449 /*
1445 1450 * Compute the additions to the kernel's feature word.
1446 1451 */
1447 1452 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1448 1453 add_x86_feature(featureset, X86FSET_NX);
1449 1454 }
1450 1455
1451 1456 /*
1452 1457 * Regardless whether or not we boot 64-bit,
1453 1458 * we should have a way to identify whether
1454 1459 * the CPU is capable of running 64-bit.
1455 1460 */
1456 1461 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1457 1462 add_x86_feature(featureset, X86FSET_64);
1458 1463 }
1459 1464
1460 1465 #if defined(__amd64)
1461 1466 /* 1 GB large page - enable only for 64 bit kernel */
1462 1467 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1463 1468 add_x86_feature(featureset, X86FSET_1GPG);
1464 1469 }
1465 1470 #endif
1466 1471
1467 1472 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1468 1473 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1469 1474 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1470 1475 add_x86_feature(featureset, X86FSET_SSE4A);
1471 1476 }
1472 1477
1473 1478 /*
1474 1479 * If both the HTT and CMP_LGCY bits are set,
1475 1480 * then we're not actually HyperThreaded. Read
1476 1481 * "AMD CPUID Specification" for more details.
1477 1482 */
1478 1483 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1479 1484 is_x86_feature(featureset, X86FSET_HTT) &&
1480 1485 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1481 1486 remove_x86_feature(featureset, X86FSET_HTT);
1482 1487 add_x86_feature(featureset, X86FSET_CMP);
1483 1488 }
1484 1489 #if defined(__amd64)
1485 1490 /*
1486 1491 * It's really tricky to support syscall/sysret in
1487 1492 * the i386 kernel; we rely on sysenter/sysexit
1488 1493 * instead. In the amd64 kernel, things are -way-
1489 1494 * better.
1490 1495 */
1491 1496 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1492 1497 add_x86_feature(featureset, X86FSET_ASYSC);
1493 1498 }
1494 1499
1495 1500 /*
1496 1501 * While we're thinking about system calls, note
1497 1502 * that AMD processors don't support sysenter
1498 1503 * in long mode at all, so don't try to program them.
1499 1504 */
1500 1505 if (x86_vendor == X86_VENDOR_AMD) {
1501 1506 remove_x86_feature(featureset, X86FSET_SEP);
1502 1507 }
1503 1508 #endif
1504 1509 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1505 1510 add_x86_feature(featureset, X86FSET_TSCP);
1506 1511 }
1507 1512
1508 1513 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1509 1514 add_x86_feature(featureset, X86FSET_SVM);
1510 1515 }
1511 1516
1512 1517 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1513 1518 add_x86_feature(featureset, X86FSET_TOPOEXT);
1514 1519 }
1515 1520 break;
1516 1521 default:
1517 1522 break;
1518 1523 }
1519 1524
1520 1525 /*
1521 1526 * Get CPUID data about processor cores and hyperthreads.
1522 1527 */
1523 1528 switch (cpi->cpi_vendor) {
1524 1529 case X86_VENDOR_Intel:
1525 1530 if (cpi->cpi_maxeax >= 4) {
1526 1531 cp = &cpi->cpi_std[4];
1527 1532 cp->cp_eax = 4;
1528 1533 cp->cp_ecx = 0;
1529 1534 (void) __cpuid_insn(cp);
1530 1535 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1531 1536 }
1532 1537 /*FALLTHROUGH*/
1533 1538 case X86_VENDOR_AMD:
1534 1539 if (cpi->cpi_xmaxeax < 0x80000008)
1535 1540 break;
1536 1541 cp = &cpi->cpi_extd[8];
1537 1542 cp->cp_eax = 0x80000008;
1538 1543 (void) __cpuid_insn(cp);
1539 1544 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1540 1545
1541 1546 /*
1542 1547 * Virtual and physical address limits from
1543 1548 * cpuid override previously guessed values.
1544 1549 */
1545 1550 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1546 1551 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1547 1552 break;
1548 1553 default:
1549 1554 break;
1550 1555 }
1551 1556
1552 1557 /*
1553 1558 * Derive the number of cores per chip
1554 1559 */
1555 1560 switch (cpi->cpi_vendor) {
1556 1561 case X86_VENDOR_Intel:
1557 1562 if (cpi->cpi_maxeax < 4) {
1558 1563 cpi->cpi_ncore_per_chip = 1;
1559 1564 break;
1560 1565 } else {
1561 1566 cpi->cpi_ncore_per_chip =
1562 1567 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1563 1568 }
1564 1569 break;
1565 1570 case X86_VENDOR_AMD:
1566 1571 if (cpi->cpi_xmaxeax < 0x80000008) {
1567 1572 cpi->cpi_ncore_per_chip = 1;
1568 1573 break;
1569 1574 } else {
1570 1575 /*
1571 1576 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is
1572 1577 * 1 less than the number of physical cores on
1573 1578 * the chip. In family 0x10 this value can
1574 1579 * be affected by "downcoring" - it reflects
1575 1580 * 1 less than the number of cores actually
1576 1581 * enabled on this node.
1577 1582 */
1578 1583 cpi->cpi_ncore_per_chip =
1579 1584 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1580 1585 }
1581 1586 break;
1582 1587 default:
1583 1588 cpi->cpi_ncore_per_chip = 1;
1584 1589 break;
1585 1590 }
1586 1591
1587 1592 /*
1588 1593 * Get CPUID data about TSC Invariance in Deep C-State.
1589 1594 */
1590 1595 switch (cpi->cpi_vendor) {
1591 1596 case X86_VENDOR_Intel:
1592 1597 if (cpi->cpi_maxeax >= 7) {
1593 1598 cp = &cpi->cpi_extd[7];
1594 1599 cp->cp_eax = 0x80000007;
1595 1600 cp->cp_ecx = 0;
1596 1601 (void) __cpuid_insn(cp);
1597 1602 }
1598 1603 break;
1599 1604 default:
1600 1605 break;
1601 1606 }
1602 1607 } else {
1603 1608 cpi->cpi_ncore_per_chip = 1;
1604 1609 }
1605 1610
1606 1611 /*
1607 1612 * If more than one core, then this processor is CMP.
1608 1613 */
1609 1614 if (cpi->cpi_ncore_per_chip > 1) {
1610 1615 add_x86_feature(featureset, X86FSET_CMP);
1611 1616 }
1612 1617
1613 1618 /*
1614 1619 * If the number of cores is the same as the number
1615 1620 * of CPUs, then we cannot have HyperThreading.
1616 1621 */
1617 1622 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1618 1623 remove_x86_feature(featureset, X86FSET_HTT);
1619 1624 }
1620 1625
1621 1626 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1622 1627 cpi->cpi_procnodes_per_pkg = 1;
1623 1628 cpi->cpi_cores_per_compunit = 1;
1624 1629 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1625 1630 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1626 1631 /*
1627 1632 * Single-core single-threaded processors.
1628 1633 */
1629 1634 cpi->cpi_chipid = -1;
1630 1635 cpi->cpi_clogid = 0;
1631 1636 cpi->cpi_coreid = cpu->cpu_id;
1632 1637 cpi->cpi_pkgcoreid = 0;
1633 1638 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1634 1639 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1635 1640 else
1636 1641 cpi->cpi_procnodeid = cpi->cpi_chipid;
1637 1642 } else if (cpi->cpi_ncpu_per_chip > 1) {
1638 1643 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1639 1644 cpuid_intel_getids(cpu, featureset);
1640 1645 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1641 1646 cpuid_amd_getids(cpu);
1642 1647 else {
1643 1648 /*
1644 1649 * All other processors are currently
1645 1650 * assumed to have single cores.
1646 1651 */
1647 1652 cpi->cpi_coreid = cpi->cpi_chipid;
1648 1653 cpi->cpi_pkgcoreid = 0;
1649 1654 cpi->cpi_procnodeid = cpi->cpi_chipid;
1650 1655 cpi->cpi_compunitid = cpi->cpi_chipid;
1651 1656 }
1652 1657 }
1653 1658
1654 1659 /*
1655 1660 * Synthesize chip "revision" and socket type
1656 1661 */
1657 1662 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1658 1663 cpi->cpi_model, cpi->cpi_step);
1659 1664 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1660 1665 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1661 1666 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1662 1667 cpi->cpi_model, cpi->cpi_step);
1663 1668
1664 1669 pass1_done:
1665 1670 cpi->cpi_pass = 1;
1666 1671 }
1667 1672
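/*
 * Editorial aside (not part of the diff under review): a minimal sketch of
 * the core-count derivation performed above, assuming cpi_std[4] and
 * cpi_extd[8] were populated exactly as pass 1 does.  The helper name is
 * hypothetical and shown for illustration only.
 */
static uint_t
example_ncore_per_chip(const struct cpuid_info *cpi)
{
	/* Intel: CPUID.4:EAX[31:26] is "cores per package" minus one */
	if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 4)
		return (BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1);

	/* AMD: CPUID.0x80000008:ECX[7:0] ("NC") is the core count minus one */
	if (cpi->cpi_vendor == X86_VENDOR_AMD &&
	    cpi->cpi_xmaxeax >= 0x80000008)
		return (BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1);

	return (1);
}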
1668 1673 /*
1669 1674 * Make copies of the cpuid table entries we depend on, in
1670 1675 * part for ease of parsing now, in part so that we have only
1671 1676 * one place to correct any of it, in part for ease of
1672 1677 * later export to userland, and in part so we can look at
1673 1678 * this stuff in a crash dump.
1674 1679 */
1675 1680
1676 1681 /*ARGSUSED*/
1677 1682 void
1678 1683 cpuid_pass2(cpu_t *cpu)
1679 1684 {
1680 1685 uint_t n, nmax;
1681 1686 int i;
1682 1687 struct cpuid_regs *cp;
1683 1688 uint8_t *dp;
1684 1689 uint32_t *iptr;
1685 1690 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1686 1691
1687 1692 ASSERT(cpi->cpi_pass == 1);
1688 1693
1689 1694 if (cpi->cpi_maxeax < 1)
1690 1695 goto pass2_done;
1691 1696
1692 1697 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1693 1698 nmax = NMAX_CPI_STD;
1694 1699 /*
1695 1700 * (We already handled n == 0 and n == 1 in pass 1)
1696 1701 */
1697 1702 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1698 1703 cp->cp_eax = n;
1699 1704
1700 1705 /*
1701 1706 * CPUID function 4 expects %ecx to be initialized
1702 1707 * with an index which indicates which cache to return
1703 1708 * information about. The OS is expected to call function 4
1704 1709 * with %ecx set to 0, 1, 2, ... until it returns with
1705 1710 * EAX[4:0] set to 0, which indicates there are no more
1706 1711 * caches.
1707 1712 *
1708 1713 * Here, populate cpi_std[4] with the information returned by
1709 1714 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1710 1715 * when dynamic memory allocation becomes available.
1711 1716 *
1712 1717 * Note: we need to explicitly initialize %ecx here, since
1713 1718 * function 4 may have been previously invoked.
1714 1719 */
1715 1720 if (n == 4)
1716 1721 cp->cp_ecx = 0;
1717 1722
1718 1723 (void) __cpuid_insn(cp);
1719 1724 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1720 1725 switch (n) {
1721 1726 case 2:
1722 1727 /*
1723 1728 * "the lower 8 bits of the %eax register
1724 1729 * contain a value that identifies the number
1725 1730 * of times the cpuid [instruction] has to be
1726 1731 * executed to obtain a complete image of the
1727 1732 * processor's caching systems."
1728 1733 *
1729 1734 * How *do* they make this stuff up?
1730 1735 */
1731 1736 cpi->cpi_ncache = sizeof (*cp) *
1732 1737 BITX(cp->cp_eax, 7, 0);
1733 1738 if (cpi->cpi_ncache == 0)
1734 1739 break;
1735 1740 cpi->cpi_ncache--; /* skip count byte */
1736 1741
1737 1742 /*
1738 1743 * Well, for now, rather than attempt to implement
1739 1744 * this slightly dubious algorithm, we just look
1740 1745 * at the first 15 ..
1741 1746 */
1742 1747 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1743 1748 cpi->cpi_ncache = sizeof (*cp) - 1;
1744 1749
1745 1750 dp = cpi->cpi_cacheinfo;
1746 1751 if (BITX(cp->cp_eax, 31, 31) == 0) {
1747 1752 uint8_t *p = (void *)&cp->cp_eax;
1748 1753 for (i = 1; i < 4; i++)
1749 1754 if (p[i] != 0)
1750 1755 *dp++ = p[i];
1751 1756 }
1752 1757 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1753 1758 uint8_t *p = (void *)&cp->cp_ebx;
1754 1759 for (i = 0; i < 4; i++)
1755 1760 if (p[i] != 0)
1756 1761 *dp++ = p[i];
1757 1762 }
1758 1763 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1759 1764 uint8_t *p = (void *)&cp->cp_ecx;
1760 1765 for (i = 0; i < 4; i++)
1761 1766 if (p[i] != 0)
1762 1767 *dp++ = p[i];
1763 1768 }
1764 1769 if (BITX(cp->cp_edx, 31, 31) == 0) {
1765 1770 uint8_t *p = (void *)&cp->cp_edx;
1766 1771 for (i = 0; i < 4; i++)
1767 1772 if (p[i] != 0)
1768 1773 *dp++ = p[i];
1769 1774 }
1770 1775 break;
1771 1776
1772 1777 case 3: /* Processor serial number, if PSN supported */
1773 1778 break;
1774 1779
1775 1780 case 4: /* Deterministic cache parameters */
1776 1781 break;
1777 1782
1778 1783 case 5: /* Monitor/Mwait parameters */
1779 1784 {
1780 1785 size_t mwait_size;
1781 1786
1782 1787 /*
1783 1788 * check cpi_mwait.support which was set in cpuid_pass1
1784 1789 */
1785 1790 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1786 1791 break;
1787 1792
1788 1793 /*
1789 1794 * Protect ourselves from an insane mwait line size.
1790 1795 * Workaround for incomplete hardware emulator(s).
1791 1796 */
1792 1797 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1793 1798 if (mwait_size < sizeof (uint32_t) ||
1794 1799 !ISP2(mwait_size)) {
1795 1800 #if DEBUG
1796 1801 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1797 1802 "size %ld", cpu->cpu_id, (long)mwait_size);
1798 1803 #endif
1799 1804 break;
1800 1805 }
1801 1806
1802 1807 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1803 1808 cpi->cpi_mwait.mon_max = mwait_size;
1804 1809 if (MWAIT_EXTENSION(cpi)) {
1805 1810 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1806 1811 if (MWAIT_INT_ENABLE(cpi))
1807 1812 cpi->cpi_mwait.support |=
1808 1813 MWAIT_ECX_INT_ENABLE;
1809 1814 }
1810 1815 break;
1811 1816 }
1812 1817 default:
1813 1818 break;
1814 1819 }
1815 1820 }
1816 1821
1817 1822 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1818 1823 struct cpuid_regs regs;
1819 1824
1820 1825 cp = &regs;
1821 1826 cp->cp_eax = 0xB;
1822 1827 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1823 1828
1824 1829 (void) __cpuid_insn(cp);
1825 1830
1826 1831 /*
1827 1832 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1828 1833 * indicates that the extended topology enumeration leaf is
1829 1834 * available.
1830 1835 */
1831 1836 if (cp->cp_ebx) {
1832 1837 uint32_t x2apic_id;
1833 1838 uint_t coreid_shift = 0;
1834 1839 uint_t ncpu_per_core = 1;
1835 1840 uint_t chipid_shift = 0;
1836 1841 uint_t ncpu_per_chip = 1;
1837 1842 uint_t i;
1838 1843 uint_t level;
1839 1844
1840 1845 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1841 1846 cp->cp_eax = 0xB;
1842 1847 cp->cp_ecx = i;
1843 1848
1844 1849 (void) __cpuid_insn(cp);
1845 1850 level = CPI_CPU_LEVEL_TYPE(cp);
1846 1851
1847 1852 if (level == 1) {
1848 1853 x2apic_id = cp->cp_edx;
1849 1854 coreid_shift = BITX(cp->cp_eax, 4, 0);
1850 1855 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1851 1856 } else if (level == 2) {
1852 1857 x2apic_id = cp->cp_edx;
1853 1858 chipid_shift = BITX(cp->cp_eax, 4, 0);
1854 1859 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1855 1860 }
1856 1861 }
1857 1862
1858 1863 cpi->cpi_apicid = x2apic_id;
1859 1864 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1860 1865 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1861 1866 ncpu_per_core;
1862 1867 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1863 1868 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1864 1869 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1865 1870 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1866 1871 }
1867 1872
1868 1873 /* Make cp NULL so that we don't stumble on others */
1869 1874 cp = NULL;
1870 1875 }
1871 1876
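/*
 * Editorial aside (not part of the diff under review): a worked example of
 * the decomposition above.  Suppose leaf 0xB reported coreid_shift = 1 and
 * chipid_shift = 4, and this CPU's x2APIC ID is 0x5.  Then:
 *
 *	chipid    = 0x5 >> 4             = 0	(package)
 *	coreid    = 0x5 >> 1             = 2	(globally unique core)
 *	clogid    = 0x5 & ((1 << 4) - 1) = 5	(logical CPU within package)
 *	pkgcoreid = clogid >> 1          = 2	(core within the package)
 *
 * The numbers are illustrative only; the real shift widths come from
 * EAX[4:0] of each level-type record.
 */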
1872 1877 /*
1873 1878 * XSAVE enumeration
1874 1879 */
1875 1880 if (cpi->cpi_maxeax >= 0xD) {
1876 1881 struct cpuid_regs regs;
1877 1882 boolean_t cpuid_d_valid = B_TRUE;
1878 1883
1879 1884 cp = &regs;
1880 1885 cp->cp_eax = 0xD;
1881 1886 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1882 1887
1883 1888 (void) __cpuid_insn(cp);
1884 1889
1885 1890 /*
1886 1891 * Sanity checks for debug
1887 1892 */
1888 1893 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1889 1894 (cp->cp_eax & XFEATURE_SSE) == 0) {
1890 1895 cpuid_d_valid = B_FALSE;
1891 1896 }
1892 1897
1893 1898 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1894 1899 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1895 1900 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1896 1901
1897 1902 /*
1898 1903 * If the hw supports AVX, get the size and offset in the save
1899 1904 * area for the ymm state.
1900 1905 */
1901 1906 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1902 1907 cp->cp_eax = 0xD;
1903 1908 cp->cp_ecx = 2;
1904 1909 cp->cp_edx = cp->cp_ebx = 0;
1905 1910
1906 1911 (void) __cpuid_insn(cp);
1907 1912
1908 1913 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1909 1914 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1910 1915 cpuid_d_valid = B_FALSE;
1911 1916 }
1912 1917
1913 1918 cpi->cpi_xsave.ymm_size = cp->cp_eax;
1914 1919 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1915 1920 }
1916 1921
1917 1922 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1918 1923 xsave_state_size = 0;
1919 1924 } else if (cpuid_d_valid) {
1920 1925 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1921 1926 } else {
1922 1927 /* Broken CPUID 0xD, probably in HVM */
1923 1928 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1924 1929 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1925 1930 ", ymm_size = %d, ymm_offset = %d\n",
1926 1931 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1927 1932 cpi->cpi_xsave.xsav_hw_features_high,
1928 1933 (int)cpi->cpi_xsave.xsav_max_size,
1929 1934 (int)cpi->cpi_xsave.ymm_size,
1930 1935 (int)cpi->cpi_xsave.ymm_offset);
1931 1936
1932 1937 if (xsave_state_size != 0) {
1933 1938 /*
1934 1939 * This must be a non-boot CPU. We cannot
1935 1940 * continue, because boot cpu has already
1936 1941 * enabled XSAVE.
1937 1942 */
1938 1943 ASSERT(cpu->cpu_id != 0);
1939 1944 cmn_err(CE_PANIC, "cpu%d: we have already "
1940 1945 "enabled XSAVE on boot cpu, cannot "
1941 1946 "continue.", cpu->cpu_id);
1942 1947 } else {
1943 1948 /*
1944 1949 * Must be from boot CPU, OK to disable XSAVE.
1945 1950 */
1946 1951 ASSERT(cpu->cpu_id == 0);
1947 1952 remove_x86_feature(x86_featureset,
1948 1953 X86FSET_XSAVE);
1949 1954 remove_x86_feature(x86_featureset, X86FSET_AVX);
1950 1955 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1951 1956 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1952 1957 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_F16C;
1953 1958 xsave_force_disable = B_TRUE;
1954 1959 }
1955 1960 }
1956 1961 }
1957 1962
1958 1963
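/*
 * Editorial aside (not part of the diff under review): the leaf 0xD layout
 * relied on above, in sketch form.  Sub-leaf 0 reports the supported state
 * mask in EAX:EDX and the maximum save area size in ECX; each state
 * component then has its own sub-leaf, e.g. the AVX/YMM query made above
 * amounts to:
 *
 *	struct cpuid_regs r = { 0 };
 *	r.cp_eax = 0xD;
 *	r.cp_ecx = 2;			(YMM state component)
 *	(void) __cpuid_insn(&r);
 *	ymm_size = r.cp_eax;		(bytes of YMM state)
 *	ymm_offset = r.cp_ebx;		(offset within the XSAVE area)
 *
 * Illustrative only; the real code cross-checks these against the
 * CPUID_LEAFD_2_YMM_* constants before trusting them.
 */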
1959 1964 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1960 1965 goto pass2_done;
1961 1966
1962 1967 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1963 1968 nmax = NMAX_CPI_EXTD;
1964 1969 /*
1965 1970 * Copy the extended properties, fixing them as we go.
1966 1971 * (We already handled n == 0 and n == 1 in pass 1)
1967 1972 */
1968 1973 iptr = (void *)cpi->cpi_brandstr;
1969 1974 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1970 1975 cp->cp_eax = 0x80000000 + n;
1971 1976 (void) __cpuid_insn(cp);
1972 1977 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1973 1978 switch (n) {
1974 1979 case 2:
1975 1980 case 3:
1976 1981 case 4:
1977 1982 /*
1978 1983 * Extract the brand string
1979 1984 */
1980 1985 *iptr++ = cp->cp_eax;
1981 1986 *iptr++ = cp->cp_ebx;
1982 1987 *iptr++ = cp->cp_ecx;
1983 1988 *iptr++ = cp->cp_edx;
1984 1989 break;
1985 1990 case 5:
1986 1991 switch (cpi->cpi_vendor) {
1987 1992 case X86_VENDOR_AMD:
1988 1993 /*
1989 1994 * The Athlon and Duron were the first
1990 1995 * parts to report the sizes of the
1991 1996 * TLB for large pages. Before then,
1992 1997 * we don't trust the data.
1993 1998 */
1994 1999 if (cpi->cpi_family < 6 ||
1995 2000 (cpi->cpi_family == 6 &&
1996 2001 cpi->cpi_model < 1))
1997 2002 cp->cp_eax = 0;
1998 2003 break;
1999 2004 default:
2000 2005 break;
2001 2006 }
2002 2007 break;
2003 2008 case 6:
2004 2009 switch (cpi->cpi_vendor) {
2005 2010 case X86_VENDOR_AMD:
2006 2011 /*
2007 2012 * The Athlon and Duron were the first
2008 2013 * AMD parts with L2 TLB's.
2009 2014 * Before then, don't trust the data.
2010 2015 */
2011 2016 if (cpi->cpi_family < 6 ||
2012 2017 cpi->cpi_family == 6 &&
2013 2018 cpi->cpi_model < 1)
2014 2019 cp->cp_eax = cp->cp_ebx = 0;
2015 2020 /*
2016 2021 * AMD Duron rev A0 reports L2
2017 2022 * cache size incorrectly as 1K
2018 2023 * when it is really 64K
2019 2024 */
2020 2025 if (cpi->cpi_family == 6 &&
2021 2026 cpi->cpi_model == 3 &&
2022 2027 cpi->cpi_step == 0) {
2023 2028 cp->cp_ecx &= 0xffff;
2024 2029 cp->cp_ecx |= 0x400000;
2025 2030 }
2026 2031 break;
2027 2032 case X86_VENDOR_Cyrix: /* VIA C3 */
2028 2033 /*
2029 2034 * VIA C3 processors are a bit messed
2030 2035 * up w.r.t. encoding cache sizes in %ecx
2031 2036 */
2032 2037 if (cpi->cpi_family != 6)
2033 2038 break;
2034 2039 /*
2035 2040 * model 7 and 8 were incorrectly encoded
2036 2041 *
2037 2042 * xxx is model 8 really broken?
2038 2043 */
2039 2044 if (cpi->cpi_model == 7 ||
2040 2045 cpi->cpi_model == 8)
2041 2046 cp->cp_ecx =
2042 2047 BITX(cp->cp_ecx, 31, 24) << 16 |
2043 2048 BITX(cp->cp_ecx, 23, 16) << 12 |
2044 2049 BITX(cp->cp_ecx, 15, 8) << 8 |
2045 2050 BITX(cp->cp_ecx, 7, 0);
2046 2051 /*
2047 2052 * model 9 stepping 1 has wrong associativity
2048 2053 */
2049 2054 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
2050 2055 cp->cp_ecx |= 8 << 12;
2051 2056 break;
2052 2057 case X86_VENDOR_Intel:
2053 2058 /*
2054 2059 * Extended L2 Cache features function.
2055 2060 * First appeared on Prescott.
2056 2061 */
2057 2062 default:
2058 2063 break;
2059 2064 }
2060 2065 break;
2061 2066 default:
2062 2067 break;
2063 2068 }
2064 2069 }
2065 2070
2066 2071 pass2_done:
2067 2072 cpi->cpi_pass = 2;
2068 2073 }
2069 2074
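/*
 * Editorial aside (not part of the diff under review): a condensed sketch
 * of the leaf-2 descriptor harvesting done in cpuid_pass2() above.  A
 * register contributes descriptor bytes only when its bit 31 is clear, and
 * %al holds a repeat count rather than a descriptor.  The helper name and
 * signature are hypothetical.
 */
static uint_t
example_leaf2_descriptors(const struct cpuid_regs *cp, uint8_t *out)
{
	uint32_t r[4];
	uint_t i, j, n = 0;

	r[0] = cp->cp_eax;
	r[1] = cp->cp_ebx;
	r[2] = cp->cp_ecx;
	r[3] = cp->cp_edx;

	for (i = 0; i < 4; i++) {
		if (BITX(r[i], 31, 31) != 0)
			continue;	/* register carries no descriptors */
		/* skip %al, the iteration count, in the first register */
		for (j = (i == 0) ? 1 : 0; j < 4; j++) {
			uint8_t d = (r[i] >> (8 * j)) & 0xff;

			if (d != 0) {
				*out++ = d;
				n++;
			}
		}
	}
	return (n);
}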
2070 2075 static const char *
2071 2076 intel_cpubrand(const struct cpuid_info *cpi)
2072 2077 {
2073 2078 int i;
2074 2079
2075 2080 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2076 2081 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2077 2082 return ("i486");
2078 2083
2079 2084 switch (cpi->cpi_family) {
2080 2085 case 5:
2081 2086 return ("Intel Pentium(r)");
2082 2087 case 6:
2083 2088 switch (cpi->cpi_model) {
2084 2089 uint_t celeron, xeon;
2085 2090 const struct cpuid_regs *cp;
2086 2091 case 0:
2087 2092 case 1:
2088 2093 case 2:
2089 2094 return ("Intel Pentium(r) Pro");
2090 2095 case 3:
2091 2096 case 4:
2092 2097 return ("Intel Pentium(r) II");
2093 2098 case 6:
2094 2099 return ("Intel Celeron(r)");
2095 2100 case 5:
2096 2101 case 7:
2097 2102 celeron = xeon = 0;
2098 2103 cp = &cpi->cpi_std[2]; /* cache info */
2099 2104
2100 2105 for (i = 1; i < 4; i++) {
2101 2106 uint_t tmp;
2102 2107
2103 2108 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2104 2109 if (tmp == 0x40)
2105 2110 celeron++;
2106 2111 if (tmp >= 0x44 && tmp <= 0x45)
2107 2112 xeon++;
2108 2113 }
2109 2114
2110 2115 for (i = 0; i < 2; i++) {
2111 2116 uint_t tmp;
2112 2117
2113 2118 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2114 2119 if (tmp == 0x40)
2115 2120 celeron++;
2116 2121 else if (tmp >= 0x44 && tmp <= 0x45)
2117 2122 xeon++;
2118 2123 }
2119 2124
2120 2125 for (i = 0; i < 4; i++) {
2121 2126 uint_t tmp;
2122 2127
2123 2128 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2124 2129 if (tmp == 0x40)
2125 2130 celeron++;
2126 2131 else if (tmp >= 0x44 && tmp <= 0x45)
2127 2132 xeon++;
2128 2133 }
2129 2134
2130 2135 for (i = 0; i < 4; i++) {
2131 2136 uint_t tmp;
2132 2137
2133 2138 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2134 2139 if (tmp == 0x40)
2135 2140 celeron++;
2136 2141 else if (tmp >= 0x44 && tmp <= 0x45)
2137 2142 xeon++;
2138 2143 }
2139 2144
2140 2145 if (celeron)
2141 2146 return ("Intel Celeron(r)");
2142 2147 if (xeon)
2143 2148 return (cpi->cpi_model == 5 ?
2144 2149 "Intel Pentium(r) II Xeon(tm)" :
2145 2150 "Intel Pentium(r) III Xeon(tm)");
2146 2151 return (cpi->cpi_model == 5 ?
2147 2152 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2148 2153 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2149 2154 default:
2150 2155 break;
2151 2156 }
2152 2157 default:
2153 2158 break;
2154 2159 }
2155 2160
2156 2161 /* BrandID is present if the field is nonzero */
2157 2162 if (cpi->cpi_brandid != 0) {
2158 2163 static const struct {
2159 2164 uint_t bt_bid;
2160 2165 const char *bt_str;
2161 2166 } brand_tbl[] = {
2162 2167 { 0x1, "Intel(r) Celeron(r)" },
2163 2168 { 0x2, "Intel(r) Pentium(r) III" },
2164 2169 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2165 2170 { 0x4, "Intel(r) Pentium(r) III" },
2166 2171 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2167 2172 { 0x7, "Mobile Intel(r) Celeron(r)" },
2168 2173 { 0x8, "Intel(r) Pentium(r) 4" },
2169 2174 { 0x9, "Intel(r) Pentium(r) 4" },
2170 2175 { 0xa, "Intel(r) Celeron(r)" },
2171 2176 { 0xb, "Intel(r) Xeon(tm)" },
2172 2177 { 0xc, "Intel(r) Xeon(tm) MP" },
2173 2178 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2174 2179 { 0xf, "Mobile Intel(r) Celeron(r)" },
2175 2180 { 0x11, "Mobile Genuine Intel(r)" },
2176 2181 { 0x12, "Intel(r) Celeron(r) M" },
2177 2182 { 0x13, "Mobile Intel(r) Celeron(r)" },
2178 2183 { 0x14, "Intel(r) Celeron(r)" },
2179 2184 { 0x15, "Mobile Genuine Intel(r)" },
2180 2185 { 0x16, "Intel(r) Pentium(r) M" },
2181 2186 { 0x17, "Mobile Intel(r) Celeron(r)" }
2182 2187 };
2183 2188 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2184 2189 uint_t sgn;
2185 2190
2186 2191 sgn = (cpi->cpi_family << 8) |
2187 2192 (cpi->cpi_model << 4) | cpi->cpi_step;
2188 2193
2189 2194 for (i = 0; i < btblmax; i++)
2190 2195 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2191 2196 break;
2192 2197 if (i < btblmax) {
2193 2198 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2194 2199 return ("Intel(r) Celeron(r)");
2195 2200 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2196 2201 return ("Intel(r) Xeon(tm) MP");
2197 2202 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2198 2203 return ("Intel(r) Xeon(tm)");
2199 2204 return (brand_tbl[i].bt_str);
2200 2205 }
2201 2206 }
2202 2207
2203 2208 return (NULL);
2204 2209 }
2205 2210
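/*
 * Editorial aside (not part of the diff under review): the "sgn" tests in
 * intel_cpubrand() above compare against family/model/stepping packed into
 * a single value, e.g. family 6, model 0xb, stepping 1 becomes 0x6b1.  A
 * minimal, hypothetical helper showing that packing:
 */
static uint_t
example_pack_signature(uint_t family, uint_t model, uint_t step)
{
	return ((family << 8) | (model << 4) | step);
}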
2206 2211 static const char *
2207 2212 amd_cpubrand(const struct cpuid_info *cpi)
2208 2213 {
2209 2214 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2210 2215 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2211 2216 return ("i486 compatible");
2212 2217
2213 2218 switch (cpi->cpi_family) {
2214 2219 case 5:
2215 2220 switch (cpi->cpi_model) {
2216 2221 case 0:
2217 2222 case 1:
2218 2223 case 2:
2219 2224 case 3:
2220 2225 case 4:
2221 2226 case 5:
2222 2227 return ("AMD-K5(r)");
2223 2228 case 6:
2224 2229 case 7:
2225 2230 return ("AMD-K6(r)");
2226 2231 case 8:
2227 2232 return ("AMD-K6(r)-2");
2228 2233 case 9:
2229 2234 return ("AMD-K6(r)-III");
2230 2235 default:
2231 2236 return ("AMD (family 5)");
2232 2237 }
2233 2238 case 6:
2234 2239 switch (cpi->cpi_model) {
2235 2240 case 1:
2236 2241 return ("AMD-K7(tm)");
2237 2242 case 0:
2238 2243 case 2:
2239 2244 case 4:
2240 2245 return ("AMD Athlon(tm)");
2241 2246 case 3:
2242 2247 case 7:
2243 2248 return ("AMD Duron(tm)");
2244 2249 case 6:
2245 2250 case 8:
2246 2251 case 10:
2247 2252 /*
2248 2253 * Use the L2 cache size to distinguish
2249 2254 */
2250 2255 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2251 2256 "AMD Athlon(tm)" : "AMD Duron(tm)");
2252 2257 default:
2253 2258 return ("AMD (family 6)");
2254 2259 }
2255 2260 default:
2256 2261 break;
2257 2262 }
2258 2263
2259 2264 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2260 2265 cpi->cpi_brandid != 0) {
2261 2266 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2262 2267 case 3:
2263 2268 return ("AMD Opteron(tm) UP 1xx");
2264 2269 case 4:
2265 2270 return ("AMD Opteron(tm) DP 2xx");
2266 2271 case 5:
2267 2272 return ("AMD Opteron(tm) MP 8xx");
2268 2273 default:
2269 2274 return ("AMD Opteron(tm)");
2270 2275 }
2271 2276 }
2272 2277
2273 2278 return (NULL);
2274 2279 }
2275 2280
2276 2281 static const char *
2277 2282 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2278 2283 {
2279 2284 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2280 2285 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2281 2286 type == X86_TYPE_CYRIX_486)
2282 2287 return ("i486 compatible");
2283 2288
2284 2289 switch (type) {
2285 2290 case X86_TYPE_CYRIX_6x86:
2286 2291 return ("Cyrix 6x86");
2287 2292 case X86_TYPE_CYRIX_6x86L:
2288 2293 return ("Cyrix 6x86L");
2289 2294 case X86_TYPE_CYRIX_6x86MX:
2290 2295 return ("Cyrix 6x86MX");
2291 2296 case X86_TYPE_CYRIX_GXm:
2292 2297 return ("Cyrix GXm");
2293 2298 case X86_TYPE_CYRIX_MediaGX:
2294 2299 return ("Cyrix MediaGX");
2295 2300 case X86_TYPE_CYRIX_MII:
2296 2301 return ("Cyrix M2");
2297 2302 case X86_TYPE_VIA_CYRIX_III:
2298 2303 return ("VIA Cyrix M3");
2299 2304 default:
2300 2305 /*
2301 2306 * Have another wild guess ..
2302 2307 */
2303 2308 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2304 2309 return ("Cyrix 5x86");
2305 2310 else if (cpi->cpi_family == 5) {
2306 2311 switch (cpi->cpi_model) {
2307 2312 case 2:
2308 2313 return ("Cyrix 6x86"); /* Cyrix M1 */
2309 2314 case 4:
2310 2315 return ("Cyrix MediaGX");
2311 2316 default:
2312 2317 break;
2313 2318 }
2314 2319 } else if (cpi->cpi_family == 6) {
2315 2320 switch (cpi->cpi_model) {
2316 2321 case 0:
2317 2322 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2318 2323 case 5:
2319 2324 case 6:
2320 2325 case 7:
2321 2326 case 8:
2322 2327 case 9:
2323 2328 return ("VIA C3");
2324 2329 default:
2325 2330 break;
2326 2331 }
2327 2332 }
2328 2333 break;
2329 2334 }
2330 2335 return (NULL);
2331 2336 }
2332 2337
2333 2338 /*
2334 2339 * This only gets called in the case that the CPU extended
2335 2340 * feature brand string (0x80000002, 0x80000003, 0x80000004)
2336 2341 * aren't available, or contain null bytes for some reason.
2337 2342 */
2338 2343 static void
2339 2344 fabricate_brandstr(struct cpuid_info *cpi)
2340 2345 {
2341 2346 const char *brand = NULL;
2342 2347
2343 2348 switch (cpi->cpi_vendor) {
2344 2349 case X86_VENDOR_Intel:
2345 2350 brand = intel_cpubrand(cpi);
2346 2351 break;
2347 2352 case X86_VENDOR_AMD:
2348 2353 brand = amd_cpubrand(cpi);
2349 2354 break;
2350 2355 case X86_VENDOR_Cyrix:
2351 2356 brand = cyrix_cpubrand(cpi, x86_type);
2352 2357 break;
2353 2358 case X86_VENDOR_NexGen:
2354 2359 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2355 2360 brand = "NexGen Nx586";
2356 2361 break;
2357 2362 case X86_VENDOR_Centaur:
2358 2363 if (cpi->cpi_family == 5)
2359 2364 switch (cpi->cpi_model) {
2360 2365 case 4:
2361 2366 brand = "Centaur C6";
2362 2367 break;
2363 2368 case 8:
2364 2369 brand = "Centaur C2";
2365 2370 break;
2366 2371 case 9:
2367 2372 brand = "Centaur C3";
2368 2373 break;
2369 2374 default:
2370 2375 break;
2371 2376 }
2372 2377 break;
2373 2378 case X86_VENDOR_Rise:
2374 2379 if (cpi->cpi_family == 5 &&
2375 2380 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2376 2381 brand = "Rise mP6";
2377 2382 break;
2378 2383 case X86_VENDOR_SiS:
2379 2384 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2380 2385 brand = "SiS 55x";
2381 2386 break;
2382 2387 case X86_VENDOR_TM:
2383 2388 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2384 2389 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2385 2390 break;
2386 2391 case X86_VENDOR_NSC:
2387 2392 case X86_VENDOR_UMC:
2388 2393 default:
2389 2394 break;
2390 2395 }
2391 2396 if (brand) {
2392 2397 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2393 2398 return;
2394 2399 }
2395 2400
2396 2401 /*
2397 2402 * If all else fails ...
2398 2403 */
2399 2404 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2400 2405 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2401 2406 cpi->cpi_model, cpi->cpi_step);
2402 2407 }
2403 2408
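/*
 * Editorial aside (not part of the diff under review): fabricate_brandstr()
 * above is only the fallback.  The normal path, sketched below on the
 * assumption that leaves 0x80000002-0x80000004 exist, reads the 48-byte
 * brand string sixteen bytes at a time from EAX..EDX, just as the
 * extended-leaf loop in cpuid_pass2() does.  Hypothetical helper.
 */
static void
example_read_brandstr(char buf[49])
{
	uint32_t *p = (uint32_t *)(void *)buf;
	uint_t fn;

	for (fn = 0x80000002; fn <= 0x80000004; fn++) {
		struct cpuid_regs r = { 0 };

		r.cp_eax = fn;
		(void) __cpuid_insn(&r);
		*p++ = r.cp_eax;	/* each leaf yields 16 brand bytes */
		*p++ = r.cp_ebx;
		*p++ = r.cp_ecx;
		*p++ = r.cp_edx;
	}
	buf[48] = '\0';			/* 3 leaves x 16 bytes, NUL-terminated */
}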
2404 2409 /*
2405 2410 * This routine is called just after kernel memory allocation
2406 2411 * becomes available on cpu0, and as part of mp_startup() on
2407 2412 * the other cpus.
2408 2413 *
2409 2414 * Fixup the brand string, and collect any information from cpuid
2410 2415 * that requires dynamically allocated storage to represent.
2411 2416 */
2412 2417 /*ARGSUSED*/
2413 2418 void
2414 2419 cpuid_pass3(cpu_t *cpu)
2415 2420 {
2416 2421 int i, max, shft, level, size;
2417 2422 struct cpuid_regs regs;
2418 2423 struct cpuid_regs *cp;
2419 2424 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2420 2425
2421 2426 ASSERT(cpi->cpi_pass == 2);
2422 2427
2423 2428 /*
2424 2429 * Function 4: Deterministic cache parameters
2425 2430 *
2426 2431 * Take this opportunity to detect the number of threads
2427 2432 * sharing the last level cache, and construct a corresponding
2428 2433 * cache id. The respective cpuid_info members are initialized
2429 2434 * to the default case of "no last level cache sharing".
2430 2435 */
2431 2436 cpi->cpi_ncpu_shr_last_cache = 1;
2432 2437 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2433 2438
2434 2439 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2435 2440
2436 2441 /*
2437 2442 * Find the # of elements (size) returned by fn 4, and along
2438 2443 * the way detect last level cache sharing details.
2439 2444 */
2440 2445 bzero(&regs, sizeof (regs));
2441 2446 cp = &regs;
2442 2447 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2443 2448 cp->cp_eax = 4;
2444 2449 cp->cp_ecx = i;
2445 2450
2446 2451 (void) __cpuid_insn(cp);
2447 2452
2448 2453 if (CPI_CACHE_TYPE(cp) == 0)
2449 2454 break;
2450 2455 level = CPI_CACHE_LVL(cp);
2451 2456 if (level > max) {
2452 2457 max = level;
2453 2458 cpi->cpi_ncpu_shr_last_cache =
2454 2459 CPI_NTHR_SHR_CACHE(cp) + 1;
2455 2460 }
2456 2461 }
2457 2462 cpi->cpi_std_4_size = size = i;
2458 2463
2459 2464 /*
2460 2465 * Allocate the cpi_std_4 array. The first element
2461 2466 * references the regs for fn 4, %ecx == 0, which
2462 2467 * cpuid_pass2() stashed in cpi->cpi_std[4].
2463 2468 */
2464 2469 if (size > 0) {
2465 2470 cpi->cpi_std_4 =
2466 2471 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2467 2472 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2468 2473
2469 2474 /*
2470 2475 * Allocate storage to hold the additional regs
2471 2476 * for function 4, %ecx == 1 .. cpi_std_4_size.
2472 2477 *
2473 2478 * The regs for fn 4, %ecx == 0 has already
2474 2479 * been allocated as indicated above.
2475 2480 */
2476 2481 for (i = 1; i < size; i++) {
2477 2482 cp = cpi->cpi_std_4[i] =
2478 2483 kmem_zalloc(sizeof (regs), KM_SLEEP);
2479 2484 cp->cp_eax = 4;
2480 2485 cp->cp_ecx = i;
2481 2486
2482 2487 (void) __cpuid_insn(cp);
2483 2488 }
2484 2489 }
2485 2490 /*
2486 2491 * Determine the number of bits needed to represent
2487 2492 * the number of CPUs sharing the last level cache.
2488 2493 *
2489 2494 * Shift off that number of bits from the APIC id to
2490 2495 * derive the cache id.
2491 2496 */
2492 2497 shft = 0;
2493 2498 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2494 2499 shft++;
2495 2500 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2496 2501 }
2497 2502
2498 2503 /*
2499 2504 * Now fixup the brand string
2500 2505 */
2501 2506 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2502 2507 fabricate_brandstr(cpi);
2503 2508 } else {
2504 2509
2505 2510 /*
2506 2511 * If we successfully extracted a brand string from the cpuid
2507 2512 * instruction, clean it up by removing leading spaces and
2508 2513 * similar junk.
2509 2514 */
2510 2515 if (cpi->cpi_brandstr[0]) {
2511 2516 size_t maxlen = sizeof (cpi->cpi_brandstr);
2512 2517 char *src, *dst;
2513 2518
2514 2519 dst = src = (char *)cpi->cpi_brandstr;
2515 2520 src[maxlen - 1] = '\0';
2516 2521 /*
2517 2522 * strip leading spaces
2518 2523 */
2519 2524 while (*src == ' ')
2520 2525 src++;
2521 2526 /*
2522 2527 * Remove any 'Genuine' or "Authentic" prefixes
2523 2528 */
2524 2529 if (strncmp(src, "Genuine ", 8) == 0)
2525 2530 src += 8;
2526 2531 if (strncmp(src, "Authentic ", 10) == 0)
2527 2532 src += 10;
2528 2533
2529 2534 /*
2530 2535 * Now do an in-place copy.
2531 2536 * Map (R) to (r) and (TM) to (tm).
2532 2537 * The era of teletypes is long gone, and there's
2533 2538 * -really- no need to shout.
2534 2539 */
2535 2540 while (*src != '\0') {
2536 2541 if (src[0] == '(') {
2537 2542 if (strncmp(src + 1, "R)", 2) == 0) {
2538 2543 (void) strncpy(dst, "(r)", 3);
2539 2544 src += 3;
2540 2545 dst += 3;
2541 2546 continue;
2542 2547 }
2543 2548 if (strncmp(src + 1, "TM)", 3) == 0) {
2544 2549 (void) strncpy(dst, "(tm)", 4);
2545 2550 src += 4;
2546 2551 dst += 4;
2547 2552 continue;
2548 2553 }
2549 2554 }
2550 2555 *dst++ = *src++;
2551 2556 }
2552 2557 *dst = '\0';
2553 2558
2554 2559 /*
2555 2560 * Finally, remove any trailing spaces
2556 2561 */
2557 2562 while (--dst > cpi->cpi_brandstr)
2558 2563 if (*dst == ' ')
2559 2564 *dst = '\0';
2560 2565 else
2561 2566 break;
2562 2567 } else
2563 2568 fabricate_brandstr(cpi);
2564 2569 }
2565 2570 cpi->cpi_pass = 3;
2566 2571 }
2567 2572
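/*
 * Editorial aside (not part of the diff under review): the last-level
 * cache id computed in cpuid_pass3() above is simply the APIC ID shifted
 * right by enough bits to cover the CPUs sharing that cache.  Minimal
 * sketch with a hypothetical name:
 */
static id_t
example_last_lvl_cacheid(uint32_t apicid, uint_t ncpu_shr_last_cache)
{
	uint_t i, shft = 0;

	for (i = 1; i < ncpu_shr_last_cache; i <<= 1)
		shft++;			/* shft = ceil(log2(sharers)) */
	return (apicid >> shft);
}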
2568 2573 /*
2569 2574 * This routine is called out of bind_hwcap() much later in the life
2570 2575 * of the kernel (post_startup()). The job of this routine is to resolve
2571 2576 * the hardware feature support and kernel support for those features into
2572 2577 * what we're actually going to tell applications via the aux vector.
2573 2578 */
2574 2579 void
2575 2580 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
2576 2581 {
2577 2582 struct cpuid_info *cpi;
2578 2583 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
2579 2584
2580 2585 if (cpu == NULL)
2581 2586 cpu = CPU;
2582 2587 cpi = cpu->cpu_m.mcpu_cpi;
2583 2588
2584 2589 ASSERT(cpi->cpi_pass == 3);
2585 2590
2586 2591 if (cpi->cpi_maxeax >= 1) {
2587 2592 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2588 2593 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2589 2594
2590 2595 *edx = CPI_FEATURES_EDX(cpi);
2591 2596 *ecx = CPI_FEATURES_ECX(cpi);
2592 2597
2593 2598 /*
2594 2599 * [these require explicit kernel support]
2595 2600 */
2596 2601 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2597 2602 *edx &= ~CPUID_INTC_EDX_SEP;
2598 2603
2599 2604 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2600 2605 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2601 2606 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2602 2607 *edx &= ~CPUID_INTC_EDX_SSE2;
2603 2608
2604 2609 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2605 2610 *edx &= ~CPUID_INTC_EDX_HTT;
2606 2611
2607 2612 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2608 2613 *ecx &= ~CPUID_INTC_ECX_SSE3;
2609 2614
2610 2615 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2611 2616 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2612 2617 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2613 2618 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2614 2619 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2615 2620 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2616 2621 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2617 2622 *ecx &= ~CPUID_INTC_ECX_AES;
2618 2623 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2619 2624 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2620 2625 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2621 2626 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2622 2627 CPUID_INTC_ECX_OSXSAVE);
2623 2628 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2624 2629 *ecx &= ~CPUID_INTC_ECX_AVX;
2625 2630 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
2626 2631 *ecx &= ~CPUID_INTC_ECX_F16C;
2627 2632
2628 2633 /*
2629 2634 * [no explicit support required beyond x87 fp context]
2630 2635 */
2631 2636 if (!fpu_exists)
2632 2637 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2633 2638
2634 2639 /*
2635 2640 * Now map the supported feature vector to things that we
2636 2641 * think userland will care about.
2637 2642 */
2638 2643 if (*edx & CPUID_INTC_EDX_SEP)
2639 2644 hwcap_flags |= AV_386_SEP;
2640 2645 if (*edx & CPUID_INTC_EDX_SSE)
2641 2646 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2642 2647 if (*edx & CPUID_INTC_EDX_SSE2)
2643 2648 hwcap_flags |= AV_386_SSE2;
2644 2649 if (*ecx & CPUID_INTC_ECX_SSE3)
2645 2650 hwcap_flags |= AV_386_SSE3;
2646 2651 if (*ecx & CPUID_INTC_ECX_SSSE3)
2647 2652 hwcap_flags |= AV_386_SSSE3;
2648 2653 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2649 2654 hwcap_flags |= AV_386_SSE4_1;
2650 2655 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2651 2656 hwcap_flags |= AV_386_SSE4_2;
2652 2657 if (*ecx & CPUID_INTC_ECX_MOVBE)
2653 2658 hwcap_flags |= AV_386_MOVBE;
2654 2659 if (*ecx & CPUID_INTC_ECX_AES)
2655 2660 hwcap_flags |= AV_386_AES;
2656 2661 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2657 2662 hwcap_flags |= AV_386_PCLMULQDQ;
2658 2663 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2659 2664 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2660 2665 hwcap_flags |= AV_386_XSAVE;
2661 2666
2662 2667 if (*ecx & CPUID_INTC_ECX_AVX) {
2663 2668 hwcap_flags |= AV_386_AVX;
2664 2669 if (*ecx & CPUID_INTC_ECX_F16C)
2665 2670 hwcap_flags_2 |= AV_386_2_F16C;
2666 2671 }
2667 2672 }
2668 2673 if (*ecx & CPUID_INTC_ECX_VMX)
2669 2674 hwcap_flags |= AV_386_VMX;
2670 2675 if (*ecx & CPUID_INTC_ECX_POPCNT)
2671 2676 hwcap_flags |= AV_386_POPCNT;
2672 2677 if (*edx & CPUID_INTC_EDX_FPU)
2673 2678 hwcap_flags |= AV_386_FPU;
2674 2679 if (*edx & CPUID_INTC_EDX_MMX)
2675 2680 hwcap_flags |= AV_386_MMX;
2676 2681
2677 2682 if (*edx & CPUID_INTC_EDX_TSC)
2678 2683 hwcap_flags |= AV_386_TSC;
2679 2684 if (*edx & CPUID_INTC_EDX_CX8)
2680 2685 hwcap_flags |= AV_386_CX8;
2681 2686 if (*edx & CPUID_INTC_EDX_CMOV)
2682 2687 hwcap_flags |= AV_386_CMOV;
2683 2688 if (*ecx & CPUID_INTC_ECX_CX16)
2684 2689 hwcap_flags |= AV_386_CX16;
2685 2690
2686 2691 if (*ecx & CPUID_INTC_ECX_RDRAND)
2687 2692 hwcap_flags_2 |= AV_386_2_RDRAND;
2688 2693 }
2689 2694
2690 2695 if (cpi->cpi_xmaxeax < 0x80000001)
2691 2696 goto pass4_done;
2692 2697
2693 2698 switch (cpi->cpi_vendor) {
2694 2699 struct cpuid_regs cp;
2695 2700 uint32_t *edx, *ecx;
2696 2701
2697 2702 case X86_VENDOR_Intel:
2698 2703 /*
2699 2704 * Seems like Intel duplicated what was necessary
2700 2705 * here to make the initial crop of 64-bit OS's work.
2701 2706 * Hopefully, those are the only "extended" bits
2702 2707 * they'll add.
2703 2708 */
2704 2709 /*FALLTHROUGH*/
2705 2710
2706 2711 case X86_VENDOR_AMD:
2707 2712 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2708 2713 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2709 2714
2710 2715 *edx = CPI_FEATURES_XTD_EDX(cpi);
2711 2716 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2712 2717
2713 2718 /*
2714 2719 * [these features require explicit kernel support]
2715 2720 */
2716 2721 switch (cpi->cpi_vendor) {
2717 2722 case X86_VENDOR_Intel:
2718 2723 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2719 2724 *edx &= ~CPUID_AMD_EDX_TSCP;
2720 2725 break;
2721 2726
2722 2727 case X86_VENDOR_AMD:
2723 2728 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2724 2729 *edx &= ~CPUID_AMD_EDX_TSCP;
2725 2730 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2726 2731 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2727 2732 break;
2728 2733
2729 2734 default:
2730 2735 break;
2731 2736 }
2732 2737
2733 2738 /*
2734 2739 * [no explicit support required beyond
2735 2740 * x87 fp context and exception handlers]
2736 2741 */
2737 2742 if (!fpu_exists)
2738 2743 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2739 2744 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2740 2745
2741 2746 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2742 2747 *edx &= ~CPUID_AMD_EDX_NX;
2743 2748 #if !defined(__amd64)
2744 2749 *edx &= ~CPUID_AMD_EDX_LM;
2745 2750 #endif
2746 2751 /*
2747 2752 * Now map the supported feature vector to
2748 2753 * things that we think userland will care about.
2749 2754 */
2750 2755 #if defined(__amd64)
2751 2756 if (*edx & CPUID_AMD_EDX_SYSC)
2752 2757 hwcap_flags |= AV_386_AMD_SYSC;
2753 2758 #endif
2754 2759 if (*edx & CPUID_AMD_EDX_MMXamd)
2755 2760 hwcap_flags |= AV_386_AMD_MMX;
2756 2761 if (*edx & CPUID_AMD_EDX_3DNow)
2757 2762 hwcap_flags |= AV_386_AMD_3DNow;
2758 2763 if (*edx & CPUID_AMD_EDX_3DNowx)
2759 2764 hwcap_flags |= AV_386_AMD_3DNowx;
2760 2765 if (*ecx & CPUID_AMD_ECX_SVM)
2761 2766 hwcap_flags |= AV_386_AMD_SVM;
2762 2767
2763 2768 switch (cpi->cpi_vendor) {
2764 2769 case X86_VENDOR_AMD:
2765 2770 if (*edx & CPUID_AMD_EDX_TSCP)
2766 2771 hwcap_flags |= AV_386_TSCP;
2767 2772 if (*ecx & CPUID_AMD_ECX_AHF64)
2768 2773 hwcap_flags |= AV_386_AHF;
2769 2774 if (*ecx & CPUID_AMD_ECX_SSE4A)
2770 2775 hwcap_flags |= AV_386_AMD_SSE4A;
2771 2776 if (*ecx & CPUID_AMD_ECX_LZCNT)
2772 2777 hwcap_flags |= AV_386_AMD_LZCNT;
2773 2778 break;
2774 2779
2775 2780 case X86_VENDOR_Intel:
2776 2781 if (*edx & CPUID_AMD_EDX_TSCP)
2777 2782 hwcap_flags |= AV_386_TSCP;
2778 2783 /*
2779 2784 * Aarrgh.
2780 2785 * Intel uses a different bit in the same word.
2781 2786 */
2782 2787 if (*ecx & CPUID_INTC_ECX_AHF64)
2783 2788 hwcap_flags |= AV_386_AHF;
2784 2789 break;
2785 2790
2786 2791 default:
2787 2792 break;
2788 2793 }
2789 2794 break;
2790 2795
2791 2796 case X86_VENDOR_TM:
2792 2797 cp.cp_eax = 0x80860001;
2793 2798 (void) __cpuid_insn(&cp);
2794 2799 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2795 2800 break;
2796 2801
2797 2802 default:
2798 2803 break;
2799 2804 }
2800 2805
2801 2806 pass4_done:
2802 2807 cpi->cpi_pass = 4;
2803 2808 if (hwcap_out != NULL) {
2804 2809 hwcap_out[0] = hwcap_flags;
2805 2810 hwcap_out[1] = hwcap_flags_2;
2806 2811 }
2807 2812 }
2808 2813
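/*
 * Editorial aside (not part of the diff under review): a hypothetical
 * caller of cpuid_pass4(), shown only to illustrate how the two hwcap
 * words are handed back; bind_hwcap(), as noted above, is the real
 * consumer.
 */
static void
example_consume_hwcap(cpu_t *cpu)
{
	uint_t hwcaps[2];

	cpuid_pass4(cpu, hwcaps);
	/* hwcaps[0] carries AV_386_* bits, hwcaps[1] carries AV_386_2_* bits */
}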
2809 2814
2810 2815 /*
2811 2816 * Simulate the cpuid instruction using the data we previously
2812 2817 * captured about this CPU. We try our best to return the truth
2813 2818 * about the hardware, independently of kernel support.
2814 2819 */
2815 2820 uint32_t
2816 2821 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2817 2822 {
2818 2823 struct cpuid_info *cpi;
2819 2824 struct cpuid_regs *xcp;
2820 2825
2821 2826 if (cpu == NULL)
2822 2827 cpu = CPU;
2823 2828 cpi = cpu->cpu_m.mcpu_cpi;
2824 2829
2825 2830 ASSERT(cpuid_checkpass(cpu, 3));
2826 2831
2827 2832 /*
2828 2833 * CPUID data is cached in two separate places: cpi_std for standard
2829 2834 * CPUID functions, and cpi_extd for extended CPUID functions.
2830 2835 */
2831 2836 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2832 2837 xcp = &cpi->cpi_std[cp->cp_eax];
2833 2838 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2834 2839 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2835 2840 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2836 2841 else
2837 2842 /*
2838 2843 * The caller is asking for data from an input parameter which
2839 2844 * the kernel has not cached. In this case we go fetch from
2840 2845 * the hardware and return the data directly to the user.
2841 2846 */
2842 2847 return (__cpuid_insn(cp));
2843 2848
2844 2849 cp->cp_eax = xcp->cp_eax;
2845 2850 cp->cp_ebx = xcp->cp_ebx;
2846 2851 cp->cp_ecx = xcp->cp_ecx;
2847 2852 cp->cp_edx = xcp->cp_edx;
2848 2853 return (cp->cp_eax);
2849 2854 }
2850 2855
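/*
 * Editorial aside (not part of the diff under review): a minimal,
 * hypothetical use of cpuid_insn() above.  Leaf 0 is served from the
 * cached cpi_std[] table, so this returns the stored maximum standard
 * leaf without re-executing the cpuid instruction (passes 1-3 must
 * already have run).
 */
static uint32_t
example_query_max_std_leaf(cpu_t *cpu)
{
	struct cpuid_regs r = { 0 };

	r.cp_eax = 0;			/* standard leaf 0 */
	return (cpuid_insn(cpu, &r));	/* %eax of the cached result */
}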
2851 2856 int
2852 2857 cpuid_checkpass(cpu_t *cpu, int pass)
2853 2858 {
2854 2859 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2855 2860 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2856 2861 }
2857 2862
2858 2863 int
2859 2864 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2860 2865 {
2861 2866 ASSERT(cpuid_checkpass(cpu, 3));
2862 2867
2863 2868 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2864 2869 }
2865 2870
2866 2871 int
2867 2872 cpuid_is_cmt(cpu_t *cpu)
2868 2873 {
2869 2874 if (cpu == NULL)
2870 2875 cpu = CPU;
2871 2876
2872 2877 ASSERT(cpuid_checkpass(cpu, 1));
2873 2878
2874 2879 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2875 2880 }
2876 2881
2877 2882 /*
2878 2883 * AMD and Intel both implement the 64-bit variant of the syscall
2879 2884 * instruction (syscallq), so if there's -any- support for syscall,
2880 2885 * cpuid currently says "yes, we support this".
2881 2886 *
2882 2887 * However, Intel decided to -not- implement the 32-bit variant of the
2883 2888 * syscall instruction, so we provide a predicate to allow our caller
2884 2889 * to test that subtlety here.
2885 2890 *
2886 2891 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2887 2892 * even in the case where the hardware would in fact support it.
2888 2893 */
2889 2894 /*ARGSUSED*/
2890 2895 int
2891 2896 cpuid_syscall32_insn(cpu_t *cpu)
2892 2897 {
2893 2898 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2894 2899
2895 2900 #if !defined(__xpv)
2896 2901 if (cpu == NULL)
2897 2902 cpu = CPU;
2898 2903
2899 2904 /*CSTYLED*/
2900 2905 {
2901 2906 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2902 2907
2903 2908 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2904 2909 cpi->cpi_xmaxeax >= 0x80000001 &&
2905 2910 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2906 2911 return (1);
2907 2912 }
2908 2913 #endif
2909 2914 return (0);
2910 2915 }
2911 2916
2912 2917 int
2913 2918 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2914 2919 {
2915 2920 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2916 2921
2917 2922 static const char fmt[] =
2918 2923 "x86 (%s %X family %d model %d step %d clock %d MHz)";
2919 2924 static const char fmt_ht[] =
2920 2925 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2921 2926
2922 2927 ASSERT(cpuid_checkpass(cpu, 1));
2923 2928
2924 2929 if (cpuid_is_cmt(cpu))
2925 2930 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2926 2931 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2927 2932 cpi->cpi_family, cpi->cpi_model,
2928 2933 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2929 2934 return (snprintf(s, n, fmt,
2930 2935 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2931 2936 cpi->cpi_family, cpi->cpi_model,
2932 2937 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2933 2938 }
2934 2939
2935 2940 const char *
2936 2941 cpuid_getvendorstr(cpu_t *cpu)
2937 2942 {
2938 2943 ASSERT(cpuid_checkpass(cpu, 1));
2939 2944 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2940 2945 }
2941 2946
2942 2947 uint_t
2943 2948 cpuid_getvendor(cpu_t *cpu)
2944 2949 {
2945 2950 ASSERT(cpuid_checkpass(cpu, 1));
2946 2951 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2947 2952 }
2948 2953
2949 2954 uint_t
2950 2955 cpuid_getfamily(cpu_t *cpu)
2951 2956 {
2952 2957 ASSERT(cpuid_checkpass(cpu, 1));
2953 2958 return (cpu->cpu_m.mcpu_cpi->cpi_family);
2954 2959 }
2955 2960
2956 2961 uint_t
2957 2962 cpuid_getmodel(cpu_t *cpu)
2958 2963 {
2959 2964 ASSERT(cpuid_checkpass(cpu, 1));
2960 2965 return (cpu->cpu_m.mcpu_cpi->cpi_model);
2961 2966 }
2962 2967
2963 2968 uint_t
2964 2969 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2965 2970 {
2966 2971 ASSERT(cpuid_checkpass(cpu, 1));
2967 2972 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2968 2973 }
2969 2974
2970 2975 uint_t
2971 2976 cpuid_get_ncore_per_chip(cpu_t *cpu)
2972 2977 {
2973 2978 ASSERT(cpuid_checkpass(cpu, 1));
2974 2979 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2975 2980 }
2976 2981
2977 2982 uint_t
2978 2983 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2979 2984 {
2980 2985 ASSERT(cpuid_checkpass(cpu, 2));
2981 2986 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2982 2987 }
2983 2988
2984 2989 id_t
2985 2990 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2986 2991 {
2987 2992 ASSERT(cpuid_checkpass(cpu, 2));
2988 2993 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2989 2994 }
2990 2995
2991 2996 uint_t
2992 2997 cpuid_getstep(cpu_t *cpu)
2993 2998 {
2994 2999 ASSERT(cpuid_checkpass(cpu, 1));
2995 3000 return (cpu->cpu_m.mcpu_cpi->cpi_step);
2996 3001 }
2997 3002
2998 3003 uint_t
2999 3004 cpuid_getsig(struct cpu *cpu)
3000 3005 {
3001 3006 ASSERT(cpuid_checkpass(cpu, 1));
3002 3007 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
3003 3008 }
3004 3009
3005 3010 uint32_t
3006 3011 cpuid_getchiprev(struct cpu *cpu)
3007 3012 {
3008 3013 ASSERT(cpuid_checkpass(cpu, 1));
3009 3014 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
3010 3015 }
3011 3016
3012 3017 const char *
3013 3018 cpuid_getchiprevstr(struct cpu *cpu)
3014 3019 {
3015 3020 ASSERT(cpuid_checkpass(cpu, 1));
3016 3021 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
3017 3022 }
3018 3023
3019 3024 uint32_t
3020 3025 cpuid_getsockettype(struct cpu *cpu)
3021 3026 {
3022 3027 ASSERT(cpuid_checkpass(cpu, 1));
3023 3028 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
3024 3029 }
3025 3030
3026 3031 const char *
3027 3032 cpuid_getsocketstr(cpu_t *cpu)
3028 3033 {
3029 3034 static const char *socketstr = NULL;
3030 3035 struct cpuid_info *cpi;
3031 3036
3032 3037 ASSERT(cpuid_checkpass(cpu, 1));
3033 3038 cpi = cpu->cpu_m.mcpu_cpi;
3034 3039
3035 3040 /* Assume that socket types are the same across the system */
3036 3041 if (socketstr == NULL)
3037 3042 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
3038 3043 cpi->cpi_model, cpi->cpi_step);
3039 3044
3040 3045
3041 3046 return (socketstr);
3042 3047 }
3043 3048
3044 3049 int
3045 3050 cpuid_get_chipid(cpu_t *cpu)
3046 3051 {
3047 3052 ASSERT(cpuid_checkpass(cpu, 1));
3048 3053
3049 3054 if (cpuid_is_cmt(cpu))
3050 3055 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
3051 3056 return (cpu->cpu_id);
3052 3057 }
3053 3058
3054 3059 id_t
3055 3060 cpuid_get_coreid(cpu_t *cpu)
3056 3061 {
3057 3062 ASSERT(cpuid_checkpass(cpu, 1));
3058 3063 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
3059 3064 }
3060 3065
3061 3066 int
3062 3067 cpuid_get_pkgcoreid(cpu_t *cpu)
3063 3068 {
3064 3069 ASSERT(cpuid_checkpass(cpu, 1));
3065 3070 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3066 3071 }
3067 3072
3068 3073 int
3069 3074 cpuid_get_clogid(cpu_t *cpu)
3070 3075 {
3071 3076 ASSERT(cpuid_checkpass(cpu, 1));
3072 3077 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3073 3078 }
3074 3079
3075 3080 int
3076 3081 cpuid_get_cacheid(cpu_t *cpu)
3077 3082 {
3078 3083 ASSERT(cpuid_checkpass(cpu, 1));
3079 3084 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3080 3085 }
3081 3086
3082 3087 uint_t
3083 3088 cpuid_get_procnodeid(cpu_t *cpu)
3084 3089 {
3085 3090 ASSERT(cpuid_checkpass(cpu, 1));
3086 3091 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3087 3092 }
3088 3093
3089 3094 uint_t
3090 3095 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3091 3096 {
3092 3097 ASSERT(cpuid_checkpass(cpu, 1));
3093 3098 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3094 3099 }
3095 3100
3096 3101 uint_t
3097 3102 cpuid_get_compunitid(cpu_t *cpu)
3098 3103 {
3099 3104 ASSERT(cpuid_checkpass(cpu, 1));
3100 3105 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3101 3106 }
3102 3107
3103 3108 uint_t
3104 3109 cpuid_get_cores_per_compunit(cpu_t *cpu)
3105 3110 {
3106 3111 ASSERT(cpuid_checkpass(cpu, 1));
3107 3112 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3108 3113 }
3109 3114
3110 3115 /*ARGSUSED*/
3111 3116 int
3112 3117 cpuid_have_cr8access(cpu_t *cpu)
3113 3118 {
3114 3119 #if defined(__amd64)
3115 3120 return (1);
3116 3121 #else
3117 3122 struct cpuid_info *cpi;
3118 3123
3119 3124 ASSERT(cpu != NULL);
3120 3125 cpi = cpu->cpu_m.mcpu_cpi;
3121 3126 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3122 3127 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3123 3128 return (1);
3124 3129 return (0);
3125 3130 #endif
3126 3131 }
3127 3132
3128 3133 uint32_t
3129 3134 cpuid_get_apicid(cpu_t *cpu)
3130 3135 {
3131 3136 ASSERT(cpuid_checkpass(cpu, 1));
3132 3137 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3133 3138 return (UINT32_MAX);
3134 3139 } else {
3135 3140 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3136 3141 }
3137 3142 }
3138 3143
3139 3144 void
3140 3145 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3141 3146 {
3142 3147 struct cpuid_info *cpi;
3143 3148
3144 3149 if (cpu == NULL)
3145 3150 cpu = CPU;
3146 3151 cpi = cpu->cpu_m.mcpu_cpi;
3147 3152
3148 3153 ASSERT(cpuid_checkpass(cpu, 1));
3149 3154
3150 3155 if (pabits)
3151 3156 *pabits = cpi->cpi_pabits;
3152 3157 if (vabits)
3153 3158 *vabits = cpi->cpi_vabits;
3154 3159 }
3155 3160
3156 3161 /*
3157 3162 * Returns the number of data TLB entries for a corresponding
3158 3163 * pagesize. If it can't be computed, or isn't known, the
3159 3164 * routine returns zero. If you ask about an architecturally
3160 3165 * impossible pagesize, the routine will panic (so that the
3161 3166 * hat implementor knows that things are inconsistent.)
3162 3167 */
3163 3168 uint_t
3164 3169 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3165 3170 {
3166 3171 struct cpuid_info *cpi;
3167 3172 uint_t dtlb_nent = 0;
3168 3173
3169 3174 if (cpu == NULL)
3170 3175 cpu = CPU;
3171 3176 cpi = cpu->cpu_m.mcpu_cpi;
3172 3177
3173 3178 ASSERT(cpuid_checkpass(cpu, 1));
3174 3179
3175 3180 /*
3176 3181 * Check the L2 TLB info
3177 3182 */
3178 3183 if (cpi->cpi_xmaxeax >= 0x80000006) {
3179 3184 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3180 3185
3181 3186 switch (pagesize) {
3182 3187
3183 3188 case 4 * 1024:
3184 3189 /*
3185 3190 * All zero in the top 16 bits of the register
3186 3191 * indicates a unified TLB. Size is in low 16 bits.
3187 3192 */
3188 3193 if ((cp->cp_ebx & 0xffff0000) == 0)
3189 3194 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3190 3195 else
3191 3196 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3192 3197 break;
3193 3198
3194 3199 case 2 * 1024 * 1024:
3195 3200 if ((cp->cp_eax & 0xffff0000) == 0)
3196 3201 dtlb_nent = cp->cp_eax & 0x0000ffff;
3197 3202 else
3198 3203 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3199 3204 break;
3200 3205
3201 3206 default:
3202 3207 panic("unknown L2 pagesize");
3203 3208 /*NOTREACHED*/
3204 3209 }
3205 3210 }
3206 3211
3207 3212 if (dtlb_nent != 0)
3208 3213 return (dtlb_nent);
3209 3214
3210 3215 /*
3211 3216 * No L2 TLB support for this size, try L1.
3212 3217 */
3213 3218 if (cpi->cpi_xmaxeax >= 0x80000005) {
3214 3219 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3215 3220
3216 3221 switch (pagesize) {
3217 3222 case 4 * 1024:
3218 3223 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3219 3224 break;
3220 3225 case 2 * 1024 * 1024:
3221 3226 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3222 3227 break;
3223 3228 default:
3224 3229 panic("unknown L1 d-TLB pagesize");
3225 3230 /*NOTREACHED*/
3226 3231 }
3227 3232 }
3228 3233
3229 3234 return (dtlb_nent);
3230 3235 }
3231 3236
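/*
 * Editorial aside (not part of the diff under review): the 0x80000006
 * decode above in sketch form -- a zero upper half of the register means
 * a unified L2 TLB whose entry count is the low 16 bits; otherwise the
 * data-TLB count sits in bits 27:16.  Hypothetical helper covering only
 * the 4K-page case.
 */
static uint_t
example_l2_dtlb_4k(const struct cpuid_regs *cp)
{
	if ((cp->cp_ebx & 0xffff0000) == 0)
		return (cp->cp_ebx & 0x0000ffff);	/* unified L2 TLB */
	return (BITX(cp->cp_ebx, 27, 16));		/* split: d-TLB entries */
}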
3232 3237 /*
3233 3238 * Return 0 if the erratum is not present or not applicable, positive
3234 3239 * if it is, and negative if the status of the erratum is unknown.
3235 3240 *
3236 3241 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3237 3242 * Processors" #25759, Rev 3.57, August 2005
3238 3243 */
3239 3244 int
3240 3245 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3241 3246 {
3242 3247 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3243 3248 uint_t eax;
3244 3249
3245 3250 /*
3246 3251 * Bail out if this CPU isn't an AMD CPU, or if it's
3247 3252 * a legacy (32-bit) AMD CPU.
3248 3253 */
3249 3254 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3250 3255 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3251 3256 cpi->cpi_family == 6)
3252 3257
3253 3258 return (0);
3254 3259
3255 3260 eax = cpi->cpi_std[1].cp_eax;
3256 3261
3257 3262 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3258 3263 #define SH_B3(eax) (eax == 0xf51)
3259 3264 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3260 3265
3261 3266 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3262 3267
3263 3268 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3264 3269 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3265 3270 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3266 3271 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3267 3272
3268 3273 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3269 3274 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3270 3275 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3271 3276 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3272 3277
3273 3278 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3274 3279 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3275 3280 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3276 3281 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3277 3282 #define BH_E4(eax) (eax == 0x20fb1)
3278 3283 #define SH_E5(eax) (eax == 0x20f42)
3279 3284 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3280 3285 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3281 3286 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3282 3287 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3283 3288 DH_E6(eax) || JH_E6(eax))
3284 3289
3285 3290 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3286 3291 #define DR_B0(eax) (eax == 0x100f20)
3287 3292 #define DR_B1(eax) (eax == 0x100f21)
3288 3293 #define DR_BA(eax) (eax == 0x100f2a)
3289 3294 #define DR_B2(eax) (eax == 0x100f22)
3290 3295 #define DR_B3(eax) (eax == 0x100f23)
3291 3296 #define RB_C0(eax) (eax == 0x100f40)
3292 3297
3293 3298 switch (erratum) {
3294 3299 case 1:
3295 3300 return (cpi->cpi_family < 0x10);
3296 3301 case 51: /* what does the asterisk mean? */
3297 3302 return (B(eax) || SH_C0(eax) || CG(eax));
3298 3303 case 52:
3299 3304 return (B(eax));
3300 3305 case 57:
3301 3306 return (cpi->cpi_family <= 0x11);
3302 3307 case 58:
3303 3308 return (B(eax));
3304 3309 case 60:
3305 3310 return (cpi->cpi_family <= 0x11);
3306 3311 case 61:
3307 3312 case 62:
3308 3313 case 63:
3309 3314 case 64:
3310 3315 case 65:
3311 3316 case 66:
3312 3317 case 68:
3313 3318 case 69:
3314 3319 case 70:
3315 3320 case 71:
3316 3321 return (B(eax));
3317 3322 case 72:
3318 3323 return (SH_B0(eax));
3319 3324 case 74:
3320 3325 return (B(eax));
3321 3326 case 75:
3322 3327 return (cpi->cpi_family < 0x10);
3323 3328 case 76:
3324 3329 return (B(eax));
3325 3330 case 77:
3326 3331 return (cpi->cpi_family <= 0x11);
3327 3332 case 78:
3328 3333 return (B(eax) || SH_C0(eax));
3329 3334 case 79:
3330 3335 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3331 3336 case 80:
3332 3337 case 81:
3333 3338 case 82:
3334 3339 return (B(eax));
3335 3340 case 83:
3336 3341 return (B(eax) || SH_C0(eax) || CG(eax));
3337 3342 case 85:
3338 3343 return (cpi->cpi_family < 0x10);
3339 3344 case 86:
3340 3345 return (SH_C0(eax) || CG(eax));
3341 3346 case 88:
3342 3347 #if !defined(__amd64)
3343 3348 return (0);
3344 3349 #else
3345 3350 return (B(eax) || SH_C0(eax));
3346 3351 #endif
3347 3352 case 89:
3348 3353 return (cpi->cpi_family < 0x10);
3349 3354 case 90:
3350 3355 return (B(eax) || SH_C0(eax) || CG(eax));
3351 3356 case 91:
3352 3357 case 92:
3353 3358 return (B(eax) || SH_C0(eax));
3354 3359 case 93:
3355 3360 return (SH_C0(eax));
3356 3361 case 94:
3357 3362 return (B(eax) || SH_C0(eax) || CG(eax));
3358 3363 case 95:
3359 3364 #if !defined(__amd64)
3360 3365 return (0);
3361 3366 #else
3362 3367 return (B(eax) || SH_C0(eax));
3363 3368 #endif
3364 3369 case 96:
3365 3370 return (B(eax) || SH_C0(eax) || CG(eax));
3366 3371 case 97:
3367 3372 case 98:
3368 3373 return (SH_C0(eax) || CG(eax));
3369 3374 case 99:
3370 3375 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3371 3376 case 100:
3372 3377 return (B(eax) || SH_C0(eax));
3373 3378 case 101:
3374 3379 case 103:
3375 3380 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3376 3381 case 104:
3377 3382 return (SH_C0(eax) || CG(eax) || D0(eax));
3378 3383 case 105:
3379 3384 case 106:
3380 3385 case 107:
3381 3386 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3382 3387 case 108:
3383 3388 return (DH_CG(eax));
3384 3389 case 109:
3385 3390 return (SH_C0(eax) || CG(eax) || D0(eax));
3386 3391 case 110:
3387 3392 return (D0(eax) || EX(eax));
3388 3393 case 111:
3389 3394 return (CG(eax));
3390 3395 case 112:
3391 3396 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3392 3397 case 113:
3393 3398 return (eax == 0x20fc0);
3394 3399 case 114:
3395 3400 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3396 3401 case 115:
3397 3402 return (SH_E0(eax) || JH_E1(eax));
3398 3403 case 116:
3399 3404 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3400 3405 case 117:
3401 3406 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3402 3407 case 118:
3403 3408 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3404 3409 JH_E6(eax));
3405 3410 case 121:
3406 3411 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3407 3412 case 122:
3408 3413 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3409 3414 case 123:
3410 3415 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3411 3416 case 131:
3412 3417 return (cpi->cpi_family < 0x10);
3413 3418 case 6336786:
3414 3419 /*
3415 3420 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3416 3421 * if this is a K8 family or newer processor
3417 3422 */
3418 3423 if (CPI_FAMILY(cpi) == 0xf) {
3419 3424 struct cpuid_regs regs;
3420 3425 regs.cp_eax = 0x80000007;
3421 3426 				(void) __cpuid_insn(&regs);
3422 3427 return (!(regs.cp_edx & 0x100));
3423 3428 }
3424 3429 return (0);
3425 3430 case 6323525:
3426 3431 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3427 3432 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3428 3433
3429 3434 case 6671130:
3430 3435 /*
3431 3436 		 * optimal management of 1gb ptes in their tlbs.
3432 3437 * optimal management of 1gb ptes in its tlb.
3433 3438 */
3434 3439 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3435 3440
3436 3441 case 298:
3437 3442 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3438 3443 DR_B2(eax) || RB_C0(eax));
3439 3444
3440 3445 case 721:
3441 3446 #if defined(__amd64)
3442 3447 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3443 3448 #else
3444 3449 return (0);
3445 3450 #endif
3446 3451
3447 3452 default:
3448 3453 return (-1);
3449 3454
3450 3455 }
3451 3456 }
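The 6336786 case above probes extended leaf 0x80000007 and treats a clear bit 8 of EDX (the bit the comment calls AdvPowerMgmtInfo.TscPStateInvariant) as "erratum applies". A minimal standalone sketch of that one test; the EDX value is hypothetical sample data, not read from hardware.

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Hypothetical CPUID.80000007 EDX; bit 8 is the TSC-invariance bit. */
		uint32_t edx = 0x00000100;

		/* Mirrors the kernel test: the erratum applies when the bit is clear. */
		printf("TSC P-state invariant: %s, erratum applies: %s\n",
		    (edx & 0x100) ? "yes" : "no",
		    !(edx & 0x100) ? "yes" : "no");
		return (0);
	}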
3452 3457
3453 3458 /*
3454 3459 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3455 3460 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3456 3461 */
3457 3462 int
3458 3463 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3459 3464 {
3460 3465 struct cpuid_info *cpi;
3461 3466 uint_t osvwid;
3462 3467 static int osvwfeature = -1;
3463 3468 uint64_t osvwlength;
3464 3469
3465 3470
3466 3471 cpi = cpu->cpu_m.mcpu_cpi;
3467 3472
3468 3473 /* confirm OSVW supported */
3469 3474 if (osvwfeature == -1) {
3470 3475 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3471 3476 } else {
3472 3477 /* assert that osvw feature setting is consistent on all cpus */
3473 3478 ASSERT(osvwfeature ==
3474 3479 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3475 3480 }
3476 3481 if (!osvwfeature)
3477 3482 return (-1);
3478 3483
3479 3484 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3480 3485
3481 3486 switch (erratum) {
3482 3487 case 298: /* osvwid is 0 */
3483 3488 osvwid = 0;
3484 3489 if (osvwlength <= (uint64_t)osvwid) {
3485 3490 /* osvwid 0 is unknown */
3486 3491 return (-1);
3487 3492 }
3488 3493
3489 3494 /*
3490 3495 * Check the OSVW STATUS MSR to determine the state
3491 3496 * of the erratum where:
3492 3497 * 0 - fixed by HW
3493 3498 * 1 - BIOS has applied the workaround when BIOS
3494 3499 * workaround is available. (Or for other errata,
3495 3500 * OS workaround is required.)
3496 3501 * For a value of 1, caller will confirm that the
3497 3502 * erratum 298 workaround has indeed been applied by BIOS.
3498 3503 *
3499 3504 * A 1 may be set in cpus that have a HW fix
3500 3505 * in a mixed cpu system. Regarding erratum 298:
3501 3506 * In a multiprocessor platform, the workaround above
3502 3507 * should be applied to all processors regardless of
3503 3508 * silicon revision when an affected processor is
3504 3509 * present.
3505 3510 */
3506 3511
3507 3512 return (rdmsr(MSR_AMD_OSVW_STATUS +
3508 3513 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3509 3514 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3510 3515
3511 3516 default:
3512 3517 return (-1);
3513 3518 }
3514 3519 }
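The rdmsr() at the end of the erratum 298 case folds an OSVW ID into an MSR offset and a bit position within that MSR. A minimal user-space sketch of the same arithmetic, assuming 64 erratum bits per 64-bit status register (the role OSVW_ID_CNT_PER_MSR plays above); the status value here is made up rather than read from the MSR.

	#include <stdio.h>
	#include <stdint.h>

	/* Assumed: each OSVW status MSR carries 64 erratum bits. */
	#define OSVW_ID_CNT_PER_MSR	64

	int
	main(void)
	{
		unsigned int osvwid = 0;	/* erratum 298 maps to OSVW ID 0 */

		/* Which status MSR (offset from the base status MSR) and which bit. */
		unsigned int msr_offset = osvwid / OSVW_ID_CNT_PER_MSR;
		uint64_t mask = 1ULL << (osvwid % OSVW_ID_CNT_PER_MSR);

		/* Hypothetical status value; a set bit means a workaround is needed. */
		uint64_t status = 0x1;

		printf("MSR offset %u, mask 0x%llx, erratum present: %d\n",
		    msr_offset, (unsigned long long)mask, (status & mask) != 0);
		return (0);
	}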
3515 3520
3516 3521 static const char assoc_str[] = "associativity";
3517 3522 static const char line_str[] = "line-size";
3518 3523 static const char size_str[] = "size";
3519 3524
3520 3525 static void
3521 3526 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3522 3527 uint32_t val)
3523 3528 {
3524 3529 char buf[128];
3525 3530
3526 3531 /*
3527 3532 * ndi_prop_update_int() is used because it is desirable for
3528 3533 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3529 3534 */
3530 3535 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3531 3536 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3532 3537 }
3533 3538
3534 3539 /*
3535 3540 * Intel-style cache/tlb description
3536 3541 *
3537 3542 * Standard cpuid level 2 gives a randomly ordered
3538 3543 * selection of tags that index into a table that describes
3539 3544 * cache and tlb properties.
3540 3545 */
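Before a table such as intel_ctab[] below can be consulted, the leaf-2 result has to be unpacked into individual descriptor bytes: each of EAX through EDX carries four of them, a register with bit 31 set holds no valid descriptors, and the low byte of EAX is a repeat count rather than a descriptor. A minimal standalone sketch of that unpacking; the register values are hypothetical sample data.

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Hypothetical CPUID.2 output: EAX, EBX, ECX, EDX. */
		uint32_t regs[4] = { 0x665b5001, 0x00000000, 0x00000000, 0x007a7000 };
		int r, b;

		for (r = 0; r < 4; r++) {
			if (regs[r] & 0x80000000)	/* register holds no descriptors */
				continue;
			for (b = 0; b < 4; b++) {
				uint8_t desc = (regs[r] >> (b * 8)) & 0xff;
				if (r == 0 && b == 0)	/* AL is the repeat count */
					continue;
				if (desc != 0)
					printf("descriptor 0x%02x\n", (unsigned int)desc);
			}
		}
		return (0);
	}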
3541 3546
3542 3547 static const char l1_icache_str[] = "l1-icache";
3543 3548 static const char l1_dcache_str[] = "l1-dcache";
3544 3549 static const char l2_cache_str[] = "l2-cache";
3545 3550 static const char l3_cache_str[] = "l3-cache";
3546 3551 static const char itlb4k_str[] = "itlb-4K";
3547 3552 static const char dtlb4k_str[] = "dtlb-4K";
3548 3553 static const char itlb2M_str[] = "itlb-2M";
3549 3554 static const char itlb4M_str[] = "itlb-4M";
3550 3555 static const char dtlb4M_str[] = "dtlb-4M";
3551 3556 static const char dtlb24_str[] = "dtlb0-2M-4M";
3552 3557 static const char itlb424_str[] = "itlb-4K-2M-4M";
3553 3558 static const char itlb24_str[] = "itlb-2M-4M";
3554 3559 static const char dtlb44_str[] = "dtlb-4K-4M";
3555 3560 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3556 3561 static const char sl2_cache_str[] = "sectored-l2-cache";
3557 3562 static const char itrace_str[] = "itrace-cache";
3558 3563 static const char sl3_cache_str[] = "sectored-l3-cache";
3559 3564 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3560 3565
3561 3566 static const struct cachetab {
3562 3567 uint8_t ct_code;
3563 3568 uint8_t ct_assoc;
3564 3569 uint16_t ct_line_size;
3565 3570 size_t ct_size;
3566 3571 const char *ct_label;
3567 3572 } intel_ctab[] = {
3568 3573 /*
3569 3574 * maintain descending order!
3570 3575 *
3571 3576 * Codes ignored - Reason
3572 3577 * ----------------------
3573 3578 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3574 3579 * f0H/f1H - Currently we do not interpret prefetch size by design
3575 3580 */
3576 3581 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3577 3582 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3578 3583 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3579 3584 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3580 3585 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3581 3586 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3582 3587 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3583 3588 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3584 3589 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3585 3590 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3586 3591 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3587 3592 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3588 3593 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3589 3594 { 0xc0, 4, 0, 8, dtlb44_str },
3590 3595 { 0xba, 4, 0, 64, dtlb4k_str },
3591 3596 { 0xb4, 4, 0, 256, dtlb4k_str },
3592 3597 { 0xb3, 4, 0, 128, dtlb4k_str },
3593 3598 { 0xb2, 4, 0, 64, itlb4k_str },
3594 3599 { 0xb0, 4, 0, 128, itlb4k_str },
3595 3600 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3596 3601 { 0x86, 4, 64, 512*1024, l2_cache_str},
3597 3602 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3598 3603 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3599 3604 { 0x83, 8, 32, 512*1024, l2_cache_str},
3600 3605 { 0x82, 8, 32, 256*1024, l2_cache_str},
3601 3606 { 0x80, 8, 64, 512*1024, l2_cache_str},
3602 3607 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3603 3608 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3604 3609 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3605 3610 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3606 3611 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3607 3612 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3608 3613 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3609 3614 { 0x73, 8, 0, 64*1024, itrace_str},
3610 3615 { 0x72, 8, 0, 32*1024, itrace_str},
3611 3616 { 0x71, 8, 0, 16*1024, itrace_str},
3612 3617 { 0x70, 8, 0, 12*1024, itrace_str},
3613 3618 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3614 3619 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3615 3620 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3616 3621 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3617 3622 { 0x5d, 0, 0, 256, dtlb44_str},
3618 3623 { 0x5c, 0, 0, 128, dtlb44_str},
3619 3624 { 0x5b, 0, 0, 64, dtlb44_str},
3620 3625 { 0x5a, 4, 0, 32, dtlb24_str},
3621 3626 { 0x59, 0, 0, 16, dtlb4k_str},
3622 3627 { 0x57, 4, 0, 16, dtlb4k_str},
3623 3628 { 0x56, 4, 0, 16, dtlb4M_str},
3624 3629 { 0x55, 0, 0, 7, itlb24_str},
3625 3630 { 0x52, 0, 0, 256, itlb424_str},
3626 3631 { 0x51, 0, 0, 128, itlb424_str},
3627 3632 { 0x50, 0, 0, 64, itlb424_str},
3628 3633 { 0x4f, 0, 0, 32, itlb4k_str},
3629 3634 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3630 3635 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3631 3636 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3632 3637 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3633 3638 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3634 3639 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3635 3640 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3636 3641 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3637 3642 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3638 3643 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3639 3644 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3640 3645 { 0x43, 4, 32, 512*1024, l2_cache_str},
3641 3646 { 0x42, 4, 32, 256*1024, l2_cache_str},
3642 3647 { 0x41, 4, 32, 128*1024, l2_cache_str},
3643 3648 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3644 3649 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3645 3650 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3646 3651 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3647 3652 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3648 3653 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3649 3654 { 0x30, 8, 64, 32*1024, l1_icache_str},
3650 3655 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3651 3656 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3652 3657 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3653 3658 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3654 3659 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3655 3660 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3656 3661 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3657 3662 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3658 3663 { 0x0b, 4, 0, 4, itlb4M_str},
3659 3664 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3660 3665 { 0x08, 4, 32, 16*1024, l1_icache_str},
3661 3666 { 0x06, 4, 32, 8*1024, l1_icache_str},
3662 3667 { 0x05, 4, 0, 32, dtlb4M_str},
3663 3668 { 0x04, 4, 0, 8, dtlb4M_str},
3664 3669 { 0x03, 4, 0, 64, dtlb4k_str},
3665 3670 { 0x02, 4, 0, 2, itlb4M_str},
3666 3671 { 0x01, 4, 0, 32, itlb4k_str},
3667 3672 { 0 }
3668 3673 };
3669 3674
3670 3675 static const struct cachetab cyrix_ctab[] = {
3671 3676 { 0x70, 4, 0, 32, "tlb-4K" },
3672 3677 { 0x80, 4, 16, 16*1024, "l1-cache" },
3673 3678 { 0 }
3674 3679 };
3675 3680
3676 3681 /*
3677 3682 * Search a cache table for a matching entry
3678 3683 */
3679 3684 static const struct cachetab *
3680 3685 find_cacheent(const struct cachetab *ct, uint_t code)
3681 3686 {
3682 3687 if (code != 0) {
3683 3688 for (; ct->ct_code != 0; ct++)
3684 3689 if (ct->ct_code <= code)
3685 3690 break;
3686 3691 if (ct->ct_code == code)
3687 3692 return (ct);
3688 3693 }
3689 3694 return (NULL);
3690 3695 }
3691 3696
3692 3697 /*
3693 3698 * Populate cachetab entry with L2 or L3 cache-information using
3694 3699 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3695 3700 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3696 3701 * information is found.
3697 3702 */
3698 3703 static int
3699 3704 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3700 3705 {
3701 3706 uint32_t level, i;
3702 3707 int ret = 0;
3703 3708
3704 3709 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3705 3710 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3706 3711
3707 3712 if (level == 2 || level == 3) {
3708 3713 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3709 3714 ct->ct_line_size =
3710 3715 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3711 3716 ct->ct_size = ct->ct_assoc *
3712 3717 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3713 3718 ct->ct_line_size *
3714 3719 (cpi->cpi_std_4[i]->cp_ecx + 1);
3715 3720
3716 3721 if (level == 2) {
3717 3722 ct->ct_label = l2_cache_str;
3718 3723 } else if (level == 3) {
3719 3724 ct->ct_label = l3_cache_str;
3720 3725 }
3721 3726 ret = 1;
3722 3727 }
3723 3728 }
3724 3729
3725 3730 return (ret);
3726 3731 }
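The size computed above is the standard deterministic-cache-parameters formula: ways × partitions × line size × sets, with each field stored as value-minus-one (ways in EBX[31:22], partitions in EBX[21:12], line size in EBX[11:0], sets in ECX). A worked sketch with hypothetical leaf-4 field values describing an 8-way, 64-byte-line, 4096-set cache.

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Hypothetical CPUID.4 fields for one cache level (all "minus one"). */
		uint32_t ways = 7;	/* EBX[31:22] */
		uint32_t parts = 0;	/* EBX[21:12] */
		uint32_t lsize = 63;	/* EBX[11:0] */
		uint32_t sets = 4095;	/* ECX */

		size_t size = (ways + 1) * (parts + 1) * (lsize + 1) * (sets + 1);
		printf("cache size = %zu bytes (%zu KB)\n", size, size / 1024);
		return (0);
	}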
3727 3732
3728 3733 /*
3729 3734  * Walk the cacheinfo descriptor, applying 'func' to every valid element.
3730 3735 * The walk is terminated if the walker returns non-zero.
3731 3736 */
3732 3737 static void
3733 3738 intel_walk_cacheinfo(struct cpuid_info *cpi,
3734 3739 void *arg, int (*func)(void *, const struct cachetab *))
3735 3740 {
3736 3741 const struct cachetab *ct;
3737 3742 struct cachetab des_49_ct, des_b1_ct;
3738 3743 uint8_t *dp;
3739 3744 int i;
3740 3745
3741 3746 if ((dp = cpi->cpi_cacheinfo) == NULL)
3742 3747 return;
3743 3748 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3744 3749 /*
3745 3750 * For overloaded descriptor 0x49 we use cpuid function 4
3746 3751 * if supported by the current processor, to create
3747 3752 * cache information.
3748 3753 * For overloaded descriptor 0xb1 we use X86_PAE flag
3749 3754 * to disambiguate the cache information.
3750 3755 */
3751 3756 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3752 3757 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3753 3758 ct = &des_49_ct;
3754 3759 } else if (*dp == 0xb1) {
3755 3760 des_b1_ct.ct_code = 0xb1;
3756 3761 des_b1_ct.ct_assoc = 4;
3757 3762 des_b1_ct.ct_line_size = 0;
3758 3763 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3759 3764 des_b1_ct.ct_size = 8;
3760 3765 des_b1_ct.ct_label = itlb2M_str;
3761 3766 } else {
3762 3767 des_b1_ct.ct_size = 4;
3763 3768 des_b1_ct.ct_label = itlb4M_str;
3764 3769 }
3765 3770 ct = &des_b1_ct;
3766 3771 } else {
3767 3772 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3768 3773 continue;
3769 3774 }
3770 3775 }
3771 3776
3772 3777 if (func(arg, ct) != 0) {
3773 3778 break;
3774 3779 }
3775 3780 }
3776 3781 }
3777 3782
3778 3783 /*
3779 3784 * (Like the Intel one, except for Cyrix CPUs)
3780 3785 */
3781 3786 static void
3782 3787 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3783 3788 void *arg, int (*func)(void *, const struct cachetab *))
3784 3789 {
3785 3790 const struct cachetab *ct;
3786 3791 uint8_t *dp;
3787 3792 int i;
3788 3793
3789 3794 if ((dp = cpi->cpi_cacheinfo) == NULL)
3790 3795 return;
3791 3796 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3792 3797 /*
3793 3798 * Search Cyrix-specific descriptor table first ..
3794 3799 */
3795 3800 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3796 3801 if (func(arg, ct) != 0)
3797 3802 break;
3798 3803 continue;
3799 3804 }
3800 3805 /*
3801 3806 * .. else fall back to the Intel one
3802 3807 */
3803 3808 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3804 3809 if (func(arg, ct) != 0)
3805 3810 break;
3806 3811 continue;
3807 3812 }
3808 3813 }
3809 3814 }
3810 3815
3811 3816 /*
3812 3817 * A cacheinfo walker that adds associativity, line-size, and size properties
3813 3818 * to the devinfo node it is passed as an argument.
3814 3819 */
3815 3820 static int
3816 3821 add_cacheent_props(void *arg, const struct cachetab *ct)
3817 3822 {
3818 3823 dev_info_t *devi = arg;
3819 3824
3820 3825 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3821 3826 if (ct->ct_line_size != 0)
3822 3827 add_cache_prop(devi, ct->ct_label, line_str,
3823 3828 ct->ct_line_size);
3824 3829 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3825 3830 return (0);
3826 3831 }
3827 3832
3828 3833
3829 3834 static const char fully_assoc[] = "fully-associative?";
3830 3835
3831 3836 /*
3832 3837 * AMD style cache/tlb description
3833 3838 *
3834 3839 * Extended functions 5 and 6 directly describe properties of
3835 3840 * tlbs and various cache levels.
3836 3841 */
3837 3842 static void
3838 3843 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3839 3844 {
3840 3845 switch (assoc) {
3841 3846 case 0: /* reserved; ignore */
3842 3847 break;
3843 3848 default:
3844 3849 add_cache_prop(devi, label, assoc_str, assoc);
3845 3850 break;
3846 3851 case 0xff:
3847 3852 add_cache_prop(devi, label, fully_assoc, 1);
3848 3853 break;
3849 3854 }
3850 3855 }
3851 3856
3852 3857 static void
3853 3858 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3854 3859 {
3855 3860 if (size == 0)
3856 3861 return;
3857 3862 add_cache_prop(devi, label, size_str, size);
3858 3863 add_amd_assoc(devi, label, assoc);
3859 3864 }
3860 3865
3861 3866 static void
3862 3867 add_amd_cache(dev_info_t *devi, const char *label,
3863 3868 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3864 3869 {
3865 3870 if (size == 0 || line_size == 0)
3866 3871 return;
3867 3872 add_amd_assoc(devi, label, assoc);
3868 3873 /*
3869 3874 * Most AMD parts have a sectored cache. Multiple cache lines are
3870 3875 * associated with each tag. A sector consists of all cache lines
3871 3876 * associated with a tag. For example, the AMD K6-III has a sector
3872 3877 * size of 2 cache lines per tag.
3873 3878 */
3874 3879 if (lines_per_tag != 0)
3875 3880 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3876 3881 add_cache_prop(devi, label, line_str, line_size);
3877 3882 add_cache_prop(devi, label, size_str, size * 1024);
3878 3883 }
3879 3884
3880 3885 static void
3881 3886 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3882 3887 {
3883 3888 switch (assoc) {
3884 3889 case 0: /* off */
3885 3890 break;
3886 3891 case 1:
3887 3892 case 2:
3888 3893 case 4:
3889 3894 add_cache_prop(devi, label, assoc_str, assoc);
3890 3895 break;
3891 3896 case 6:
3892 3897 add_cache_prop(devi, label, assoc_str, 8);
3893 3898 break;
3894 3899 case 8:
3895 3900 add_cache_prop(devi, label, assoc_str, 16);
3896 3901 break;
3897 3902 case 0xf:
3898 3903 add_cache_prop(devi, label, fully_assoc, 1);
3899 3904 break;
3900 3905 default: /* reserved; ignore */
3901 3906 break;
3902 3907 }
3903 3908 }
3904 3909
3905 3910 static void
3906 3911 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3907 3912 {
3908 3913 if (size == 0 || assoc == 0)
3909 3914 return;
3910 3915 add_amd_l2_assoc(devi, label, assoc);
3911 3916 add_cache_prop(devi, label, size_str, size);
3912 3917 }
3913 3918
3914 3919 static void
3915 3920 add_amd_l2_cache(dev_info_t *devi, const char *label,
3916 3921 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3917 3922 {
3918 3923 if (size == 0 || assoc == 0 || line_size == 0)
3919 3924 return;
3920 3925 add_amd_l2_assoc(devi, label, assoc);
3921 3926 if (lines_per_tag != 0)
3922 3927 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3923 3928 add_cache_prop(devi, label, line_str, line_size);
3924 3929 add_cache_prop(devi, label, size_str, size * 1024);
3925 3930 }
3926 3931
3927 3932 static void
3928 3933 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3929 3934 {
3930 3935 struct cpuid_regs *cp;
3931 3936
3932 3937 if (cpi->cpi_xmaxeax < 0x80000005)
3933 3938 return;
3934 3939 cp = &cpi->cpi_extd[5];
3935 3940
3936 3941 /*
3937 3942 * 4M/2M L1 TLB configuration
3938 3943 *
3939 3944 * We report the size for 2M pages because AMD uses two
3940 3945 * TLB entries for one 4M page.
3941 3946 */
3942 3947 add_amd_tlb(devi, "dtlb-2M",
3943 3948 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3944 3949 add_amd_tlb(devi, "itlb-2M",
3945 3950 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3946 3951
3947 3952 /*
3948 3953 * 4K L1 TLB configuration
3949 3954 */
3950 3955
3951 3956 switch (cpi->cpi_vendor) {
3952 3957 uint_t nentries;
3953 3958 case X86_VENDOR_TM:
3954 3959 if (cpi->cpi_family >= 5) {
3955 3960 /*
3956 3961 * Crusoe processors have 256 TLB entries, but
3957 3962 			 * the cpuid data format constrains them to
3958 3963 			 * reporting only 255 of them.
3959 3964 */
3960 3965 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3961 3966 nentries = 256;
3962 3967 /*
3963 3968 * Crusoe processors also have a unified TLB
3964 3969 */
3965 3970 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3966 3971 nentries);
3967 3972 break;
3968 3973 }
3969 3974 /*FALLTHROUGH*/
3970 3975 default:
3971 3976 add_amd_tlb(devi, itlb4k_str,
3972 3977 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3973 3978 add_amd_tlb(devi, dtlb4k_str,
3974 3979 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3975 3980 break;
3976 3981 }
3977 3982
3978 3983 /*
3979 3984 * data L1 cache configuration
3980 3985 */
3981 3986
3982 3987 add_amd_cache(devi, l1_dcache_str,
3983 3988 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3984 3989 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3985 3990
3986 3991 /*
3987 3992 * code L1 cache configuration
3988 3993 */
3989 3994
3990 3995 add_amd_cache(devi, l1_icache_str,
3991 3996 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3992 3997 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3993 3998
3994 3999 if (cpi->cpi_xmaxeax < 0x80000006)
3995 4000 return;
3996 4001 cp = &cpi->cpi_extd[6];
3997 4002
3998 4003 /* Check for a unified L2 TLB for large pages */
3999 4004
4000 4005 if (BITX(cp->cp_eax, 31, 16) == 0)
4001 4006 add_amd_l2_tlb(devi, "l2-tlb-2M",
4002 4007 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4003 4008 else {
4004 4009 add_amd_l2_tlb(devi, "l2-dtlb-2M",
4005 4010 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4006 4011 add_amd_l2_tlb(devi, "l2-itlb-2M",
4007 4012 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4008 4013 }
4009 4014
4010 4015 /* Check for a unified L2 TLB for 4K pages */
4011 4016
4012 4017 if (BITX(cp->cp_ebx, 31, 16) == 0) {
4013 4018 add_amd_l2_tlb(devi, "l2-tlb-4K",
4014 4019 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4015 4020 } else {
4016 4021 add_amd_l2_tlb(devi, "l2-dtlb-4K",
4017 4022 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4018 4023 add_amd_l2_tlb(devi, "l2-itlb-4K",
4019 4024 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4020 4025 }
4021 4026
4022 4027 add_amd_l2_cache(devi, l2_cache_str,
4023 4028 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
4024 4029 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
4025 4030 }
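amd_cache_info() above does all of its decoding with BITX(reg, high, low), which extracts the inclusive bit range high..low from a register. A standalone sketch using an illustrative stand-in macro with the same semantics (not the kernel's definition), applied to a made-up CPUID.80000005 EAX value in the same way the dtlb-2M/itlb-2M fields are pulled out above.

	#include <stdio.h>
	#include <stdint.h>

	/* Illustrative equivalent of BITX(): bits high..low of u, inclusive. */
	#define BITX(u, h, l)	(((u) >> (l)) & ((1U << ((h) - (l) + 1)) - 1))

	int
	main(void)
	{
		/* Hypothetical CPUID.80000005 EAX: 2M/4M L1 TLB description. */
		uint32_t eax = 0xff20ff20;

		printf("dtlb-2M assoc %u, entries %u\n",
		    BITX(eax, 31, 24), BITX(eax, 23, 16));
		printf("itlb-2M assoc %u, entries %u\n",
		    BITX(eax, 15, 8), BITX(eax, 7, 0));
		return (0);
	}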
4026 4031
4027 4032 /*
4028 4033  * There are two basic ways that the x86 world describes its cache
4029 4034 * and tlb architecture - Intel's way and AMD's way.
4030 4035 *
4031 4036 * Return which flavor of cache architecture we should use
4032 4037 */
4033 4038 static int
4034 4039 x86_which_cacheinfo(struct cpuid_info *cpi)
4035 4040 {
4036 4041 switch (cpi->cpi_vendor) {
4037 4042 case X86_VENDOR_Intel:
4038 4043 if (cpi->cpi_maxeax >= 2)
4039 4044 return (X86_VENDOR_Intel);
4040 4045 break;
4041 4046 case X86_VENDOR_AMD:
4042 4047 /*
4043 4048 * The K5 model 1 was the first part from AMD that reported
4044 4049 * cache sizes via extended cpuid functions.
4045 4050 */
4046 4051 if (cpi->cpi_family > 5 ||
4047 4052 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4048 4053 return (X86_VENDOR_AMD);
4049 4054 break;
4050 4055 case X86_VENDOR_TM:
4051 4056 if (cpi->cpi_family >= 5)
4052 4057 return (X86_VENDOR_AMD);
4053 4058 /*FALLTHROUGH*/
4054 4059 default:
4055 4060 /*
4056 4061 * If they have extended CPU data for 0x80000005
4057 4062 * then we assume they have AMD-format cache
4058 4063 * information.
4059 4064 *
4060 4065 * If not, and the vendor happens to be Cyrix,
4061 4066 		 * then try our Cyrix-specific handler.
4062 4067 *
4063 4068 * If we're not Cyrix, then assume we're using Intel's
4064 4069 * table-driven format instead.
4065 4070 */
4066 4071 if (cpi->cpi_xmaxeax >= 0x80000005)
4067 4072 return (X86_VENDOR_AMD);
4068 4073 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4069 4074 return (X86_VENDOR_Cyrix);
4070 4075 else if (cpi->cpi_maxeax >= 2)
4071 4076 return (X86_VENDOR_Intel);
4072 4077 break;
4073 4078 }
4074 4079 return (-1);
4075 4080 }
4076 4081
4077 4082 void
4078 4083 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4079 4084 struct cpuid_info *cpi)
4080 4085 {
4081 4086 dev_info_t *cpu_devi;
4082 4087 int create;
4083 4088
4084 4089 cpu_devi = (dev_info_t *)dip;
4085 4090
4086 4091 /* device_type */
4087 4092 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4088 4093 "device_type", "cpu");
4089 4094
4090 4095 /* reg */
4091 4096 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4092 4097 "reg", cpu_id);
4093 4098
4094 4099 /* cpu-mhz, and clock-frequency */
4095 4100 if (cpu_freq > 0) {
4096 4101 long long mul;
4097 4102
4098 4103 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4099 4104 "cpu-mhz", cpu_freq);
4100 4105 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4101 4106 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4102 4107 "clock-frequency", (int)mul);
4103 4108 }
4104 4109
4105 4110 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4106 4111 return;
4107 4112 }
4108 4113
4109 4114 /* vendor-id */
4110 4115 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4111 4116 "vendor-id", cpi->cpi_vendorstr);
4112 4117
4113 4118 if (cpi->cpi_maxeax == 0) {
4114 4119 return;
4115 4120 }
4116 4121
4117 4122 /*
4118 4123 * family, model, and step
4119 4124 */
4120 4125 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4121 4126 "family", CPI_FAMILY(cpi));
4122 4127 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4123 4128 "cpu-model", CPI_MODEL(cpi));
4124 4129 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4125 4130 "stepping-id", CPI_STEP(cpi));
4126 4131
4127 4132 /* type */
4128 4133 switch (cpi->cpi_vendor) {
4129 4134 case X86_VENDOR_Intel:
4130 4135 create = 1;
4131 4136 break;
4132 4137 default:
4133 4138 create = 0;
4134 4139 break;
4135 4140 }
4136 4141 if (create)
4137 4142 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4138 4143 "type", CPI_TYPE(cpi));
4139 4144
4140 4145 /* ext-family */
4141 4146 switch (cpi->cpi_vendor) {
4142 4147 case X86_VENDOR_Intel:
4143 4148 case X86_VENDOR_AMD:
4144 4149 create = cpi->cpi_family >= 0xf;
4145 4150 break;
4146 4151 default:
4147 4152 create = 0;
4148 4153 break;
4149 4154 }
4150 4155 if (create)
4151 4156 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4152 4157 "ext-family", CPI_FAMILY_XTD(cpi));
4153 4158
4154 4159 /* ext-model */
4155 4160 switch (cpi->cpi_vendor) {
4156 4161 case X86_VENDOR_Intel:
4157 4162 create = IS_EXTENDED_MODEL_INTEL(cpi);
4158 4163 break;
4159 4164 case X86_VENDOR_AMD:
4160 4165 create = CPI_FAMILY(cpi) == 0xf;
4161 4166 break;
4162 4167 default:
4163 4168 create = 0;
4164 4169 break;
4165 4170 }
4166 4171 if (create)
4167 4172 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4168 4173 "ext-model", CPI_MODEL_XTD(cpi));
4169 4174
4170 4175 /* generation */
4171 4176 switch (cpi->cpi_vendor) {
4172 4177 case X86_VENDOR_AMD:
4173 4178 /*
4174 4179 * AMD K5 model 1 was the first part to support this
4175 4180 */
4176 4181 create = cpi->cpi_xmaxeax >= 0x80000001;
4177 4182 break;
4178 4183 default:
4179 4184 create = 0;
4180 4185 break;
4181 4186 }
4182 4187 if (create)
4183 4188 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4184 4189 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4185 4190
4186 4191 /* brand-id */
4187 4192 switch (cpi->cpi_vendor) {
4188 4193 case X86_VENDOR_Intel:
4189 4194 /*
4190 4195 		 * brand id first appeared on Pentium III Xeon model 8
4191 4196 		 * and Celeron model 8 processors, and on Opteron
4192 4197 */
4193 4198 create = cpi->cpi_family > 6 ||
4194 4199 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4195 4200 break;
4196 4201 case X86_VENDOR_AMD:
4197 4202 create = cpi->cpi_family >= 0xf;
4198 4203 break;
4199 4204 default:
4200 4205 create = 0;
4201 4206 break;
4202 4207 }
4203 4208 if (create && cpi->cpi_brandid != 0) {
4204 4209 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4205 4210 "brand-id", cpi->cpi_brandid);
4206 4211 }
4207 4212
4208 4213 /* chunks, and apic-id */
4209 4214 switch (cpi->cpi_vendor) {
4210 4215 /*
4211 4216 * first available on Pentium IV and Opteron (K8)
4212 4217 */
4213 4218 case X86_VENDOR_Intel:
4214 4219 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4215 4220 break;
4216 4221 case X86_VENDOR_AMD:
4217 4222 create = cpi->cpi_family >= 0xf;
4218 4223 break;
4219 4224 default:
4220 4225 create = 0;
4221 4226 break;
4222 4227 }
4223 4228 if (create) {
4224 4229 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4225 4230 "chunks", CPI_CHUNKS(cpi));
4226 4231 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4227 4232 "apic-id", cpi->cpi_apicid);
4228 4233 if (cpi->cpi_chipid >= 0) {
4229 4234 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4230 4235 "chip#", cpi->cpi_chipid);
4231 4236 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4232 4237 "clog#", cpi->cpi_clogid);
4233 4238 }
4234 4239 }
4235 4240
4236 4241 /* cpuid-features */
4237 4242 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4238 4243 "cpuid-features", CPI_FEATURES_EDX(cpi));
4239 4244
4240 4245
4241 4246 /* cpuid-features-ecx */
4242 4247 switch (cpi->cpi_vendor) {
4243 4248 case X86_VENDOR_Intel:
4244 4249 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4245 4250 break;
4246 4251 case X86_VENDOR_AMD:
4247 4252 create = cpi->cpi_family >= 0xf;
4248 4253 break;
4249 4254 default:
4250 4255 create = 0;
4251 4256 break;
4252 4257 }
4253 4258 if (create)
4254 4259 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4255 4260 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4256 4261
4257 4262 /* ext-cpuid-features */
4258 4263 switch (cpi->cpi_vendor) {
4259 4264 case X86_VENDOR_Intel:
4260 4265 case X86_VENDOR_AMD:
4261 4266 case X86_VENDOR_Cyrix:
4262 4267 case X86_VENDOR_TM:
4263 4268 case X86_VENDOR_Centaur:
4264 4269 create = cpi->cpi_xmaxeax >= 0x80000001;
4265 4270 break;
4266 4271 default:
4267 4272 create = 0;
4268 4273 break;
4269 4274 }
4270 4275 if (create) {
4271 4276 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4272 4277 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4273 4278 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4274 4279 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4275 4280 }
4276 4281
4277 4282 /*
4278 4283 * Brand String first appeared in Intel Pentium IV, AMD K5
4279 4284 	 * model 1, and Cyrix GXm. On earlier models we try to
4280 4285 	 * simulate something similar .. so this string should always
4281 4286 	 * say -something- about the processor, however lame.
4282 4287 */
4283 4288 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4284 4289 "brand-string", cpi->cpi_brandstr);
4285 4290
4286 4291 /*
4287 4292 * Finally, cache and tlb information
4288 4293 */
4289 4294 switch (x86_which_cacheinfo(cpi)) {
4290 4295 case X86_VENDOR_Intel:
4291 4296 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4292 4297 break;
4293 4298 case X86_VENDOR_Cyrix:
4294 4299 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4295 4300 break;
4296 4301 case X86_VENDOR_AMD:
4297 4302 amd_cache_info(cpi, cpu_devi);
4298 4303 break;
4299 4304 default:
4300 4305 break;
4301 4306 }
4302 4307 }
4303 4308
4304 4309 struct l2info {
4305 4310 int *l2i_csz;
4306 4311 int *l2i_lsz;
4307 4312 int *l2i_assoc;
4308 4313 int l2i_ret;
4309 4314 };
4310 4315
4311 4316 /*
4312 4317 * A cacheinfo walker that fetches the size, line-size and associativity
4313 4318 * of the L2 cache
4314 4319 */
4315 4320 static int
4316 4321 intel_l2cinfo(void *arg, const struct cachetab *ct)
4317 4322 {
4318 4323 struct l2info *l2i = arg;
4319 4324 int *ip;
4320 4325
4321 4326 if (ct->ct_label != l2_cache_str &&
4322 4327 ct->ct_label != sl2_cache_str)
4323 4328 return (0); /* not an L2 -- keep walking */
4324 4329
4325 4330 if ((ip = l2i->l2i_csz) != NULL)
4326 4331 *ip = ct->ct_size;
4327 4332 if ((ip = l2i->l2i_lsz) != NULL)
4328 4333 *ip = ct->ct_line_size;
4329 4334 if ((ip = l2i->l2i_assoc) != NULL)
4330 4335 *ip = ct->ct_assoc;
4331 4336 l2i->l2i_ret = ct->ct_size;
4332 4337 return (1); /* was an L2 -- terminate walk */
4333 4338 }
4334 4339
4335 4340 /*
4336 4341 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4337 4342 *
4338 4343 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4339 4344 * value is the associativity, the associativity for the L2 cache and
4340 4345 * tlb is encoded in the following table. The 4 bit L2 value serves as
4341 4346 * an index into the amd_afd[] array to determine the associativity.
4342 4347 * -1 is undefined. 0 is fully associative.
4343 4348 */
4344 4349
4345 4350 static int amd_afd[] =
4346 4351 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
4347 4352
4348 4353 static void
4349 4354 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4350 4355 {
4351 4356 struct cpuid_regs *cp;
4352 4357 uint_t size, assoc;
4353 4358 int i;
4354 4359 int *ip;
4355 4360
4356 4361 if (cpi->cpi_xmaxeax < 0x80000006)
4357 4362 return;
4358 4363 cp = &cpi->cpi_extd[6];
4359 4364
4360 4365 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4361 4366 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4362 4367 uint_t cachesz = size * 1024;
4363 4368 assoc = amd_afd[i];
4364 4369
4365 4370 ASSERT(assoc != -1);
4366 4371
4367 4372 if ((ip = l2i->l2i_csz) != NULL)
4368 4373 *ip = cachesz;
4369 4374 if ((ip = l2i->l2i_lsz) != NULL)
4370 4375 *ip = BITX(cp->cp_ecx, 7, 0);
4371 4376 if ((ip = l2i->l2i_assoc) != NULL)
4372 4377 *ip = assoc;
4373 4378 l2i->l2i_ret = cachesz;
4374 4379 }
4375 4380 }
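A worked sketch of the decode that amd_l2cacheinfo() performs with the table above: the 4-bit associativity field from CPUID.80000006 ECX[15:12] indexes amd_afd[], ECX[31:16] gives the size in KB, and ECX[7:0] the line size. The ECX value here is hypothetical sample data describing a 512 KB, 8-way cache with 64-byte lines.

	#include <stdio.h>
	#include <stdint.h>

	static int amd_afd[] =
	    {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};

	int
	main(void)
	{
		/* Hypothetical CPUID.80000006 ECX. */
		uint32_t ecx = 0x02006140;

		unsigned int sizekb = (ecx >> 16) & 0xffff;	/* ECX[31:16] */
		unsigned int afield = (ecx >> 12) & 0xf;	/* ECX[15:12] */
		unsigned int lsize = ecx & 0xff;		/* ECX[7:0] */

		printf("L2: %u KB, %d-way, %u-byte lines\n",
		    sizekb, amd_afd[afield], lsize);
		return (0);
	}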
4376 4381
4377 4382 int
4378 4383 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4379 4384 {
4380 4385 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4381 4386 struct l2info __l2info, *l2i = &__l2info;
4382 4387
4383 4388 l2i->l2i_csz = csz;
4384 4389 l2i->l2i_lsz = lsz;
4385 4390 l2i->l2i_assoc = assoc;
4386 4391 l2i->l2i_ret = -1;
4387 4392
4388 4393 switch (x86_which_cacheinfo(cpi)) {
4389 4394 case X86_VENDOR_Intel:
4390 4395 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4391 4396 break;
4392 4397 case X86_VENDOR_Cyrix:
4393 4398 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4394 4399 break;
4395 4400 case X86_VENDOR_AMD:
4396 4401 amd_l2cacheinfo(cpi, l2i);
4397 4402 break;
4398 4403 default:
4399 4404 break;
4400 4405 }
4401 4406 return (l2i->l2i_ret);
4402 4407 }
4403 4408
4404 4409 #if !defined(__xpv)
4405 4410
4406 4411 uint32_t *
4407 4412 cpuid_mwait_alloc(cpu_t *cpu)
4408 4413 {
4409 4414 uint32_t *ret;
4410 4415 size_t mwait_size;
4411 4416
4412 4417 ASSERT(cpuid_checkpass(CPU, 2));
4413 4418
4414 4419 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4415 4420 if (mwait_size == 0)
4416 4421 return (NULL);
4417 4422
4418 4423 /*
4419 4424 * kmem_alloc() returns cache line size aligned data for mwait_size
4420 4425 * allocations. mwait_size is currently cache line sized. Neither
4421 4426 	 * of these implementation details is guaranteed to be true in the
4422 4427 * future.
4423 4428 *
4424 4429 * First try allocating mwait_size as kmem_alloc() currently returns
4425 4430 * correctly aligned memory. If kmem_alloc() does not return
4426 4431 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4427 4432 *
4428 4433 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4429 4434 * decide to free this memory.
4430 4435 */
4431 4436 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4432 4437 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4433 4438 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4434 4439 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4435 4440 *ret = MWAIT_RUNNING;
4436 4441 return (ret);
4437 4442 } else {
4438 4443 kmem_free(ret, mwait_size);
4439 4444 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4440 4445 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4441 4446 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4442 4447 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4443 4448 *ret = MWAIT_RUNNING;
4444 4449 return (ret);
4445 4450 }
4446 4451 }
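The alignment test above relies on P2ROUNDUP() rounding an address up to the next multiple of mwait_size: an allocation already on that boundary rounds to itself and is kept, anything else forces the double-sized allocation. A standalone sketch of the same arithmetic; the round-up macro here is an illustrative equivalent rather than the kernel's definition, and the addresses are made up.

	#include <stdio.h>
	#include <stdint.h>

	/* Illustrative power-of-two round-up, equivalent in effect to P2ROUNDUP(). */
	#define ROUNDUP_P2(x, align) \
		(((x) + (align) - 1) & ~((uintptr_t)(align) - 1))

	int
	main(void)
	{
		uintptr_t mwait_size = 64;	/* a cache-line-sized monitor area */
		uintptr_t aligned = 0x1000;	/* hypothetical allocation results */
		uintptr_t unaligned = 0x1020;

		/* Already aligned: rounds to itself, so the first allocation is kept. */
		printf("0x%lx -> 0x%lx\n", (unsigned long)aligned,
		    (unsigned long)ROUNDUP_P2(aligned, mwait_size));

		/* Misaligned: rounds up, which is why a 2x buffer is used instead. */
		printf("0x%lx -> 0x%lx\n", (unsigned long)unaligned,
		    (unsigned long)ROUNDUP_P2(unaligned, mwait_size));
		return (0);
	}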
4447 4452
4448 4453 void
4449 4454 cpuid_mwait_free(cpu_t *cpu)
4450 4455 {
4451 4456 if (cpu->cpu_m.mcpu_cpi == NULL) {
4452 4457 return;
4453 4458 }
4454 4459
4455 4460 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4456 4461 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4457 4462 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4458 4463 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4459 4464 }
4460 4465
4461 4466 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4462 4467 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4463 4468 }
4464 4469
4465 4470 void
4466 4471 patch_tsc_read(int flag)
4467 4472 {
4468 4473 size_t cnt;
4469 4474
4470 4475 switch (flag) {
4471 4476 case X86_NO_TSC:
4472 4477 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4473 4478 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4474 4479 break;
4475 4480 case X86_HAVE_TSCP:
4476 4481 cnt = &_tscp_end - &_tscp_start;
4477 4482 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4478 4483 break;
4479 4484 case X86_TSC_MFENCE:
4480 4485 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4481 4486 (void) memcpy((void *)tsc_read,
4482 4487 (void *)&_tsc_mfence_start, cnt);
4483 4488 break;
4484 4489 case X86_TSC_LFENCE:
4485 4490 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4486 4491 (void) memcpy((void *)tsc_read,
4487 4492 (void *)&_tsc_lfence_start, cnt);
4488 4493 break;
4489 4494 default:
4490 4495 break;
4491 4496 }
4492 4497 }
4493 4498
4494 4499 int
4495 4500 cpuid_deep_cstates_supported(void)
4496 4501 {
4497 4502 struct cpuid_info *cpi;
4498 4503 struct cpuid_regs regs;
4499 4504
4500 4505 ASSERT(cpuid_checkpass(CPU, 1));
4501 4506
4502 4507 cpi = CPU->cpu_m.mcpu_cpi;
4503 4508
4504 4509 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4505 4510 return (0);
4506 4511
4507 4512 switch (cpi->cpi_vendor) {
4508 4513 case X86_VENDOR_Intel:
4509 4514 if (cpi->cpi_xmaxeax < 0x80000007)
4510 4515 return (0);
4511 4516
4512 4517 /*
4513 4518 		 * Does the TSC run at a constant rate in all ACPI C-states?
4514 4519 */
4515 4520 regs.cp_eax = 0x80000007;
4516 4521 		(void) __cpuid_insn(&regs);
4517 4522 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4518 4523
4519 4524 default:
4520 4525 return (0);
4521 4526 }
4522 4527 }
4523 4528
4524 4529 #endif /* !__xpv */
4525 4530
4526 4531 void
4527 4532 post_startup_cpu_fixups(void)
4528 4533 {
4529 4534 #ifndef __xpv
4530 4535 /*
4531 4536 * Some AMD processors support C1E state. Entering this state will
4532 4537 * cause the local APIC timer to stop, which we can't deal with at
4533 4538 * this time.
4534 4539 */
4535 4540 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4536 4541 on_trap_data_t otd;
4537 4542 uint64_t reg;
4538 4543
4539 4544 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4540 4545 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4541 4546 /* Disable C1E state if it is enabled by BIOS */
4542 4547 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4543 4548 AMD_ACTONCMPHALT_MASK) {
4544 4549 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4545 4550 AMD_ACTONCMPHALT_SHIFT);
4546 4551 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4547 4552 }
4548 4553 }
4549 4554 no_trap();
4550 4555 }
4551 4556 #endif /* !__xpv */
4552 4557 }
4553 4558
4554 4559 /*
4555 4560 * Setup necessary registers to enable XSAVE feature on this processor.
4556 4561 * This function needs to be called early enough, so that no xsave/xrstor
4557 4562 * ops will execute on the processor before the MSRs are properly set up.
4558 4563 *
4559 4564 * Current implementation has the following assumption:
4560 4565 * - cpuid_pass1() is done, so that X86 features are known.
4561 4566 * - fpu_probe() is done, so that fp_save_mech is chosen.
4562 4567 */
4563 4568 void
4564 4569 xsave_setup_msr(cpu_t *cpu)
4565 4570 {
4566 4571 ASSERT(fp_save_mech == FP_XSAVE);
4567 4572 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4568 4573
4569 4574 /* Enable OSXSAVE in CR4. */
4570 4575 setcr4(getcr4() | CR4_OSXSAVE);
4571 4576 /*
4572 4577 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4573 4578 * correct value.
4574 4579 */
4575 4580 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4576 4581 setup_xfem();
4577 4582 }
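The ECX update above is what lets consumers of the cached cpuid data see OSXSAVE as enabled. From user space the same state is visible directly in CPUID.1 ECX: bit 26 reports XSAVE support and bit 27 reports that the OS has set CR4.OSXSAVE. A hedged sketch using the compiler-provided cpuid helper from <cpuid.h>.

	#include <stdio.h>
	#include <cpuid.h>		/* __get_cpuid() on gcc/clang */

	int
	main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return (1);

		/* Bit 26: CPU supports XSAVE; bit 27: OS has enabled CR4.OSXSAVE. */
		printf("xsave: %s, osxsave: %s\n",
		    (ecx & (1U << 26)) ? "yes" : "no",
		    (ecx & (1U << 27)) ? "yes" : "no");
		return (0);
	}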
4578 4583
4579 4584 /*
4580 4585  * Starting with the Westmere processor, the local
4581 4586 * APIC timer will continue running in all C-states,
4582 4587 * including the deepest C-states.
4583 4588 */
4584 4589 int
4585 4590 cpuid_arat_supported(void)
4586 4591 {
4587 4592 struct cpuid_info *cpi;
4588 4593 struct cpuid_regs regs;
4589 4594
4590 4595 ASSERT(cpuid_checkpass(CPU, 1));
4591 4596 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4592 4597
4593 4598 cpi = CPU->cpu_m.mcpu_cpi;
4594 4599
4595 4600 switch (cpi->cpi_vendor) {
4596 4601 case X86_VENDOR_Intel:
4597 4602 /*
4598 4603 * Always-running Local APIC Timer is
4599 4604 * indicated by CPUID.6.EAX[2].
4600 4605 */
4601 4606 if (cpi->cpi_maxeax >= 6) {
4602 4607 regs.cp_eax = 6;
4603 4608 			(void) cpuid_insn(NULL, &regs);
4604 4609 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4605 4610 } else {
4606 4611 return (0);
4607 4612 }
4608 4613 default:
4609 4614 return (0);
4610 4615 }
4611 4616 }
4612 4617
4613 4618 /*
4614 4619 * Check support for Intel ENERGY_PERF_BIAS feature
4615 4620 */
4616 4621 int
4617 4622 cpuid_iepb_supported(struct cpu *cp)
4618 4623 {
4619 4624 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4620 4625 struct cpuid_regs regs;
4621 4626
4622 4627 ASSERT(cpuid_checkpass(cp, 1));
4623 4628
4624 4629 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4625 4630 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4626 4631 return (0);
4627 4632 }
4628 4633
4629 4634 /*
4630 4635 * Intel ENERGY_PERF_BIAS MSR is indicated by
4631 4636 * capability bit CPUID.6.ECX.3
4632 4637 */
4633 4638 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4634 4639 return (0);
4635 4640
4636 4641 regs.cp_eax = 0x6;
4637 4642 	(void) cpuid_insn(NULL, &regs);
4638 4643 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4639 4644 }
4640 4645
4641 4646 /*
4642 4647 * Check support for TSC deadline timer
4643 4648 *
4644 4649  * The TSC deadline timer provides a superior software programming
4645 4650  * model over the local APIC timer, one that eliminates "time drifts".
4646 4651 * Instead of specifying a relative time, software specifies an
4647 4652 * absolute time as the target at which the processor should
4648 4653 * generate a timer event.
4649 4654 */
4650 4655 int
4651 4656 cpuid_deadline_tsc_supported(void)
4652 4657 {
4653 4658 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4654 4659 struct cpuid_regs regs;
4655 4660
4656 4661 ASSERT(cpuid_checkpass(CPU, 1));
4657 4662 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4658 4663
4659 4664 switch (cpi->cpi_vendor) {
4660 4665 case X86_VENDOR_Intel:
4661 4666 if (cpi->cpi_maxeax >= 1) {
4662 4667 regs.cp_eax = 1;
4663 4668 			(void) cpuid_insn(NULL, &regs);
4664 4669 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4665 4670 } else {
4666 4671 return (0);
4667 4672 }
4668 4673 default:
4669 4674 return (0);
4670 4675 }
4671 4676 }
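Because deadline mode takes an absolute TSC value rather than a countdown, arming the timer reduces to adding the desired delta to the current TSC. A minimal sketch of that calculation only, assuming a hypothetical 2 GHz invariant TSC; the MSR write a real implementation would perform is deliberately omitted.

	#include <stdio.h>
	#include <stdint.h>
	#include <x86intrin.h>		/* __rdtsc() on gcc/clang */

	int
	main(void)
	{
		/* Hypothetical: fire 10ms from now on an assumed 2GHz invariant TSC. */
		uint64_t tsc_hz = 2000000000ULL;
		uint64_t delta = tsc_hz / 100;

		uint64_t deadline = __rdtsc() + delta;

		/*
		 * A kernel would program this absolute value into the deadline
		 * register; the point here is only that the target is absolute.
		 */
		printf("now+10ms deadline = %llu\n", (unsigned long long)deadline);
		return (0);
	}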
4672 4677
4673 4678 #if defined(__amd64) && !defined(__xpv)
4674 4679 /*
4675 4680 * Patch in versions of bcopy for high performance Intel Nhm processors
4676 4681 * and later...
4677 4682 */
4678 4683 void
4679 4684 patch_memops(uint_t vendor)
4680 4685 {
4681 4686 size_t cnt, i;
4682 4687 caddr_t to, from;
4683 4688
4684 4689 if ((vendor == X86_VENDOR_Intel) &&
4685 4690 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4686 4691 cnt = &bcopy_patch_end - &bcopy_patch_start;
4687 4692 to = &bcopy_ck_size;
4688 4693 from = &bcopy_patch_start;
4689 4694 for (i = 0; i < cnt; i++) {
4690 4695 *to++ = *from++;
4691 4696 }
4692 4697 }
4693 4698 }
4694 4699 #endif /* __amd64 && !__xpv */
4695 4700
4696 4701 /*
4697 4702 * This function finds the number of bits to represent the number of cores per
4698 4703 * chip and the number of strands per core for the Intel platforms.
4699 4704  * It re-uses the x2APIC cpuid code from cpuid_pass2().
4700 4705 */
4701 4706 void
4702 4707 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4703 4708 {
4704 4709 struct cpuid_regs regs;
4705 4710 struct cpuid_regs *cp = ®s;
4706 4711
4707 4712 if (vendor != X86_VENDOR_Intel) {
4708 4713 return;
4709 4714 }
4710 4715
4711 4716 /* if the cpuid level is 0xB, extended topo is available. */
4712 4717 cp->cp_eax = 0;
4713 4718 if (__cpuid_insn(cp) >= 0xB) {
4714 4719
4715 4720 cp->cp_eax = 0xB;
4716 4721 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4717 4722 (void) __cpuid_insn(cp);
4718 4723
4719 4724 /*
4720 4725 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4721 4726 * indicates that the extended topology enumeration leaf is
4722 4727 * available.
4723 4728 */
4724 4729 if (cp->cp_ebx) {
4725 4730 uint_t coreid_shift = 0;
4726 4731 uint_t chipid_shift = 0;
4727 4732 uint_t i;
4728 4733 uint_t level;
4729 4734
4730 4735 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4731 4736 cp->cp_eax = 0xB;
4732 4737 cp->cp_ecx = i;
4733 4738
4734 4739 (void) __cpuid_insn(cp);
4735 4740 level = CPI_CPU_LEVEL_TYPE(cp);
4736 4741
4737 4742 if (level == 1) {
4738 4743 /*
4739 4744 * Thread level processor topology
4740 4745 * Number of bits shift right APIC ID
4741 4746 * to get the coreid.
4742 4747 */
4743 4748 coreid_shift = BITX(cp->cp_eax, 4, 0);
4744 4749 } else if (level == 2) {
4745 4750 /*
4746 4751 * Core level processor topology
4747 4752 * Number of bits shift right APIC ID
4748 4753 * to get the chipid.
4749 4754 */
4750 4755 chipid_shift = BITX(cp->cp_eax, 4, 0);
4751 4756 }
4752 4757 }
4753 4758
4754 4759 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4755 4760 *strand_nbits = coreid_shift;
4756 4761 *core_nbits = chipid_shift - coreid_shift;
4757 4762 }
4758 4763 }
4759 4764 }
4760 4765 }
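The two shift counts harvested above partition an x2APIC ID into strand, core, and chip fields: the low coreid_shift bits select the strand, the next (chipid_shift - coreid_shift) bits the core, and the remaining high bits the chip, matching how *strand_nbits and *core_nbits are set. A standalone sketch with hypothetical shift values (1 strand bit, 3 core bits) and a made-up APIC ID.

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* Hypothetical values as leaf 0xB might report them. */
		unsigned int coreid_shift = 1;	/* SMT: 1 bit of strand */
		unsigned int chipid_shift = 4;	/* 3 bits of core above that */
		uint32_t apicid = 0x2d;		/* made-up x2APIC ID */

		uint32_t strand = apicid & ((1U << coreid_shift) - 1);
		uint32_t core = (apicid >> coreid_shift) &
		    ((1U << (chipid_shift - coreid_shift)) - 1);
		uint32_t chip = apicid >> chipid_shift;

		printf("apicid 0x%x -> chip %u, core %u, strand %u\n",
		    apicid, chip, core, strand);
		return (0);
	}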
3437 lines elided