Print this page
XXXX pass in cpu_pause_func via pause_cpus
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/os/cpr_impl.c
+++ new/usr/src/uts/i86pc/os/cpr_impl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Platform specific implementation code
27 27 * Currently only suspend to RAM is supported (ACPI S3)
28 28 */
29 29
30 30 #define SUNDDI_IMPL
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/promif.h>
34 34 #include <sys/prom_isa.h>
35 35 #include <sys/prom_plat.h>
36 36 #include <sys/cpuvar.h>
37 37 #include <sys/pte.h>
38 38 #include <vm/hat.h>
39 39 #include <vm/page.h>
40 40 #include <vm/as.h>
41 41 #include <sys/cpr.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/clock.h>
44 44 #include <sys/kmem.h>
45 45 #include <sys/panic.h>
46 46 #include <vm/seg_kmem.h>
47 47 #include <sys/cpu_module.h>
48 48 #include <sys/callb.h>
49 49 #include <sys/machsystm.h>
50 50 #include <sys/vmsystm.h>
51 51 #include <sys/systm.h>
52 52 #include <sys/archsystm.h>
53 53 #include <sys/stack.h>
54 54 #include <sys/fs/ufs_fs.h>
55 55 #include <sys/memlist.h>
56 56 #include <sys/bootconf.h>
57 57 #include <sys/thread.h>
58 58 #include <sys/x_call.h>
59 59 #include <sys/smp_impldefs.h>
60 60 #include <vm/vm_dep.h>
61 61 #include <sys/psm.h>
62 62 #include <sys/epm.h>
63 63 #include <sys/cpr_wakecode.h>
64 64 #include <sys/x86_archext.h>
65 65 #include <sys/reboot.h>
66 66 #include <sys/acpi/acpi.h>
67 67 #include <sys/acpica.h>
68 68 #include <sys/fp.h>
69 69 #include <sys/sysmacros.h>
70 70
71 71 #define AFMT "%lx"
72 72
73 73 extern int flushes_require_xcalls;
74 74 extern cpuset_t cpu_ready_set;
75 75
76 76 #if defined(__amd64)
77 77 extern void *wc_long_mode_64(void);
78 78 #endif /* __amd64 */
79 79 extern int tsc_gethrtime_enable;
80 80 extern void i_cpr_start_cpu(void);
81 81
82 82 ushort_t cpr_mach_type = CPR_MACHTYPE_X86;
83 83 void (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;
84 84
85 85 static wc_cpu_t *wc_other_cpus = NULL;
86 86 static cpuset_t procset;
87 87
88 88 static void
89 89 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);
90 90
91 91 static int i_cpr_platform_alloc(psm_state_request_t *req);
92 92 static void i_cpr_platform_free(psm_state_request_t *req);
93 93 static int i_cpr_save_apic(psm_state_request_t *req);
94 94 static int i_cpr_restore_apic(psm_state_request_t *req);
95 95 static int wait_for_set(cpuset_t *set, int who);
96 96
97 97 static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
98 98 void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);
99 99
100 100 #ifdef STACK_GROWTH_DOWN
101 101 #define CPR_GET_STACK_START(t) ((t)->t_stkbase)
102 102 #define CPR_GET_STACK_END(t) ((t)->t_stk)
103 103 #else
104 104 #define CPR_GET_STACK_START(t) ((t)->t_stk)
105 105 #define CPR_GET_STACK_END(t) ((t)->t_stkbase)
106 106 #endif /* STACK_GROWTH_DOWN */
107 107
108 108 /*
109 109 * restart paused slave cpus
110 110 */
111 111 void
112 112 i_cpr_machdep_setup(void)
113 113 {
114 114 if (ncpus > 1) {
115 115 CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
116 116 mutex_enter(&cpu_lock);
117 117 start_cpus();
118 118 mutex_exit(&cpu_lock);
119 119 }
120 120 }
121 121
122 122
/*
 * Stop all interrupt activity in the system by raising the priority
 * level with spl7(); the previous level is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
131 131
/*
 * Set the machine up to take interrupts again by dropping the priority
 * level with spl0(); the previous level is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
140 140
/*
 * Save miscellaneous information which needs to be written to the
 * state file; that information is required to re-initialize
 * kernel/prom handshaking.
 *
 * This implementation saves nothing: it only asserts that it is never
 * reached (the assertion always fails when ASSERT is enabled).
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;

	ASSERT(notcalled);
}
152 152
153 153
/*
 * Intentionally empty: there is nothing to do here on this platform.
 */
void
i_cpr_set_tbr(void)
{
}
158 158
159 159
160 160 processorid_t
161 161 i_cpr_bootcpuid(void)
162 162 {
163 163 return (0);
164 164 }
165 165
166 166 /*
167 167 * cpu0 should contain bootcpu info
168 168 */
169 169 cpu_t *
170 170 i_cpr_bootcpu(void)
171 171 {
172 172 ASSERT(MUTEX_HELD(&cpu_lock));
173 173
174 174 return (cpu_get(i_cpr_bootcpuid()));
175 175 }
176 176
177 177 /*
178 178 * Save context for the specified CPU
179 179 */
180 180 void *
181 181 i_cpr_save_context(void *arg)
182 182 {
183 183 long index = (long)arg;
184 184 psm_state_request_t *papic_state;
185 185 int resuming;
186 186 int ret;
187 187 wc_cpu_t *wc_cpu = wc_other_cpus + index;
188 188
189 189 PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))
190 190
191 191 ASSERT(index < NCPU);
192 192
193 193 papic_state = &(wc_cpu)->wc_apic_state;
194 194
195 195 ret = i_cpr_platform_alloc(papic_state);
196 196 ASSERT(ret == 0);
197 197
198 198 ret = i_cpr_save_apic(papic_state);
199 199 ASSERT(ret == 0);
200 200
201 201 i_cpr_save_stack(curthread, wc_cpu);
202 202
203 203 /*
204 204 * wc_save_context returns twice, once when susending and
205 205 * once when resuming, wc_save_context() returns 0 when
206 206 * suspending and non-zero upon resume
207 207 */
208 208 resuming = (wc_save_context(wc_cpu) == 0);
209 209
210 210 /*
211 211 * do NOT call any functions after this point, because doing so
212 212 * will modify the stack that we are running on
213 213 */
214 214
215 215 if (resuming) {
216 216
217 217 ret = i_cpr_restore_apic(papic_state);
218 218 ASSERT(ret == 0);
219 219
220 220 i_cpr_platform_free(papic_state);
221 221
222 222 /*
223 223 * Enable interrupts on this cpu.
224 224 * Do not bind interrupts to this CPU's local APIC until
225 225 * the CPU is ready to receive interrupts.
226 226 */
227 227 ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
228 228 mutex_enter(&cpu_lock);
229 229 cpu_enable_intr(CPU);
230 230 mutex_exit(&cpu_lock);
231 231
232 232 /*
233 233 * Setting the bit in cpu_ready_set must be the last operation
234 234 * in processor initialization; the boot CPU will continue to
235 235 * boot once it sees this bit set for all active CPUs.
236 236 */
237 237 CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);
238 238
239 239 PMD(PMD_SX,
240 240 ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
241 241 CPU->cpu_id))
242 242 } else {
243 243 /*
244 244 * Disable interrupts on this CPU so that PSM knows not to bind
245 245 * interrupts here on resume until the CPU has executed
246 246 * cpu_enable_intr() (above) in the resume path.
247 247 * We explicitly do not grab cpu_lock here because at this point
248 248 * in the suspend process, the boot cpu owns cpu_lock and all
249 249 * other cpus are also executing in the pause thread (only
250 250 * modifying their respective CPU structure).
251 251 */
252 252 (void) cpu_disable_intr(CPU);
253 253 }
254 254
255 255 PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
256 256 resuming))
257 257
258 258 return (NULL);
259 259 }
260 260
261 261 static ushort_t *warm_reset_vector = NULL;
262 262
263 263 static ushort_t *
264 264 map_warm_reset_vector()
265 265 {
266 266 /*LINTED*/
267 267 if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
268 268 sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
269 269 return (NULL);
270 270
271 271 /*
272 272 * setup secondary cpu bios boot up vector
273 273 */
274 274 *warm_reset_vector = (ushort_t)((caddr_t)
275 275 /*LINTED*/
276 276 ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
277 277 + ((ulong_t)rm_platter_va & 0xf));
278 278 warm_reset_vector++;
279 279 *warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
280 280
281 281 --warm_reset_vector;
282 282 return (warm_reset_vector);
283 283 }
284 284
285 285 void
286 286 i_cpr_pre_resume_cpus()
287 287 {
288 288 /*
289 289 * this is a cut down version of start_other_cpus()
290 290 * just do the initialization to wake the other cpus
291 291 */
292 292 unsigned who;
293 293 int boot_cpuid = i_cpr_bootcpuid();
294 294 uint32_t code_length = 0;
295 295 caddr_t wakevirt = rm_platter_va;
296 296 /*LINTED*/
297 297 wakecode_t *wp = (wakecode_t *)wakevirt;
298 298 char *str = "i_cpr_pre_resume_cpus";
299 299 extern int get_tsc_ready();
300 300 int err;
301 301
302 302 /*LINTED*/
303 303 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
304 304
305 305 /*
306 306 * If startup wasn't able to find a page under 1M, we cannot
307 307 * proceed.
308 308 */
309 309 if (rm_platter_va == 0) {
310 310 cmn_err(CE_WARN, "Cannot suspend the system because no "
311 311 "memory below 1M could be found for processor startup");
312 312 return;
313 313 }
314 314
315 315 /*
316 316 * Copy the real mode code at "real_mode_start" to the
317 317 * page at rm_platter_va.
318 318 */
319 319 warm_reset_vector = map_warm_reset_vector();
320 320 if (warm_reset_vector == NULL) {
321 321 PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
322 322 return;
323 323 }
324 324
325 325 flushes_require_xcalls = 1;
326 326
327 327 /*
328 328 * We lock our affinity to the master CPU to ensure that all slave CPUs
329 329 * do their TSC syncs with the same CPU.
330 330 */
331 331
332 332 affinity_set(CPU_CURRENT);
333 333
334 334 /*
335 335 * Mark the boot cpu as being ready and in the procset, since we are
336 336 * running on that cpu.
337 337 */
338 338 CPUSET_ONLY(cpu_ready_set, boot_cpuid);
339 339 CPUSET_ONLY(procset, boot_cpuid);
340 340
341 341 for (who = 0; who < max_ncpus; who++) {
342 342
343 343 wc_cpu_t *cpup = wc_other_cpus + who;
344 344 wc_desctbr_t gdt;
345 345
346 346 if (who == boot_cpuid)
347 347 continue;
348 348
349 349 if (!CPU_IN_SET(mp_cpus, who))
350 350 continue;
351 351
352 352 PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
353 353
354 354 bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
355 355
356 356 gdt.base = cpup->wc_gdt_base;
357 357 gdt.limit = cpup->wc_gdt_limit;
358 358
359 359 #if defined(__amd64)
360 360 code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
361 361 (uintptr_t)wc_rm_start);
362 362 #else
363 363 code_length = 0;
364 364 #endif
365 365
366 366 init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
367 367
368 368 mutex_enter(&cpu_lock);
369 369 err = mach_cpuid_start(who, rm_platter_va);
370 370 mutex_exit(&cpu_lock);
371 371 if (err != 0) {
372 372 cmn_err(CE_WARN, "cpu%d: failed to start during "
373 373 "suspend/resume error %d", who, err);
374 374 continue;
375 375 }
376 376
377 377 PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
378 378
379 379 if (!wait_for_set(&procset, who))
380 380 continue;
381 381
382 382 PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
383 383
384 384 PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
385 385
386 386 if (tsc_gethrtime_enable) {
387 387 PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
388 388 tsc_sync_master(who);
389 389 }
390 390
391 391 PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
392 392 who))
393 393 /*
394 394 * Wait for cpu to declare that it is ready, we want the
395 395 * cpus to start serially instead of in parallel, so that
396 396 * they do not contend with each other in wc_rm_start()
397 397 */
398 398 if (!wait_for_set(&cpu_ready_set, who))
399 399 continue;
400 400
401 401 /*
402 402 * do not need to re-initialize dtrace using dtrace_cpu_init
403 403 * function
404 404 */
405 405 PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
406 406 }
407 407
408 408 affinity_clear();
409 409
410 410 PMD(PMD_SX, ("%s() all cpus now ready\n", str))
411 411
412 412 }
413 413
414 414 static void
415 415 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
416 416 {
417 417 psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
418 418 }
419 419
420 420 /*
421 421 * We need to setup a 1:1 (virtual to physical) mapping for the
422 422 * page containing the wakeup code.
423 423 */
424 424 static struct as *save_as; /* when switching to kas */
425 425
426 426 static void
427 427 unmap_wakeaddr_1to1(uint64_t wakephys)
428 428 {
429 429 uintptr_t wp = (uintptr_t)wakephys;
430 430 hat_setup(save_as->a_hat, 0); /* switch back from kernel hat */
431 431 hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
432 432 }
433 433
434 434 void
435 435 i_cpr_post_resume_cpus()
436 436 {
437 437 uint64_t wakephys = rm_platter_pa;
438 438
439 439 if (warm_reset_vector != NULL)
440 440 unmap_warm_reset_vector(warm_reset_vector);
441 441
442 442 hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
443 443 HAT_UNLOAD);
444 444
445 445 /*
446 446 * cmi_post_mpstartup() is only required upon boot not upon
447 447 * resume from RAM
448 448 */
449 449
450 450 PT(PT_UNDO1to1);
451 451 /* Tear down 1:1 mapping for wakeup code */
452 452 unmap_wakeaddr_1to1(wakephys);
453 453 }
454 454
/*
 * Intentionally empty on this platform; flag is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
460 460
/*
 * Report whether reusable statefiles are supported: always 0 (no) here.
 */
int
i_cpr_reusable_supported(void)
{
	return (0);
}
466 466 static void
467 467 map_wakeaddr_1to1(uint64_t wakephys)
468 468 {
469 469 uintptr_t wp = (uintptr_t)wakephys;
470 470 hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
471 471 (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
472 472 HAT_LOAD);
473 473 save_as = curthread->t_procp->p_as;
474 474 hat_setup(kas.a_hat, 0); /* switch to kernel-only hat */
475 475 }
476 476
477 477
478 478 void
479 479 prt_other_cpus()
480 480 {
481 481 int who;
482 482
483 483 if (ncpus == 1) {
484 484 PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
485 485 "uniprocessor machine\n"))
486 486 return;
487 487 }
488 488
489 489 for (who = 0; who < max_ncpus; who++) {
490 490
491 491 wc_cpu_t *cpup = wc_other_cpus + who;
492 492
493 493 if (!CPU_IN_SET(mp_cpus, who))
494 494 continue;
495 495
496 496 PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
497 497 "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
498 498 AFMT ", sp=%lx\n", who,
499 499 (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
500 500 (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
501 501 (long)cpup->wc_ldt, (long)cpup->wc_tr,
502 502 (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
503 503 }
504 504 }
505 505
506 506 /*
507 507 * Power down the system.
508 508 */
509 509 int
510 510 i_cpr_power_down(int sleeptype)
511 511 {
512 512 caddr_t wakevirt = rm_platter_va;
513 513 uint64_t wakephys = rm_platter_pa;
514 514 ulong_t saved_intr;
515 515 uint32_t code_length = 0;
516 516 wc_desctbr_t gdt;
517 517 /*LINTED*/
518 518 wakecode_t *wp = (wakecode_t *)wakevirt;
519 519 /*LINTED*/
520 520 rm_platter_t *wcpp = (rm_platter_t *)wakevirt;
521 521 wc_cpu_t *cpup = &(wp->wc_cpu);
522 522 dev_info_t *ppm;
523 523 int ret = 0;
524 524 power_req_t power_req;
525 525 char *str = "i_cpr_power_down";
526 526 #if defined(__amd64)
527 527 /*LINTED*/
528 528 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
529 529 #endif
530 530 extern int cpr_suspend_succeeded;
531 531 extern void kernel_wc_code();
532 532
533 533 ASSERT(sleeptype == CPR_TORAM);
534 534 ASSERT(CPU->cpu_id == 0);
535 535
536 536 if ((ppm = PPM(ddi_root_node())) == NULL) {
537 537 PMD(PMD_SX, ("%s: root node not claimed\n", str))
538 538 return (ENOTTY);
539 539 }
540 540
541 541 PMD(PMD_SX, ("Entering %s()\n", str))
542 542
543 543 PT(PT_IC);
544 544 saved_intr = intr_clear();
545 545
546 546 PT(PT_1to1);
547 547 /* Setup 1:1 mapping for wakeup code */
548 548 map_wakeaddr_1to1(wakephys);
549 549
550 550 PMD(PMD_SX, ("ncpus=%d\n", ncpus))
551 551
552 552 PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
553 553 ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
554 554 WC_CODESIZE))
555 555
556 556 PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
557 557 (void *)wakevirt, (uint_t)wakephys))
558 558
559 559 ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
560 560 WC_CODESIZE);
561 561
562 562 bzero(wakevirt, PAGESIZE);
563 563
564 564 /* Copy code to rm_platter */
565 565 bcopy((caddr_t)wc_rm_start, wakevirt,
566 566 (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));
567 567
568 568 prt_other_cpus();
569 569
570 570 #if defined(__amd64)
571 571
572 572 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
573 573 (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
574 574
575 575 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
576 576 (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
577 577
578 578 real_mode_platter->rm_cr4 = getcr4();
579 579 real_mode_platter->rm_pdbr = getcr3();
580 580
581 581 rmp_gdt_init(real_mode_platter);
582 582
583 583 /*
584 584 * Since the CPU needs to jump to protected mode using an identity
585 585 * mapped address, we need to calculate it here.
586 586 */
587 587 real_mode_platter->rm_longmode64_addr = rm_platter_pa +
588 588 (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);
589 589
590 590 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
591 591 (ulong_t)real_mode_platter->rm_cr4, getcr4()))
592 592 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
593 593 (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
594 594
595 595 PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
596 596 (ulong_t)real_mode_platter->rm_longmode64_addr))
597 597
598 598 #endif
599 599
600 600 PT(PT_SC);
601 601 if (wc_save_context(cpup)) {
602 602
603 603 ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
604 604 if (ret != 0)
605 605 return (ret);
606 606
607 607 ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
608 608 PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
609 609 if (ret != 0)
610 610 return (ret);
611 611
612 612 PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
613 613 (uint_t)wakephys, (void *)&kernel_wc_code))
614 614 PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
615 615 (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
616 616 PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
617 617 cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
618 618 cpup->wc_esp))
619 619 PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
620 620 (long)cpup->wc_cr0, (long)cpup->wc_cr3,
621 621 (long)cpup->wc_cr4))
622 622 PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
623 623 "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
624 624 cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
625 625 (long)cpup->wc_eflags))
626 626
627 627 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
628 628 "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
629 629 cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
630 630 cpup->wc_idt_limit, (long)cpup->wc_ldt,
631 631 (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
632 632
633 633 gdt.base = cpup->wc_gdt_base;
634 634 gdt.limit = cpup->wc_gdt_limit;
635 635
636 636 #if defined(__amd64)
637 637 code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
638 638 (uintptr_t)wc_rm_start);
639 639 #else
640 640 code_length = 0;
641 641 #endif
642 642
643 643 init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
644 644
645 645 #if defined(__amd64)
646 646 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
647 647 (ulong_t)wcpp->rm_cr4, getcr4()))
648 648
649 649 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
650 650 (ulong_t)wcpp->rm_pdbr, getcr3()))
651 651
652 652 PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
653 653 (ulong_t)wcpp->rm_longmode64_addr))
654 654
655 655 PMD(PMD_SX,
656 656 ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
657 657 (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
658 658 #endif
659 659
660 660 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
661 661 "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
662 662 wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
663 663 wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
664 664 (long)cpup->wc_kgsbase))
665 665
666 666 power_req.request_type = PMR_PPM_ENTER_SX;
667 667 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
668 668 power_req.req.ppm_power_enter_sx_req.test_point =
669 669 cpr_test_point;
670 670 power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
671 671
672 672 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
673 673 PT(PT_PPMCTLOP);
674 674 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
675 675 &power_req, &ret);
676 676 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
677 677
678 678 /*
679 679 * If it works, we get control back to the else branch below
680 680 * If we get control back here, it didn't work.
681 681 * XXX return EINVAL here?
682 682 */
683 683
684 684 unmap_wakeaddr_1to1(wakephys);
685 685 intr_restore(saved_intr);
686 686
687 687 return (ret);
688 688 } else {
689 689 cpr_suspend_succeeded = 1;
690 690
691 691 power_req.request_type = PMR_PPM_EXIT_SX;
692 692 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
693 693
694 694 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
695 695 PT(PT_PPMCTLOP);
696 696 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
697 697 &power_req, &ret);
698 698 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
699 699
700 700 ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
701 701 /*
702 702 * the restore should never fail, if the saved suceeded
703 703 */
704 704 ASSERT(ret == 0);
705 705
706 706 i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
707 707
708 708 /*
709 709 * Enable interrupts on boot cpu.
710 710 */
711 711 ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
712 712 mutex_enter(&cpu_lock);
713 713 cpu_enable_intr(CPU);
714 714 mutex_exit(&cpu_lock);
715 715
716 716 PT(PT_INTRRESTORE);
717 717 intr_restore(saved_intr);
718 718 PT(PT_CPU);
719 719
720 720 return (ret);
721 721 }
722 722 }
723 723
724 724 /*
725 725 * Stop all other cpu's before halting or rebooting. We pause the cpu's
726 726 * instead of sending a cross call.
727 727 * Stolen from sun4/os/mp_states.c
728 728 */
729 729
↓ open down ↓ |
729 lines elided |
↑ open up ↑ |
730 730 static int cpu_are_paused; /* sic */
731 731
732 732 void
733 733 i_cpr_stop_other_cpus(void)
734 734 {
735 735 mutex_enter(&cpu_lock);
736 736 if (cpu_are_paused) {
737 737 mutex_exit(&cpu_lock);
738 738 return;
739 739 }
740 - pause_cpus(NULL);
740 + pause_cpus(NULL, NULL);
741 741 cpu_are_paused = 1;
742 742
743 743 mutex_exit(&cpu_lock);
744 744 }
745 745
746 746 int
747 747 i_cpr_is_supported(int sleeptype)
748 748 {
749 749 extern int cpr_supported_override;
750 750 extern int cpr_platform_enable;
751 751 extern int pm_S3_enabled;
752 752
753 753 if (sleeptype != CPR_TORAM)
754 754 return (0);
755 755
756 756 /*
757 757 * The next statement tests if a specific platform has turned off
758 758 * cpr support.
759 759 */
760 760 if (cpr_supported_override)
761 761 return (0);
762 762
763 763 /*
764 764 * If a platform has specifically turned on cpr support ...
765 765 */
766 766 if (cpr_platform_enable)
767 767 return (1);
768 768
769 769 return (pm_S3_enabled);
770 770 }
771 771
/*
 * Intentionally empty: there is nothing to clean up on this platform.
 */
void
i_cpr_bitmap_cleanup(void)
{
}
776 776
/*
 * Intentionally empty: there is nothing to free on this platform.
 */
void
i_cpr_free_memory_resources(void)
{
}
781 781
782 782 /*
783 783 * Needed only for S3 so far
784 784 */
785 785 static int
786 786 i_cpr_platform_alloc(psm_state_request_t *req)
787 787 {
788 788 #ifdef DEBUG
789 789 char *str = "i_cpr_platform_alloc";
790 790 #endif
791 791
792 792 PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
793 793
794 794 if (psm_state == NULL) {
795 795 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
796 796 return (0);
797 797 }
798 798
799 799 req->psr_cmd = PSM_STATE_ALLOC;
800 800 return ((*psm_state)(req));
801 801 }
802 802
803 803 /*
804 804 * Needed only for S3 so far
805 805 */
806 806 static void
807 807 i_cpr_platform_free(psm_state_request_t *req)
808 808 {
809 809 #ifdef DEBUG
810 810 char *str = "i_cpr_platform_free";
811 811 #endif
812 812
813 813 PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
814 814
815 815 if (psm_state == NULL) {
816 816 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
817 817 return;
818 818 }
819 819
820 820 req->psr_cmd = PSM_STATE_FREE;
821 821 (void) (*psm_state)(req);
822 822 }
823 823
824 824 static int
825 825 i_cpr_save_apic(psm_state_request_t *req)
826 826 {
827 827 #ifdef DEBUG
828 828 char *str = "i_cpr_save_apic";
829 829 #endif
830 830
831 831 if (psm_state == NULL) {
832 832 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
833 833 return (0);
834 834 }
835 835
836 836 req->psr_cmd = PSM_STATE_SAVE;
837 837 return ((*psm_state)(req));
838 838 }
839 839
840 840 static int
841 841 i_cpr_restore_apic(psm_state_request_t *req)
842 842 {
843 843 #ifdef DEBUG
844 844 char *str = "i_cpr_restore_apic";
845 845 #endif
846 846
847 847 if (psm_state == NULL) {
848 848 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
849 849 return (0);
850 850 }
851 851
852 852 req->psr_cmd = PSM_STATE_RESTORE;
853 853 return ((*psm_state)(req));
854 854 }
855 855
856 856
857 857 /* stop lint complaining about offset not being used in 32bit mode */
858 858 #if !defined(__amd64)
859 859 /*ARGSUSED*/
860 860 #endif
861 861 static void
862 862 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
863 863 {
864 864 /*LINTED*/
865 865 rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
866 866
867 867 /*
868 868 * Fill up the real mode platter to make it easy for real mode code to
869 869 * kick it off. This area should really be one passed by boot to kernel
870 870 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
871 871 * have identical physical and virtual address in paged mode.
872 872 */
873 873
874 874 real_mode_platter->rm_pdbr = getcr3();
875 875 real_mode_platter->rm_cpu = cpun;
876 876 real_mode_platter->rm_cr4 = cr4;
877 877
878 878 real_mode_platter->rm_gdt_base = gdt.base;
879 879 real_mode_platter->rm_gdt_lim = gdt.limit;
880 880
881 881 #if defined(__amd64)
882 882 if (getcr3() > 0xffffffffUL)
883 883 panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
884 884 "located above 4G in physical memory (@ 0x%llx).",
885 885 (unsigned long long)getcr3());
886 886
887 887 /*
888 888 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
889 889 * by code in real_mode_start():
890 890 *
891 891 * GDT[0]: NULL selector
892 892 * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
893 893 *
894 894 * Clear the IDT as interrupts will be off and a limit of 0 will cause
895 895 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
896 896 * a course of action as any other, though it may cause the entire
897 897 * platform to reset in some cases...
898 898 */
899 899 real_mode_platter->rm_temp_gdt[0] = 0ULL;
900 900 real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
901 901
902 902 real_mode_platter->rm_temp_gdt_lim = (ushort_t)
903 903 (sizeof (real_mode_platter->rm_temp_gdt) - 1);
904 904 real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
905 905 offsetof(rm_platter_t, rm_temp_gdt);
906 906
907 907 real_mode_platter->rm_temp_idt_lim = 0;
908 908 real_mode_platter->rm_temp_idt_base = 0;
909 909
910 910 /*
911 911 * Since the CPU needs to jump to protected mode using an identity
912 912 * mapped address, we need to calculate it here.
913 913 */
914 914 real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
915 915 #endif /* __amd64 */
916 916
917 917 /* return; */
918 918 }
919 919
920 920 void
921 921 i_cpr_start_cpu(void)
922 922 {
923 923
924 924 struct cpu *cp = CPU;
925 925
926 926 char *str = "i_cpr_start_cpu";
927 927 extern void init_cpu_syscall(struct cpu *cp);
928 928
929 929 PMD(PMD_SX, ("%s() called\n", str))
930 930
931 931 PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
932 932 cp->cpu_base_spl))
933 933
934 934 mutex_enter(&cpu_lock);
935 935 if (cp == i_cpr_bootcpu()) {
936 936 mutex_exit(&cpu_lock);
937 937 PMD(PMD_SX,
938 938 ("%s() called on bootcpu nothing to do!\n", str))
939 939 return;
940 940 }
941 941 mutex_exit(&cpu_lock);
942 942
943 943 /*
944 944 * We need to Sync PAT with cpu0's PAT. We have to do
945 945 * this with interrupts disabled.
946 946 */
947 947 if (is_x86_feature(x86_featureset, X86FSET_PAT))
948 948 pat_sync();
949 949
950 950 /*
951 951 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
952 952 */
953 953 if (fp_save_mech == FP_XSAVE) {
954 954 setup_xfem();
955 955 }
956 956
957 957 /*
958 958 * Initialize this CPU's syscall handlers
959 959 */
960 960 init_cpu_syscall(cp);
961 961
962 962 PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
963 963
964 964 /*
965 965 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
966 966 * init_cpu_info(), since the work that they do is only needed to
967 967 * be done once at boot time
968 968 */
969 969
970 970
971 971 mutex_enter(&cpu_lock);
972 972 CPUSET_ADD(procset, cp->cpu_id);
973 973 mutex_exit(&cpu_lock);
974 974
975 975 PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
976 976 cp->cpu_base_spl))
977 977
978 978 if (tsc_gethrtime_enable) {
979 979 PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
980 980 tsc_sync_slave();
981 981 }
982 982
983 983 PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
984 984 cp->cpu_id, cp->cpu_intr_actv))
985 985 PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
986 986 cp->cpu_base_spl))
987 987
988 988 (void) spl0(); /* enable interrupts */
989 989
990 990 PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
991 991 cp->cpu_base_spl))
992 992
993 993 /*
994 994 * Set up the CPU module for this CPU. This can't be done before
995 995 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
996 996 * need to go load another CPU module. The act of attempting to load
997 997 * a module may trigger a cross-call, which will ASSERT unless this
998 998 * cpu is CPU_READY.
999 999 */
1000 1000
1001 1001 /*
1002 1002 * cmi already been init'd (during boot), so do not need to do it again
1003 1003 */
1004 1004 #ifdef PM_REINITMCAONRESUME
1005 1005 if (is_x86_feature(x86_featureset, X86FSET_MCA))
1006 1006 cmi_mca_init();
1007 1007 #endif
1008 1008
1009 1009 PMD(PMD_SX, ("%s() returning\n", str))
1010 1010
1011 1011 /* return; */
1012 1012 }
1013 1013
1014 1014 void
1015 1015 i_cpr_alloc_cpus(void)
1016 1016 {
1017 1017 char *str = "i_cpr_alloc_cpus";
1018 1018
1019 1019 PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1020 1020 /*
1021 1021 * we allocate this only when we actually need it to save on
1022 1022 * kernel memory
1023 1023 */
1024 1024
1025 1025 if (wc_other_cpus == NULL) {
1026 1026 wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
1027 1027 KM_SLEEP);
1028 1028 }
1029 1029
1030 1030 }
1031 1031
1032 1032 void
1033 1033 i_cpr_free_cpus(void)
1034 1034 {
1035 1035 int index;
1036 1036 wc_cpu_t *wc_cpu;
1037 1037
1038 1038 if (wc_other_cpus != NULL) {
1039 1039 for (index = 0; index < max_ncpus; index++) {
1040 1040 wc_cpu = wc_other_cpus + index;
1041 1041 if (wc_cpu->wc_saved_stack != NULL) {
1042 1042 kmem_free(wc_cpu->wc_saved_stack,
1043 1043 wc_cpu->wc_saved_stack_size);
1044 1044 }
1045 1045 }
1046 1046
1047 1047 kmem_free((void *) wc_other_cpus,
1048 1048 max_ncpus * sizeof (wc_cpu_t));
1049 1049 wc_other_cpus = NULL;
1050 1050 }
1051 1051 }
1052 1052
1053 1053 /*
1054 1054 * wrapper for acpica_ddi_save_resources()
1055 1055 */
1056 1056 void
1057 1057 i_cpr_save_configuration(dev_info_t *dip)
1058 1058 {
1059 1059 acpica_ddi_save_resources(dip);
1060 1060 }
1061 1061
1062 1062 /*
1063 1063 * wrapper for acpica_ddi_restore_resources()
1064 1064 */
1065 1065 void
1066 1066 i_cpr_restore_configuration(dev_info_t *dip)
1067 1067 {
1068 1068 acpica_ddi_restore_resources(dip);
1069 1069 }
1070 1070
1071 1071 static int
1072 1072 wait_for_set(cpuset_t *set, int who)
1073 1073 {
1074 1074 int delays;
1075 1075 char *str = "wait_for_set";
1076 1076
1077 1077 for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1078 1078 if (delays == 500) {
1079 1079 /*
1080 1080 * After five seconds, things are probably
1081 1081 * looking a bit bleak - explain the hang.
1082 1082 */
1083 1083 cmn_err(CE_NOTE, "cpu%d: started, "
1084 1084 "but not running in the kernel yet", who);
1085 1085 PMD(PMD_SX, ("%s() %d cpu started "
1086 1086 "but not running in the kernel yet\n",
1087 1087 str, who))
1088 1088 } else if (delays > 2000) {
1089 1089 /*
1090 1090 * We waited at least 20 seconds, bail ..
1091 1091 */
1092 1092 cmn_err(CE_WARN, "cpu%d: timed out", who);
1093 1093 PMD(PMD_SX, ("%s() %d cpu timed out\n",
1094 1094 str, who))
1095 1095 return (0);
1096 1096 }
1097 1097
1098 1098 /*
1099 1099 * wait at least 10ms, then check again..
1100 1100 */
1101 1101 drv_usecwait(10000);
1102 1102 }
1103 1103
1104 1104 return (1);
1105 1105 }
1106 1106
1107 1107 static void
1108 1108 i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
1109 1109 {
1110 1110 size_t stack_size; /* size of stack */
1111 1111 caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1112 1112 caddr_t end = CPR_GET_STACK_END(t); /* stack end */
1113 1113
1114 1114 stack_size = (size_t)end - (size_t)start;
1115 1115
1116 1116 if (wc_cpu->wc_saved_stack_size < stack_size) {
1117 1117 if (wc_cpu->wc_saved_stack != NULL) {
1118 1118 kmem_free(wc_cpu->wc_saved_stack,
1119 1119 wc_cpu->wc_saved_stack_size);
1120 1120 }
1121 1121 wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
1122 1122 wc_cpu->wc_saved_stack_size = stack_size;
1123 1123 }
1124 1124
1125 1125 bcopy(start, wc_cpu->wc_saved_stack, stack_size);
1126 1126 }
1127 1127
1128 1128 void
1129 1129 i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
1130 1130 {
1131 1131 size_t stack_size; /* size of stack */
1132 1132 caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1133 1133 caddr_t end = CPR_GET_STACK_END(t); /* stack end */
1134 1134
1135 1135 stack_size = (size_t)end - (size_t)start;
1136 1136
1137 1137 bcopy(save_stack, start, stack_size);
1138 1138 }
↓ open down ↓ |
388 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX