1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Platform specific implementation code
  27  * Currently only suspend to RAM is supported (ACPI S3)
  28  */
  29 
  30 #define SUNDDI_IMPL
  31 
  32 #include <sys/types.h>
  33 #include <sys/promif.h>
  34 #include <sys/prom_isa.h>
  35 #include <sys/prom_plat.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/pte.h>
  38 #include <vm/hat.h>
  39 #include <vm/page.h>
  40 #include <vm/as.h>
  41 #include <sys/cpr.h>
  42 #include <sys/kmem.h>
  43 #include <sys/clock.h>
  44 #include <sys/kmem.h>
  45 #include <sys/panic.h>
  46 #include <vm/seg_kmem.h>
  47 #include <sys/cpu_module.h>
  48 #include <sys/callb.h>
  49 #include <sys/machsystm.h>
  50 #include <sys/vmsystm.h>
  51 #include <sys/systm.h>
  52 #include <sys/archsystm.h>
  53 #include <sys/stack.h>
  54 #include <sys/fs/ufs_fs.h>
  55 #include <sys/memlist.h>
  56 #include <sys/bootconf.h>
  57 #include <sys/thread.h>
  58 #include <sys/x_call.h>
  59 #include <sys/smp_impldefs.h>
  60 #include <vm/vm_dep.h>
  61 #include <sys/psm.h>
  62 #include <sys/epm.h>
  63 #include <sys/cpr_wakecode.h>
  64 #include <sys/x86_archext.h>
  65 #include <sys/reboot.h>
  66 #include <sys/acpi/acpi.h>
  67 #include <sys/acpica.h>
  68 #include <sys/fp.h>
  69 #include <sys/sysmacros.h>
  70 
  71 #define AFMT    "%lx"
  72 
  73 extern int      flushes_require_xcalls;
  74 extern cpuset_t cpu_ready_set;
  75 
  76 #if defined(__amd64)
  77 extern void     *wc_long_mode_64(void);
  78 #endif  /* __amd64 */
  79 extern int      tsc_gethrtime_enable;
  80 extern  void    i_cpr_start_cpu(void);
  81 
  82 ushort_t        cpr_mach_type = CPR_MACHTYPE_X86;
  83 void            (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;
  84 
  85 static wc_cpu_t *wc_other_cpus = NULL;
  86 static cpuset_t procset;
  87 
  88 static void
  89 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);
  90 
  91 static int i_cpr_platform_alloc(psm_state_request_t *req);
  92 static void i_cpr_platform_free(psm_state_request_t *req);
  93 static int i_cpr_save_apic(psm_state_request_t *req);
  94 static int i_cpr_restore_apic(psm_state_request_t *req);
  95 static int wait_for_set(cpuset_t *set, int who);
  96 
  97 static  void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
  98 void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);
  99 
 100 #ifdef STACK_GROWTH_DOWN
 101 #define CPR_GET_STACK_START(t) ((t)->t_stkbase)
 102 #define CPR_GET_STACK_END(t) ((t)->t_stk)
 103 #else
 104 #define CPR_GET_STACK_START(t) ((t)->t_stk)
 105 #define CPR_GET_STACK_END(t) ((t)->t_stkbase)
 106 #endif  /* STACK_GROWTH_DOWN */
 107 
 108 /*
 109  * restart paused slave cpus
 110  */
 111 void
 112 i_cpr_machdep_setup(void)
 113 {
 114         if (ncpus > 1) {
 115                 CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
 116                 mutex_enter(&cpu_lock);
 117                 start_cpus();
 118                 mutex_exit(&cpu_lock);
 119         }
 120 }
 121 
 122 
 123 /*
 124  * Stop all interrupt activities in the system
 125  */
 126 void
 127 i_cpr_stop_intr(void)
 128 {
 129         (void) spl7();
 130 }
 131 
 132 /*
 133  * Set machine up to take interrupts
 134  */
 135 void
 136 i_cpr_enable_intr(void)
 137 {
 138         (void) spl0();
 139 }
 140 
 141 /*
 142  * Save miscellaneous information which needs to be written to the
 143  * state file.  This information is required to re-initialize
 144  * kernel/prom handshaking.
 145  */
 146 void
 147 i_cpr_save_machdep_info(void)
 148 {
 149         int notcalled = 0;
 150         ASSERT(notcalled);
 151 }
 152 
 153 
/*
 * Deliberate no-op on x86; the trap base register this interface was
 * designed around is a SPARC concept (nothing equivalent to set here).
 */
void
i_cpr_set_tbr(void)
{
}
 158 
 159 
/*
 * Return the processor id of the boot cpu; on this platform the boot
 * cpu is always cpu 0.
 */
processorid_t
i_cpr_bootcpuid(void)
{
        return (0);
}
 165 
 166 /*
 167  * cpu0 should contain bootcpu info
 168  */
 169 cpu_t *
 170 i_cpr_bootcpu(void)
 171 {
 172         ASSERT(MUTEX_HELD(&cpu_lock));
 173 
 174         return (cpu_get(i_cpr_bootcpuid()));
 175 }
 176 
 177 /*
 178  *      Save context for the specified CPU
 179  */
 180 void *
 181 i_cpr_save_context(void *arg)
 182 {
 183         long    index = (long)arg;
 184         psm_state_request_t *papic_state;
 185         int resuming;
 186         int     ret;
 187         wc_cpu_t        *wc_cpu = wc_other_cpus + index;
 188 
 189         PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))
 190 
 191         ASSERT(index < NCPU);
 192 
 193         papic_state = &(wc_cpu)->wc_apic_state;
 194 
 195         ret = i_cpr_platform_alloc(papic_state);
 196         ASSERT(ret == 0);
 197 
 198         ret = i_cpr_save_apic(papic_state);
 199         ASSERT(ret == 0);
 200 
 201         i_cpr_save_stack(curthread, wc_cpu);
 202 
 203         /*
 204          * wc_save_context returns twice, once when susending and
 205          * once when resuming,  wc_save_context() returns 0 when
 206          * suspending and non-zero upon resume
 207          */
 208         resuming = (wc_save_context(wc_cpu) == 0);
 209 
 210         /*
 211          * do NOT call any functions after this point, because doing so
 212          * will modify the stack that we are running on
 213          */
 214 
 215         if (resuming) {
 216 
 217                 ret = i_cpr_restore_apic(papic_state);
 218                 ASSERT(ret == 0);
 219 
 220                 i_cpr_platform_free(papic_state);
 221 
 222                 /*
 223                  * Enable interrupts on this cpu.
 224                  * Do not bind interrupts to this CPU's local APIC until
 225                  * the CPU is ready to receive interrupts.
 226                  */
 227                 ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
 228                 mutex_enter(&cpu_lock);
 229                 cpu_enable_intr(CPU);
 230                 mutex_exit(&cpu_lock);
 231 
 232                 /*
 233                  * Setting the bit in cpu_ready_set must be the last operation
 234                  * in processor initialization; the boot CPU will continue to
 235                  * boot once it sees this bit set for all active CPUs.
 236                  */
 237                 CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);
 238 
 239                 PMD(PMD_SX,
 240                     ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
 241                     CPU->cpu_id))
 242         } else {
 243                 /*
 244                  * Disable interrupts on this CPU so that PSM knows not to bind
 245                  * interrupts here on resume until the CPU has executed
 246                  * cpu_enable_intr() (above) in the resume path.
 247                  * We explicitly do not grab cpu_lock here because at this point
 248                  * in the suspend process, the boot cpu owns cpu_lock and all
 249                  * other cpus are also executing in the pause thread (only
 250                  * modifying their respective CPU structure).
 251                  */
 252                 (void) cpu_disable_intr(CPU);
 253         }
 254 
 255         PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
 256             resuming))
 257 
 258         return (NULL);
 259 }
 260 
 261 static ushort_t *warm_reset_vector = NULL;
 262 
/*
 * Map the BIOS warm reset vector (WARM_RESET_VECTOR) and store into it
 * the real-mode far pointer (offset word, then segment word) of the
 * wakeup code sitting in the rm_platter page, so that an AP started
 * via warm reset begins executing there.  Returns the mapped vector
 * address (which the caller later passes to unmap_warm_reset_vector()),
 * or NULL if the mapping failed.
 */
static ushort_t *
map_warm_reset_vector()
{
        /*LINTED*/
        if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
            sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
                return (NULL);

        /*
         * setup secondary cpu bios boot up vector
         * (offset of rm_code within the platter segment; the segment
         * word written next is rm_platter_pa >> 4.  NOTE(review): the
         * "& 0xf" term assumes rm_platter_va is 16-byte aligned in the
         * same way as rm_platter_pa — confirm.)
         */
        *warm_reset_vector = (ushort_t)((caddr_t)
            /*LINTED*/
            ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
            + ((ulong_t)rm_platter_va & 0xf));
        warm_reset_vector++;
        *warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

        /* Step back so we return (and later unmap) the vector base. */
        --warm_reset_vector;
        return (warm_reset_vector);
}
 284 
/*
 * Wake the non-boot cpus during resume from S3.  Runs on the boot cpu
 * with its affinity pinned, starting each slave cpu serially through
 * the real-mode wakeup code in the rm_platter page and waiting for it
 * to join procset and then cpu_ready_set before moving on.
 */
void
i_cpr_pre_resume_cpus()
{
        /*
         * this is a cut down version of start_other_cpus()
         * just do the initialization to wake the other cpus
         */
        unsigned who;
        int boot_cpuid = i_cpr_bootcpuid();
        uint32_t                code_length = 0;
        caddr_t                 wakevirt = rm_platter_va;
        /*LINTED*/
        wakecode_t              *wp = (wakecode_t *)wakevirt;
        char *str = "i_cpr_pre_resume_cpus";
        extern int get_tsc_ready();
        int err;

        /*LINTED*/
        rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

        /*
         * If startup wasn't able to find a page under 1M, we cannot
         * proceed.
         */
        if (rm_platter_va == 0) {
                cmn_err(CE_WARN, "Cannot suspend the system because no "
                    "memory below 1M could be found for processor startup");
                return;
        }

        /*
         * Copy the real mode code at "real_mode_start" to the
         * page at rm_platter_va.
         */
        warm_reset_vector = map_warm_reset_vector();
        if (warm_reset_vector == NULL) {
                PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
                return;
        }

        flushes_require_xcalls = 1;

        /*
         * We lock our affinity to the master CPU to ensure that all slave CPUs
         * do their TSC syncs with the same CPU.
         */

        affinity_set(CPU_CURRENT);

        /*
         * Mark the boot cpu as being ready and in the procset, since we are
         * running on that cpu.
         */
        CPUSET_ONLY(cpu_ready_set, boot_cpuid);
        CPUSET_ONLY(procset, boot_cpuid);

        for (who = 0; who < max_ncpus; who++) {

                wc_cpu_t        *cpup = wc_other_cpus + who;
                wc_desctbr_t    gdt;

                if (who == boot_cpuid)
                        continue;

                /* Skip cpu ids that were never configured. */
                if (!CPU_IN_SET(mp_cpus, who))
                        continue;

                PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

                /* Place this cpu's saved context into the wakeup page. */
                bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

                gdt.base = cpup->wc_gdt_base;
                gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
                /* Offset of the long-mode entry point within the platter. */
                code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
                    (uintptr_t)wc_rm_start);
#else
                code_length = 0;
#endif

                init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

                mutex_enter(&cpu_lock);
                err = mach_cpuid_start(who, rm_platter_va);
                mutex_exit(&cpu_lock);
                if (err != 0) {
                        cmn_err(CE_WARN, "cpu%d: failed to start during "
                            "suspend/resume error %d", who, err);
                        continue;
                }

                PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

                if (!wait_for_set(&procset, who))
                        continue;

                PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

                PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

                if (tsc_gethrtime_enable) {
                        PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
                        tsc_sync_master(who);
                }

                PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
                    who))
                /*
                 * Wait for cpu to declare that it is ready, we want the
                 * cpus to start serially instead of in parallel, so that
                 * they do not contend with each other in wc_rm_start()
                 */
                if (!wait_for_set(&cpu_ready_set, who))
                        continue;

                /*
                 * do not need to re-initialize dtrace using dtrace_cpu_init
                 * function
                 */
                PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
        }

        affinity_clear();

        PMD(PMD_SX, ("%s() all cpus now ready\n", str))

}
 413 
/*
 * Undo map_warm_reset_vector(): release the mapping of the BIOS warm
 * reset vector.  The parameter intentionally shadows the file-static
 * warm_reset_vector; callers pass in that same pointer.
 */
static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
        psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}
 419 
 420 /*
 421  * We need to setup a 1:1 (virtual to physical) mapping for the
 422  * page containing the wakeup code.
 423  */
 424 static struct as *save_as;      /* when switching to kas */
 425 
/*
 * Undo map_wakeaddr_1to1(): switch the current thread back to the
 * address space saved in save_as, then remove the identity mapping of
 * the wakeup-code page from the kernel hat.
 */
static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
        uintptr_t       wp = (uintptr_t)wakephys;
        hat_setup(save_as->a_hat, 0);        /* switch back from kernel hat */
        hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}
 433 
/*
 * Resume-side teardown after all cpus are running again: unmap the
 * BIOS warm reset vector, unload the rm_platter page mapping, and tear
 * down the 1:1 mapping of the wakeup code.
 */
void
i_cpr_post_resume_cpus()
{
        uint64_t        wakephys = rm_platter_pa;

        if (warm_reset_vector != NULL)
                unmap_warm_reset_vector(warm_reset_vector);

        /*
         * NOTE(review): wakephys == rm_platter_pa, so the hat_unload()
         * below and the one inside unmap_wakeaddr_1to1() target the
         * same page; appears redundant but harmless — confirm.
         */
        hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
            HAT_UNLOAD);

        /*
         * cmi_post_mpstartup() is only required upon boot not upon
         * resume from RAM
         */

        PT(PT_UNDO1to1);
        /* Tear down 1:1 mapping for wakeup code */
        unmap_wakeaddr_1to1(wakephys);
}
 454 
/* ARGSUSED */
/*
 * Cross-call handling hook for the common cpr code; nothing is needed
 * on this platform, so this is a deliberate no-op.
 */
void
i_cpr_handle_xc(int flag)
{
}
 460 
/*
 * The "reusable statefile" cpr feature is not supported on this
 * platform; always report 0.
 */
int
i_cpr_reusable_supported(void)
{
        return (0);
}
/*
 * Install an identity (VA == PA) mapping for the wakeup-code page in
 * the kernel hat, then switch the current thread onto the kernel-only
 * hat, remembering the previous address space in save_as so that
 * unmap_wakeaddr_1to1() can switch back.
 */
static void
map_wakeaddr_1to1(uint64_t wakephys)
{
        uintptr_t       wp = (uintptr_t)wakephys;
        hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
            (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
            HAT_LOAD);
        save_as = curthread->t_procp->p_as;
        hat_setup(kas.a_hat, 0);        /* switch to kernel-only hat */
}
 476 
 477 
/*
 * Debug aid: dump the saved wakecode context (gdt/idt/ldt/tr/kgsbase
 * and stack pointer) of every configured non-boot cpu via PMD.  A
 * no-op-ish early return on uniprocessor systems, where the
 * wc_other_cpus table carries nothing of interest.
 */
void
prt_other_cpus()
{
        int     who;

        if (ncpus == 1) {
                PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
                    "uniprocessor machine\n"))
                return;
        }

        for (who = 0; who < max_ncpus; who++) {

                wc_cpu_t        *cpup = wc_other_cpus + who;

                if (!CPU_IN_SET(mp_cpus, who))
                        continue;

                PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
                    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
                    AFMT ", sp=%lx\n", who,
                    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
                    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
                    (long)cpup->wc_ldt, (long)cpup->wc_tr,
                    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
        }
}
 505 
 506 /*
 507  * Power down the system.
 508  */
 509 int
 510 i_cpr_power_down(int sleeptype)
 511 {
 512         caddr_t         wakevirt = rm_platter_va;
 513         uint64_t        wakephys = rm_platter_pa;
 514         ulong_t         saved_intr;
 515         uint32_t        code_length = 0;
 516         wc_desctbr_t    gdt;
 517         /*LINTED*/
 518         wakecode_t      *wp = (wakecode_t *)wakevirt;
 519         /*LINTED*/
 520         rm_platter_t    *wcpp = (rm_platter_t *)wakevirt;
 521         wc_cpu_t        *cpup = &(wp->wc_cpu);
 522         dev_info_t      *ppm;
 523         int             ret = 0;
 524         power_req_t     power_req;
 525         char *str =     "i_cpr_power_down";
 526 #if defined(__amd64)
 527         /*LINTED*/
 528         rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
 529 #endif
 530         extern int      cpr_suspend_succeeded;
 531         extern void     kernel_wc_code();
 532 
 533         ASSERT(sleeptype == CPR_TORAM);
 534         ASSERT(CPU->cpu_id == 0);
 535 
 536         if ((ppm = PPM(ddi_root_node())) == NULL) {
 537                 PMD(PMD_SX, ("%s: root node not claimed\n", str))
 538                 return (ENOTTY);
 539         }
 540 
 541         PMD(PMD_SX, ("Entering %s()\n", str))
 542 
 543         PT(PT_IC);
 544         saved_intr = intr_clear();
 545 
 546         PT(PT_1to1);
 547         /* Setup 1:1 mapping for wakeup code */
 548         map_wakeaddr_1to1(wakephys);
 549 
 550         PMD(PMD_SX, ("ncpus=%d\n", ncpus))
 551 
 552         PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
 553             ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
 554             WC_CODESIZE))
 555 
 556         PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
 557             (void *)wakevirt, (uint_t)wakephys))
 558 
 559         ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
 560             WC_CODESIZE);
 561 
 562         bzero(wakevirt, PAGESIZE);
 563 
 564         /* Copy code to rm_platter */
 565         bcopy((caddr_t)wc_rm_start, wakevirt,
 566             (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));
 567 
 568         prt_other_cpus();
 569 
 570 #if defined(__amd64)
 571 
 572         PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
 573             (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
 574 
 575         PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
 576             (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
 577 
 578         real_mode_platter->rm_cr4 = getcr4();
 579         real_mode_platter->rm_pdbr = getcr3();
 580 
 581         rmp_gdt_init(real_mode_platter);
 582 
 583         /*
 584          * Since the CPU needs to jump to protected mode using an identity
 585          * mapped address, we need to calculate it here.
 586          */
 587         real_mode_platter->rm_longmode64_addr = rm_platter_pa +
 588             (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);
 589 
 590         PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
 591             (ulong_t)real_mode_platter->rm_cr4, getcr4()))
 592         PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
 593             (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
 594 
 595         PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
 596             (ulong_t)real_mode_platter->rm_longmode64_addr))
 597 
 598 #endif
 599 
 600         PT(PT_SC);
 601         if (wc_save_context(cpup)) {
 602 
 603                 ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
 604                 if (ret != 0)
 605                         return (ret);
 606 
 607                 ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
 608                 PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
 609                 if (ret != 0)
 610                         return (ret);
 611 
 612                 PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
 613                     (uint_t)wakephys, (void *)&kernel_wc_code))
 614                 PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
 615                     (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
 616                 PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
 617                     cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
 618                     cpup->wc_esp))
 619                 PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
 620                     (long)cpup->wc_cr0, (long)cpup->wc_cr3,
 621                     (long)cpup->wc_cr4))
 622                 PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
 623                     "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
 624                     cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
 625                     (long)cpup->wc_eflags))
 626 
 627                 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
 628                     "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
 629                     cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
 630                     cpup->wc_idt_limit, (long)cpup->wc_ldt,
 631                     (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
 632 
 633                 gdt.base = cpup->wc_gdt_base;
 634                 gdt.limit = cpup->wc_gdt_limit;
 635 
 636 #if defined(__amd64)
 637                 code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
 638                     (uintptr_t)wc_rm_start);
 639 #else
 640                 code_length = 0;
 641 #endif
 642 
 643                 init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
 644 
 645 #if defined(__amd64)
 646                 PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
 647                     (ulong_t)wcpp->rm_cr4, getcr4()))
 648 
 649                 PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
 650                     (ulong_t)wcpp->rm_pdbr, getcr3()))
 651 
 652                 PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
 653                     (ulong_t)wcpp->rm_longmode64_addr))
 654 
 655                 PMD(PMD_SX,
 656                     ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
 657                     (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
 658 #endif
 659 
 660                 PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
 661                     "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
 662                     wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
 663                     wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
 664                     (long)cpup->wc_kgsbase))
 665 
 666                 power_req.request_type = PMR_PPM_ENTER_SX;
 667                 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
 668                 power_req.req.ppm_power_enter_sx_req.test_point =
 669                     cpr_test_point;
 670                 power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
 671 
 672                 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
 673                 PT(PT_PPMCTLOP);
 674                 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
 675                     &power_req, &ret);
 676                 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
 677 
 678                 /*
 679                  * If it works, we get control back to the else branch below
 680                  * If we get control back here, it didn't work.
 681                  * XXX return EINVAL here?
 682                  */
 683 
 684                 unmap_wakeaddr_1to1(wakephys);
 685                 intr_restore(saved_intr);
 686 
 687                 return (ret);
 688         } else {
 689                 cpr_suspend_succeeded = 1;
 690 
 691                 power_req.request_type = PMR_PPM_EXIT_SX;
 692                 power_req.req.ppm_power_enter_sx_req.sx_state = S3;
 693 
 694                 PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
 695                 PT(PT_PPMCTLOP);
 696                 (void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
 697                     &power_req, &ret);
 698                 PMD(PMD_SX, ("%s: returns %d\n", str, ret))
 699 
 700                 ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
 701                 /*
 702                  * the restore should never fail, if the saved suceeded
 703                  */
 704                 ASSERT(ret == 0);
 705 
 706                 i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
 707 
 708                 /*
 709                  * Enable interrupts on boot cpu.
 710                  */
 711                 ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
 712                 mutex_enter(&cpu_lock);
 713                 cpu_enable_intr(CPU);
 714                 mutex_exit(&cpu_lock);
 715 
 716                 PT(PT_INTRRESTORE);
 717                 intr_restore(saved_intr);
 718                 PT(PT_CPU);
 719 
 720                 return (ret);
 721         }
 722 }
 723 
 724 /*
 725  * Stop all other cpu's before halting or rebooting. We pause the cpu's
 726  * instead of sending a cross call.
 727  * Stolen from sun4/os/mp_states.c
 728  */
 729 
 730 static int cpu_are_paused;      /* sic */
 731 
 732 void
 733 i_cpr_stop_other_cpus(void)
 734 {
 735         mutex_enter(&cpu_lock);
 736         if (cpu_are_paused) {
 737                 mutex_exit(&cpu_lock);
 738                 return;
 739         }
 740         pause_cpus(NULL, NULL);
 741         cpu_are_paused = 1;
 742 
 743         mutex_exit(&cpu_lock);
 744 }
 745 
 746 int
 747 i_cpr_is_supported(int sleeptype)
 748 {
 749         extern int cpr_supported_override;
 750         extern int cpr_platform_enable;
 751         extern int pm_S3_enabled;
 752 
 753         if (sleeptype != CPR_TORAM)
 754                 return (0);
 755 
 756         /*
 757          * The next statement tests if a specific platform has turned off
 758          * cpr support.
 759          */
 760         if (cpr_supported_override)
 761                 return (0);
 762 
 763         /*
 764          * If a platform has specifically turned on cpr support ...
 765          */
 766         if (cpr_platform_enable)
 767                 return (1);
 768 
 769         return (pm_S3_enabled);
 770 }
 771 
/*
 * Statefile bitmap cleanup hook; nothing to do for suspend-to-RAM on
 * this platform, so this is a deliberate no-op.
 */
void
i_cpr_bitmap_cleanup(void)
{
}
 776 
/*
 * Memory-resource release hook for the common cpr code; no resources
 * of that kind are held on this platform, so this is a no-op.
 */
void
i_cpr_free_memory_resources(void)
{
}
 781 
 782 /*
 783  * Needed only for S3 so far
 784  */
 785 static int
 786 i_cpr_platform_alloc(psm_state_request_t *req)
 787 {
 788 #ifdef DEBUG
 789         char    *str = "i_cpr_platform_alloc";
 790 #endif
 791 
 792         PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
 793 
 794         if (psm_state == NULL) {
 795                 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
 796                 return (0);
 797         }
 798 
 799         req->psr_cmd = PSM_STATE_ALLOC;
 800         return ((*psm_state)(req));
 801 }
 802 
 803 /*
 804  * Needed only for S3 so far
 805  */
 806 static void
 807 i_cpr_platform_free(psm_state_request_t *req)
 808 {
 809 #ifdef DEBUG
 810         char    *str = "i_cpr_platform_free";
 811 #endif
 812 
 813         PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
 814 
 815         if (psm_state == NULL) {
 816                 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
 817                 return;
 818         }
 819 
 820         req->psr_cmd = PSM_STATE_FREE;
 821         (void) (*psm_state)(req);
 822 }
 823 
 824 static int
 825 i_cpr_save_apic(psm_state_request_t *req)
 826 {
 827 #ifdef DEBUG
 828         char    *str = "i_cpr_save_apic";
 829 #endif
 830 
 831         if (psm_state == NULL) {
 832                 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
 833                 return (0);
 834         }
 835 
 836         req->psr_cmd = PSM_STATE_SAVE;
 837         return ((*psm_state)(req));
 838 }
 839 
 840 static int
 841 i_cpr_restore_apic(psm_state_request_t *req)
 842 {
 843 #ifdef DEBUG
 844         char    *str = "i_cpr_restore_apic";
 845 #endif
 846 
 847         if (psm_state == NULL) {
 848                 PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
 849                 return (0);
 850         }
 851 
 852         req->psr_cmd = PSM_STATE_RESTORE;
 853         return ((*psm_state)(req));
 854 }
 855 
 856 
 857 /* stop lint complaining about offset not being used in 32bit mode */
 858 #if !defined(__amd64)
 859 /*ARGSUSED*/
 860 #endif
/*
 * Populate the real-mode platter for starting cpu "cpun": current page
 * tables (cr3), the supplied cr4 and GDT, and on amd64 a temporary
 * GDT/IDT plus the identity-mapped long-mode entry address
 * (rm_platter_pa + offset).  "offset" is unused in 32-bit builds.
 */
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
        /*LINTED*/
        rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

        /*
         * Fill up the real mode platter to make it easy for real mode code to
         * kick it off. This area should really be one passed by boot to kernel
         * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
         * have identical physical and virtual address in paged mode.
         */

        real_mode_platter->rm_pdbr = getcr3();
        real_mode_platter->rm_cpu = cpun;
        real_mode_platter->rm_cr4 = cr4;

        real_mode_platter->rm_gdt_base = gdt.base;
        real_mode_platter->rm_gdt_lim = gdt.limit;

#if defined(__amd64)
        /* Real-mode code can only reach page tables below 4G. */
        if (getcr3() > 0xffffffffUL)
                panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
                    "located above 4G in physical memory (@ 0x%llx).",
                    (unsigned long long)getcr3());

        /*
         * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
         * by code in real_mode_start():
         *
         * GDT[0]:  NULL selector
         * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
         *
         * Clear the IDT as interrupts will be off and a limit of 0 will cause
         * the CPU to triple fault and reset on an NMI, seemingly as reasonable
         * a course of action as any other, though it may cause the entire
         * platform to reset in some cases...
         */
        real_mode_platter->rm_temp_gdt[0] = 0ULL;
        real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

        real_mode_platter->rm_temp_gdt_lim = (ushort_t)
            (sizeof (real_mode_platter->rm_temp_gdt) - 1);
        real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
            offsetof(rm_platter_t, rm_temp_gdt);

        real_mode_platter->rm_temp_idt_lim = 0;
        real_mode_platter->rm_temp_idt_base = 0;

        /*
         * Since the CPU needs to jump to protected mode using an identity
         * mapped address, we need to calculate it here.
         */
        real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
#endif  /* __amd64 */

}
 919 
/*
 * Per-CPU resume-side initialization after ACPI S3 (suspend to RAM).
 * Runs on each non-boot CPU as it comes back up: re-syncs PAT with the
 * boot CPU, restores the XSAVE feature mask, reinstalls the syscall
 * handlers, rejoins procset, re-syncs the TSC and re-enables interrupts.
 * The boot CPU itself needs none of this and returns immediately.
 *
 * NOTE(review): statement order here is load-bearing — PAT sync must
 * happen while interrupts are still disabled, and spl0() must come after
 * this CPU is visible in procset.
 */
void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* The boot CPU's state was never torn down; nothing to redo. */
	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to Sync PAT with cpu0's PAT. We have to do
	 * this with interrupts disabled.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_PAT))
		pat_sync();

	/*
	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do is only needed to
	 * be done once at boot time
	 */


	/* Announce that this CPU is back in the kernel. */
	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* Re-sync this CPU's TSC with the master if TSC-based hrtime is on. */
	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
	 * need to go load another CPU module.  The act of attempting to load
	 * a module may trigger a cross-call, which will ASSERT unless this
	 * cpu is CPU_READY.
	 */

	/*
	 * cmi already been init'd (during boot), so do not need to do it again
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}
1013 
1014 void
1015 i_cpr_alloc_cpus(void)
1016 {
1017         char *str = "i_cpr_alloc_cpus";
1018 
1019         PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1020         /*
1021          * we allocate this only when we actually need it to save on
1022          * kernel memory
1023          */
1024 
1025         if (wc_other_cpus == NULL) {
1026                 wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
1027                     KM_SLEEP);
1028         }
1029 
1030 }
1031 
1032 void
1033 i_cpr_free_cpus(void)
1034 {
1035         int index;
1036         wc_cpu_t *wc_cpu;
1037 
1038         if (wc_other_cpus != NULL) {
1039                 for (index = 0; index < max_ncpus; index++) {
1040                         wc_cpu = wc_other_cpus + index;
1041                         if (wc_cpu->wc_saved_stack != NULL) {
1042                                 kmem_free(wc_cpu->wc_saved_stack,
1043                                     wc_cpu->wc_saved_stack_size);
1044                         }
1045                 }
1046 
1047                 kmem_free((void *) wc_other_cpus,
1048                     max_ncpus * sizeof (wc_cpu_t));
1049                 wc_other_cpus = NULL;
1050         }
1051 }
1052 
1053 /*
1054  * wrapper for acpica_ddi_save_resources()
1055  */
1056 void
1057 i_cpr_save_configuration(dev_info_t *dip)
1058 {
1059         acpica_ddi_save_resources(dip);
1060 }
1061 
1062 /*
1063  * wrapper for acpica_ddi_restore_resources()
1064  */
1065 void
1066 i_cpr_restore_configuration(dev_info_t *dip)
1067 {
1068         acpica_ddi_restore_resources(dip);
1069 }
1070 
1071 static int
1072 wait_for_set(cpuset_t *set, int who)
1073 {
1074         int delays;
1075         char *str = "wait_for_set";
1076 
1077         for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1078                 if (delays == 500) {
1079                         /*
1080                          * After five seconds, things are probably
1081                          * looking a bit bleak - explain the hang.
1082                          */
1083                         cmn_err(CE_NOTE, "cpu%d: started, "
1084                             "but not running in the kernel yet", who);
1085                         PMD(PMD_SX, ("%s() %d cpu started "
1086                             "but not running in the kernel yet\n",
1087                             str, who))
1088                 } else if (delays > 2000) {
1089                         /*
1090                          * We waited at least 20 seconds, bail ..
1091                          */
1092                         cmn_err(CE_WARN, "cpu%d: timed out", who);
1093                         PMD(PMD_SX, ("%s() %d cpu timed out\n",
1094                             str, who))
1095                         return (0);
1096                 }
1097 
1098                 /*
1099                  * wait at least 10ms, then check again..
1100                  */
1101                 drv_usecwait(10000);
1102         }
1103 
1104         return (1);
1105 }
1106 
1107 static  void
1108 i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
1109 {
1110         size_t  stack_size;     /* size of stack */
1111         caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1112         caddr_t end = CPR_GET_STACK_END(t);     /* stack end  */
1113 
1114         stack_size = (size_t)end - (size_t)start;
1115 
1116         if (wc_cpu->wc_saved_stack_size < stack_size) {
1117                 if (wc_cpu->wc_saved_stack != NULL) {
1118                         kmem_free(wc_cpu->wc_saved_stack,
1119                             wc_cpu->wc_saved_stack_size);
1120                 }
1121                 wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
1122                 wc_cpu->wc_saved_stack_size = stack_size;
1123         }
1124 
1125         bcopy(start, wc_cpu->wc_saved_stack, stack_size);
1126 }
1127 
1128 void
1129 i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
1130 {
1131         size_t  stack_size;     /* size of stack */
1132         caddr_t start = CPR_GET_STACK_START(t); /* stack start */
1133         caddr_t end = CPR_GET_STACK_END(t);     /* stack end  */
1134 
1135         stack_size = (size_t)end - (size_t)start;
1136 
1137         bcopy(save_stack, start, stack_size);
1138 }