1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/hpet_acpi.h>
  26 #include <sys/hpet.h>
  27 #include <sys/bitmap.h>
  28 #include <sys/inttypes.h>
  29 #include <sys/time.h>
  30 #include <sys/sunddi.h>
  31 #include <sys/ksynch.h>
  32 #include <sys/apic.h>
  33 #include <sys/callb.h>
  34 #include <sys/clock.h>
  35 #include <sys/archsystm.h>
  36 #include <sys/cpupart.h>
  37 
  38 static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
  39 static boolean_t hpet_install_proxy(void);
  40 static boolean_t hpet_callback(int code);
  41 static boolean_t hpet_cpr(int code);
  42 static boolean_t hpet_resume(void);
  43 static void hpet_cst_callback(uint32_t code);
  44 static boolean_t hpet_deep_idle_config(int code);
  45 static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
  46 static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
  47 static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
  48 static int hpet_start_main_counter(hpet_info_t *hip);
  49 static int hpet_stop_main_counter(hpet_info_t *hip);
  50 static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
  51 static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
  52 static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
  53 static uint64_t hpet_read_gen_config(hpet_info_t *hip);
  54 static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
  55 static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
  56 static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
  57 /* LINTED E_STATIC_UNUSED */
  58 static uint64_t hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n);
  59 /* LINTED E_STATIC_UNUSED */
  60 static void hpet_write_gen_cap(hpet_info_t *hip, uint64_t l);
  61 static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
  62 static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
  63 static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
  64 static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
  65 static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
  66 static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
  67 /* LINTED E_STATIC_UNUSED */
  68 static void hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l);
  69 static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
  70 static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
  71 static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
  72 static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
  73     uint32_t interrupt);
  74 static uint_t hpet_isr(char *arg);
  75 static uint32_t hpet_install_interrupt_handler(uint_t (*func)(char *),
  76     int vector);
  77 static void hpet_uninstall_interrupt_handler(void);
  78 static void hpet_expire_all(void);
  79 static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
  80 static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
  81 static void hpet_use_lapic_timer(hrtime_t expire);
  82 static void hpet_init_proxy_data(void);
  83 
  84 /*
  85  * hpet_state_lock is used to synchronize disabling/enabling deep c-states
  86  * and to synchronize suspend/resume.
  87  */
  88 static kmutex_t         hpet_state_lock;
  89 static struct hpet_state {
  90         boolean_t       proxy_installed;        /* CBE proxy interrupt setup */
  91         boolean_t       cpr;                    /* currently in CPR */
  92         boolean_t       cpu_deep_idle;          /* user enable/disable */
  93         boolean_t       uni_cstate;             /* disable if only one cstate */
  94 } hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
  95 
  96 uint64_t hpet_spin_check = HPET_SPIN_CHECK;
  97 uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
  98 uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
  99 uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;
 100 
 101 static kmutex_t         hpet_proxy_lock;        /* lock for lAPIC proxy data */
 102 /*
 103  * hpet_proxy_users is a per-cpu array.
 104  */
 105 static hpet_proxy_t     *hpet_proxy_users;      /* one per CPU */
 106 
 107 
 108 ACPI_TABLE_HPET         *hpet_table;            /* ACPI HPET table */
 109 hpet_info_t             hpet_info;              /* Human readable Information */
 110 
 111 /*
 112  * Provide HPET access from unix.so.
 113  * Set up pointers to access symbols in pcplusmp.
 114  */
 115 static void
 116 hpet_establish_hooks(void)
 117 {
 118         hpet.install_proxy = &hpet_install_proxy;
 119         hpet.callback = &hpet_callback;
 120         hpet.use_hpet_timer = &hpet_use_hpet_timer;
 121         hpet.use_lapic_timer = &hpet_use_lapic_timer;
 122 }
 123 
 124 /*
 125  * Get the ACPI "HPET" table.
 126  * acpi_probe() calls this function from mp_startup before drivers are loaded.
 127  * acpi_probe() verified the system is using ACPI before calling this.
 128  *
 129  * There may be more than one ACPI HPET table (Itanium only?).
 130  * Intel's HPET spec defines each timer block to have up to 32 counters and
 131  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
 132  * Each timer block would have an additional ACPI HPET table.
 133  * Typical x86 systems today only have 1 HPET with 3 counters.
 134  * On x86 we only consume HPET table "1" for now.
 135  */
 136 int
 137 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
 138 {
 139         extern hrtime_t tsc_read(void);
 140         extern int      idle_cpu_no_deep_c;
 141         extern int      cpuid_deep_cstates_supported(void);
 142         void            *la;
 143         uint64_t        ret;
 144         uint_t          num_timers;
 145         uint_t          ti;
 146 
 147         (void) memset(&hpet_info, 0, sizeof (hpet_info));
 148         hpet.supported = HPET_NO_SUPPORT;
 149 
 150         if (idle_cpu_no_deep_c)
 151                 return (DDI_FAILURE);
 152 
 153         if (!cpuid_deep_cstates_supported())
 154                 return (DDI_FAILURE);
 155 
 156         hpet_establish_hooks();
 157 
 158         /*
 159          * Get HPET ACPI table 1.
 160          */
 161         if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
 162             (ACPI_TABLE_HEADER **)&hpet_table))) {
 163                 cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
 164                 return (DDI_FAILURE);
 165         }
 166 
 167         if (hpet_validate_table(hpet_table) != AE_OK) {
 168                 cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
 169                 return (DDI_FAILURE);
 170         }
 171 
 172         la = hpet_memory_map(hpet_table);
 173         if (la == NULL) {
 174                 cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
 175                 return (DDI_FAILURE);
 176         }
 177         hpet_info.logical_address = la;
 178 
 179         ret = hpet_read_gen_cap(&hpet_info);
 180         hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
 181         hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
 182         hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
 183         hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
 184         /*
 185          * Hardware contains the last timer's number.
 186          * Add 1 to get the number of timers.
 187          */
 188         hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
 189         hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
 190 
 191         if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
 192                 cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
 193                     (long)hpet_info.gen_cap.counter_clk_period,
 194                     (long)HPET_MAX_CLK_PERIOD);
 195                 return (DDI_FAILURE);
 196         }
 197 
 198         num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
 199         if ((num_timers < 3) || (num_timers > 32)) {
 200                 cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
 201                     "%lx", (long)num_timers);
 202                 return (DDI_FAILURE);
 203         }
 204         hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
 205             num_timers * sizeof (uint64_t), KM_SLEEP);
 206 
 207         ret = hpet_read_gen_config(&hpet_info);
 208         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 209         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 210 
 211         /*
 212          * Solaris does not use the HPET Legacy Replacement Route capabilities.
 213          * This feature has been off by default on test systems.
 214          * The HPET spec does not specify if Legacy Replacement Route is
  215  * on or off by default, so we explicitly set it off here.
 216          * It should not matter which mode the HPET is in since we use
 217          * the first available non-legacy replacement timer: timer 2.
 218          */
 219         (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
 220 
 221         ret = hpet_read_gen_config(&hpet_info);
 222         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 223         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 224 
 225         hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
 226         hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
 227 
 228         for (ti = 0; ti < num_timers; ++ti) {
 229                 ret = hpet_read_timer_N_config(&hpet_info, ti);
 230                 /*
 231                  * Make sure no timers are enabled (think fast reboot or
 232                  * virtual hardware).
 233                  */
 234                 if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
 235                         hpet_disable_timer(&hpet_info, ti);
 236                         ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
 237                 }
 238 
 239                 hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
 240         }
 241 
 242         /*
 243          * Be aware the Main Counter may need to be initialized in the future
 244          * if it is used for more than just Deep C-State support.
  245  * The HPET's Main Counter does not need to be initialized to a specific
  246  * value before it is started and used to wake up CPUs from Deep C-States.
 247          */
 248         if (hpet_start_main_counter(&hpet_info) != AE_OK) {
 249                 cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
 250                 return (DDI_FAILURE);
 251         }
 252 
 253         hpet_info.period = hpet_info.gen_cap.counter_clk_period;
 254         /*
 255          * Read main counter twice to record HPET latency for debugging.
 256          */
 257         hpet_info.tsc[0] = tsc_read();
 258         hpet_info.hpet_main_counter_reads[0] =
 259             hpet_read_main_counter_value(&hpet_info);
 260         hpet_info.tsc[1] = tsc_read();
 261         hpet_info.hpet_main_counter_reads[1] =
 262             hpet_read_main_counter_value(&hpet_info);
 263         hpet_info.tsc[2] = tsc_read();
 264 
 265         ret = hpet_read_gen_config(&hpet_info);
 266         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 267         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 268 
 269         /*
 270          * HPET main counter reads are supported now.
 271          */
 272         hpet.supported = HPET_TIMER_SUPPORT;
 273 
 274         return (hpet_init_proxy(hpet_vect, hpet_flags));
 275 }
 276 
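/*
 * Illustrative sketch, not used by this driver: deriving the HPET tick
 * rate from the General Capabilities COUNTER_CLK_PERIOD field parsed
 * above, which is expressed in femtoseconds per tick.  The helper name
 * and the FEMTOSEC_PER_SEC constant are assumptions local to this
 * example.
 */
/* LINTED E_STATIC_UNUSED */
static uint64_t
hpet_example_ticks_per_second(uint64_t counter_clk_period_fs)
{
        const uint64_t FEMTOSEC_PER_SEC = 1000000000000000ULL; /* 10^15 */

        if (counter_clk_period_fs == 0)
                return (0);

        /* A typical 69841279 fs (~69.8 ns) period gives ~14318179 ticks/s. */
        return (FEMTOSEC_PER_SEC / counter_clk_period_fs);
}
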
 277 void
 278 hpet_acpi_fini(void)
 279 {
 280         if (hpet.supported == HPET_NO_SUPPORT)
 281                 return;
 282         if (hpet.supported >= HPET_TIMER_SUPPORT)
 283                 (void) hpet_stop_main_counter(&hpet_info);
 284         if (hpet.supported > HPET_TIMER_SUPPORT)
 285                 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 286 }
 287 
 288 /*
  289  * Do initial setup to use an HPET timer as a proxy for Deep C-state stalled
  290  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupts.
  291  * Set up data to handle the timer's ISR, and add the timer's interrupt.
 292  *
  293  * The DDI cannot be used to allocate the HPET timer's interrupt.
 294  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
 295  * to handle the HPET timer's interrupt.
 296  *
 297  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
  298  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be preferred.
 299  */
 300 static int
 301 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
 302 {
 303         if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
 304                 cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
 305                 return (DDI_FAILURE);
 306         }
 307 
 308         hpet_init_proxy_data();
 309 
 310         if (hpet_install_interrupt_handler(&hpet_isr,
 311             hpet_info.cstate_timer.intr) != AE_OK) {
 312                 cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
 313                 return (DDI_FAILURE);
 314         }
 315         *hpet_vect = hpet_info.cstate_timer.intr;
 316         hpet_flags->intr_el = INTR_EL_LEVEL;
 317         hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
 318         hpet_flags->bustype = BUS_PCI;               /*  we *do* conform to PCI */
 319 
 320         /*
  321  * Avoid a possibly stuck interrupt by programming the HPET's timer here
 322          * before the I/O APIC is programmed to handle this interrupt.
 323          */
 324         hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
 325             hpet_info.cstate_timer.intr);
 326 
 327         /*
 328          * All HPET functionality is supported.
 329          */
 330         hpet.supported = HPET_FULL_SUPPORT;
 331         return (DDI_SUCCESS);
 332 }
 333 
 334 /*
 335  * Called by kernel if it can support Deep C-States.
 336  */
 337 static boolean_t
 338 hpet_install_proxy(void)
 339 {
 340         if (hpet_state.proxy_installed == B_TRUE)
 341                 return (B_TRUE);
 342 
 343         if (hpet.supported != HPET_FULL_SUPPORT)
 344                 return (B_FALSE);
 345 
 346         hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 347         hpet_state.proxy_installed = B_TRUE;
 348 
 349         return (B_TRUE);
 350 }
 351 
 352 /*
 353  * Remove the interrupt that was added with add_avintr() in
 354  * hpet_install_interrupt_handler().
 355  */
 356 static void
 357 hpet_uninstall_interrupt_handler(void)
 358 {
 359         rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
 360             hpet_info.cstate_timer.intr);
 361 }
 362 
 363 static int
 364 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
 365 {
 366         ACPI_TABLE_HEADER       *table_header = (ACPI_TABLE_HEADER *)hpet_table;
 367 
 368         if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
 369                 cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
 370                     "ACPI_TABLE_HPET) %lx.",
 371                     (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
 372                     (unsigned long)sizeof (ACPI_TABLE_HPET));
 373                 return (AE_ERROR);
 374         }
 375 
 376         if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
 377                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
 378                     "signature");
 379                 return (AE_ERROR);
 380         }
 381 
 382         if (!hpet_checksum_table((unsigned char *)hpet_table,
 383             (unsigned int)table_header->Length)) {
 384                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
 385                 return (AE_ERROR);
 386         }
 387 
 388         /*
 389          * Sequence should be table number - 1.  We are using table 1.
 390          */
 391         if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
 392                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
 393                     (long)hpet_table->Sequence);
 394                 return (AE_ERROR);
 395         }
 396 
 397         return (AE_OK);
 398 }
 399 
 400 static boolean_t
 401 hpet_checksum_table(unsigned char *table, unsigned int length)
 402 {
 403         unsigned char   checksum = 0;
 404         int             i;
 405 
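        /* All bytes of a valid ACPI table sum to zero, modulo 256. */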
 406         for (i = 0; i < length; ++i, ++table)
 407                 checksum += *table;
 408 
 409         return (checksum == 0);
 410 }
 411 
 412 static void *
 413 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
 414 {
 415         return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
 416 }
 417 
 418 static int
 419 hpet_start_main_counter(hpet_info_t *hip)
 420 {
 421         uint64_t        *gcr_ptr;
 422         uint64_t        gcr;
 423 
 424         gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
 425         gcr = *gcr_ptr;
 426 
 427         gcr |= HPET_GCFR_ENABLE_CNF;
 428         *gcr_ptr = gcr;
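        /* Read the register back to confirm the enable bit took effect. */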
 429         gcr = *gcr_ptr;
 430 
 431         return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
 432 }
 433 
 434 static int
 435 hpet_stop_main_counter(hpet_info_t *hip)
 436 {
 437         uint64_t        *gcr_ptr;
 438         uint64_t        gcr;
 439 
 440         gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
 441         gcr = *gcr_ptr;
 442 
 443         gcr &= ~HPET_GCFR_ENABLE_CNF;
 444         *gcr_ptr = gcr;
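        /* Read the register back to confirm the enable bit is clear. */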
 445         gcr = *gcr_ptr;
 446 
 447         return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
 448 }
 449 
 450 /*
 451  * Set the Legacy Replacement Route bit.
 452  * This should be called before setting up timers.
 453  * The HPET specification is silent regarding setting this after timers are
 454  * programmed.
 455  */
 456 static uint64_t
 457 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
 458 {
 459         uint64_t gen_conf = hpet_read_gen_config(hip);
 460 
 461         switch (new_value) {
 462         case 0:
 463                 gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
 464                 break;
 465 
 466         case HPET_GCFR_LEG_RT_CNF:
 467                 gen_conf |= HPET_GCFR_LEG_RT_CNF;
 468                 break;
 469 
 470         default:
 471                 ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
 472                 break;
 473         }
 474         hpet_write_gen_config(hip, gen_conf);
 475         return (gen_conf);
 476 }
 477 
 478 static uint64_t
 479 hpet_read_gen_cap(hpet_info_t *hip)
 480 {
 481         return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
 482 }
 483 
 484 static uint64_t
 485 hpet_read_gen_config(hpet_info_t *hip)
 486 {
 487         return (*(uint64_t *)
 488             HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
 489 }
 490 
 491 static uint64_t
 492 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
 493 {
 494         hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
 495             hip->logical_address);
 496         return (hip->gen_intrpt_stat);
 497 }
 498 
 499 static uint64_t
 500 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
 501 {
 502         uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
 503             hip->logical_address, n);
 504         hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
 505         return (conf);
 506 }
 507 
 508 static hpet_TN_conf_cap_t
 509 hpet_convert_timer_N_config(uint64_t conf)
 510 {
 511         hpet_TN_conf_cap_t cc = { 0 };
 512 
 513         cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
 514         cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
 515         cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
 516         cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
 517         cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
 518         cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
 519         cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
 520         cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
 521         cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
 522         cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
 523         cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
 524 
 525         return (cc);
 526 }
 527 
 528 static uint64_t
 529 hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n)
 530 {
 531         if (hip->timer_n_config[n].size_cap == 1)
 532                 return (*(uint64_t *)
 533                     HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
 534         else
 535                 return (*(uint32_t *)
 536                     HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
 537 }
 538 
 539 static uint64_t
 540 hpet_read_main_counter_value(hpet_info_t *hip)
 541 {
 542         uint64_t        value;
 543         uint32_t        *counter;
 544         uint32_t        high1, high2, low;
 545 
 546         counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
 547 
 548         /*
 549          * 32-bit main counters
 550          */
 551         if (hip->gen_cap.count_size_cap == 0) {
 552                 value = (uint64_t)*counter;
 553                 hip->main_counter_value = value;
 554                 return (value);
 555         }
 556 
 557         /*
 558          * HPET spec claims a 64-bit read can be split into two 32-bit reads
 559          * by the hardware connection to the HPET.
 560          */
 561         high2 = counter[1];
 562         do {
 563                 high1 = high2;
 564                 low = counter[0];
 565                 high2 = counter[1];
 566         } while (high2 != high1);
 567 
 568         value = ((uint64_t)high1 << 32) | low;
 569         hip->main_counter_value = value;
 570         return (value);
 571 }
 572 
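/*
 * Illustrative sketch, not used by this driver: converting a main counter
 * delta (for example, two values returned by hpet_read_main_counter_value())
 * into nanoseconds.  The period field of hpet_info_t holds COUNTER_CLK_PERIOD
 * in femtoseconds per tick; the helper name is hypothetical.
 */
/* LINTED E_STATIC_UNUSED */
static uint64_t
hpet_example_ticks_to_ns(hpet_info_t *hip, uint64_t start, uint64_t end)
{
        /*
         * 1 ns == 10^6 fs.  Assumes end >= start and a delta small enough
         * that (end - start) * period does not overflow 64 bits.
         */
        return (((end - start) * hip->period) / 1000000ULL);
}
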
 573 static void
 574 hpet_write_gen_cap(hpet_info_t *hip, uint64_t l)
 575 {
 576         *(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address) = l;
 577 }
 578 
 579 static void
 580 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
 581 {
 582         *(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
 583 }
 584 
 585 static void
 586 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
 587 {
 588         *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
 589 }
 590 
 591 static void
 592 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
 593 {
 594         if (hip->timer_n_config[n].size_cap == 1)
 595                 *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
 596                     hip->logical_address, n) = l;
 597         else
 598                 *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
 599                     hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
 600 }
 601 
 602 static void
 603 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
 604 {
 605         *(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
 606 }
 607 
 608 static void
 609 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
 610 {
 611         uint64_t l;
 612 
 613         l = hpet_read_timer_N_config(hip, timer_n);
 614         l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
 615         hpet_write_timer_N_config(hip, timer_n, l);
 616 }
 617 
 618 static void
 619 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
 620 {
 621         uint64_t l;
 622 
 623         l = hpet_read_timer_N_config(hip, timer_n);
 624         l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
 625         hpet_write_timer_N_config(hip, timer_n, l);
 626 }
 627 
 628 static void
 629 hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l)
 630 {
 631         uint32_t        *address;
 632 
 633         /*
 634          * HPET spec 1.0a states main counter register should be halted before
 635          * it is written to.
 636          */
 637         ASSERT(!(hpet_read_gen_config(hip) & HPET_GCFR_ENABLE_CNF));
 638 
 639         if (hip->gen_cap.count_size_cap == 1) {
 640                 *(uint64_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address)
 641                     = l;
 642         } else {
 643                 address = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(
 644                     hip->logical_address);
 645 
 646                 address[0] = (uint32_t)(l & 0xFFFFFFFF);
 647         }
 648 }
 649 
 650 /*
 651  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
 652  *
 653  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
 654  * from apic_picinit() psm_ops apic_ops entry point after we return from
 655  * apic_init() psm_ops entry point.
 656  */
 657 static uint32_t
 658 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
 659 {
 660         uint32_t retval;
 661 
 662         retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
 663             vector, NULL, NULL, NULL, NULL);
 664         if (retval == 0) {
 665                 cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
 666                 return (AE_BAD_PARAMETER);
 667         }
 668         return (AE_OK);
 669 }
 670 
 671 /*
 672  * The HPET timers specify which I/O APIC interrupts they can be routed to.
 673  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
 674  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
 675  * timer's timer_n_config register.
 676  */
 677 static int
 678 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
 679 {
 680         int     timer;
 681         int     intr;
 682 
 683         for (timer = HPET_FIRST_NON_LEGACY_TIMER;
 684             timer < hip->gen_cap.num_tim_cap; ++timer) {
 685 
 686                 if (!hpet_timer_available(hip->allocated_timers, timer))
 687                         continue;
 688 
 689                 intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
 690                 if (intr >= 0) {
 691                         hpet_timer_alloc(&hip->allocated_timers, timer);
 692                         hip->cstate_timer.timer = timer;
 693                         hip->cstate_timer.intr = intr;
 694                         return (timer);
 695                 }
 696         }
 697 
 698         return (-1);
 699 }
 700 
 701 /*
 702  * Mark this timer as used.
 703  */
 704 static void
 705 hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
 706 {
 707         *allocated_timers |= 1 << n;
 708 }
 709 
 710 /*
 711  * Check if this timer is available.
 712  * No mutual exclusion because only one thread uses this.
 713  */
 714 static int
 715 hpet_timer_available(uint32_t allocated_timers, uint32_t n)
 716 {
 717         return ((allocated_timers & (1 << n)) == 0);
 718 }
 719 
 720 /*
  721  * Set up timer N to route its interrupt to I/O APIC.
 722  */
 723 static void
 724 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
 725 {
 726         uint64_t conf;
 727 
 728         conf = hpet_read_timer_N_config(hip, timer_n);
 729 
 730         /*
 731          * Caller is required to verify this interrupt route is supported.
 732          */
 733         ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
 734 
 735         conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;       /* use IOAPIC */
 736         conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
 737         conf &= ~HPET_TIMER_N_TYPE_CNF_BIT; /* non periodic */
 738         conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;      /* disabled */
 739         conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;  /* Level Triggered */
 740 
 741         hpet_write_timer_N_config(hip, timer_n, conf);
 742 }
 743 
 744 /*
 745  * The HPET's Main Counter is not stopped before programming an HPET timer.
 746  * This will allow the HPET to be used as a time source.
 747  * The programmed timer interrupt may occur before this function returns.
 748  * Callers must block interrupts before calling this function if they must
 749  * guarantee the interrupt is handled after this function returns.
 750  *
 751  * Return 0 if main counter is less than timer after enabling timer.
 752  * The interrupt was programmed, but it may fire before this returns.
  753  * Return !0 if the main counter is already past the timer's comparator.
  754  * The timer may not fire until wraparound, and we cannot tell if it fired.
 755  *
 756  * delta is in HPET ticks.
 757  *
 758  * Writing a 64-bit value to a 32-bit register will "wrap around".
 759  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
 760  */
 761 int
 762 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
 763 {
 764         uint64_t time, program;
 765 
 766         program = hpet_read_main_counter_value(hip);
 767         program += delta;
 768         hpet_write_timer_N_comp(hip, timer, program);
 769 
 770         time = hpet_read_main_counter_value(hip);
 771         if (time < program)
 772                 return (AE_OK);
 773 
 774         return (AE_TIME);
 775 }
 776 
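/*
 * Illustrative sketch, not part of this driver: how a caller might arm the
 * proxy timer for an absolute wakeup time.  The HRTIME_TO_HPET_TICKS()
 * conversion and the handling of a failed return mirror
 * hpet_guaranteed_schedule() below; the helper name is hypothetical.
 */
/* LINTED E_STATIC_UNUSED */
static boolean_t
hpet_example_arm_timer(hpet_info_t *hip, uint32_t timer, hrtime_t wakeup)
{
        hrtime_t        delta = wakeup - gethrtime();

        if (delta <= 0)
                return (B_FALSE);       /* wakeup time is already in the past */

        /*
         * A non-AE_OK return means the comparator may already be behind the
         * main counter; the caller must wake the target CPU itself.
         */
        return (hpet_timer_program(hip, timer,
            HRTIME_TO_HPET_TICKS(delta)) == AE_OK ? B_TRUE : B_FALSE);
}
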
 777 /*
 778  * CPR and power policy-change callback entry point.
 779  */
 780 boolean_t
 781 hpet_callback(int code)
 782 {
 783         switch (code) {
 784         case PM_DEFAULT_CPU_DEEP_IDLE:
 785                 /*FALLTHROUGH*/
 786         case PM_ENABLE_CPU_DEEP_IDLE:
 787                 /*FALLTHROUGH*/
 788         case PM_DISABLE_CPU_DEEP_IDLE:
 789                 return (hpet_deep_idle_config(code));
 790 
 791         case CB_CODE_CPR_RESUME:
 792                 /*FALLTHROUGH*/
 793         case CB_CODE_CPR_CHKPT:
 794                 return (hpet_cpr(code));
 795 
 796         case CST_EVENT_MULTIPLE_CSTATES:
 797                 hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
 798                 return (B_TRUE);
 799 
 800         case CST_EVENT_ONE_CSTATE:
 801                 hpet_cst_callback(CST_EVENT_ONE_CSTATE);
 802                 return (B_TRUE);
 803 
 804         default:
 805                 cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
 806                 return (B_FALSE);
 807         }
 808 }
 809 
 810 /*
 811  * According to the HPET spec 1.0a: the Operating System must save and restore
 812  * HPET event timer hardware context through ACPI sleep state transitions.
 813  * Timer registers (including the main counter) may not be preserved through
  814  * ACPI S3, S4, or S5 sleep states.  This code does not support S1 or S2.
 815  *
 816  * Current HPET state is already in hpet.supported and
 817  * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
 818  * Timer state.
 819  *
 820  * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
 821  * S4, and it is not saved/restored here.  Future projects cannot expect the
  822  * Main Counter to be monotonically (or accurately) increasing across CPR.
 823  *
 824  * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
 825  * CPUs are awake and in a spin loop before the system suspends.  The HPET is
 826  * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
 827  * It is safe to leave the HPET running as the system suspends; we just
 828  * disable the timer from generating interrupts here.
 829  */
 830 static boolean_t
 831 hpet_cpr(int code)
 832 {
 833         ulong_t         intr, dead_count = 0;
 834         hrtime_t        dead = gethrtime() + hpet_spin_timeout;
 835         boolean_t       ret = B_TRUE;
 836 
 837         mutex_enter(&hpet_state_lock);
 838         switch (code) {
 839         case CB_CODE_CPR_CHKPT:
 840                 if (hpet_state.proxy_installed == B_FALSE)
 841                         break;
 842 
 843                 hpet_state.cpr = B_TRUE;
 844 
 845                 intr = intr_clear();
 846                 while (!mutex_tryenter(&hpet_proxy_lock)) {
 847                         /*
 848                          * spin
 849                          */
 850                         intr_restore(intr);
 851                         if (dead_count++ > hpet_spin_check) {
 852                                 dead_count = 0;
 853                                 if (gethrtime() > dead) {
 854                                         hpet_state.cpr = B_FALSE;
 855                                         mutex_exit(&hpet_state_lock);
 856                                         cmn_err(CE_NOTE, "!hpet_cpr: deadman");
 857                                         return (B_FALSE);
 858                                 }
 859                         }
 860                         intr = intr_clear();
 861                 }
 862                 hpet_expire_all();
 863                 mutex_exit(&hpet_proxy_lock);
 864                 intr_restore(intr);
 865 
 866                 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 867                 break;
 868 
 869         case CB_CODE_CPR_RESUME:
 870                 if (hpet_resume() == B_TRUE)
 871                         hpet_state.cpr = B_FALSE;
 872                 else
 873                         cmn_err(CE_NOTE, "!hpet_resume failed.");
 874                 break;
 875 
 876         default:
 877                 cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
 878                 ret = B_FALSE;
 879                 break;
 880         }
 881         mutex_exit(&hpet_state_lock);
 882         return (ret);
 883 }
 884 
 885 /*
 886  * Assume the HPET stopped in Suspend state and timer state was lost.
 887  */
 888 static boolean_t
 889 hpet_resume(void)
 890 {
 891         if (hpet.supported != HPET_TIMER_SUPPORT)
 892                 return (B_TRUE);
 893 
 894         /*
 895          * The HPET spec does not specify if Legacy Replacement Route is
 896          * on or off by default, so we set it off here.
 897          */
 898         (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
 899 
 900         if (hpet_start_main_counter(&hpet_info) != AE_OK) {
 901                 cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
 902                 hpet.supported = HPET_NO_SUPPORT;
 903                 if (hpet_state.proxy_installed == B_TRUE) {
 904                         hpet_state.proxy_installed = B_FALSE;
 905                         hpet_uninstall_interrupt_handler();
 906                 }
 907                 return (B_FALSE);
 908         }
 909 
 910         if (hpet_state.proxy_installed == B_FALSE)
 911                 return (B_TRUE);
 912 
 913         hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
 914             hpet_info.cstate_timer.intr);
 915         if (hpet_state.cpu_deep_idle == B_TRUE)
 916                 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 917 
 918         return (B_TRUE);
 919 }
 920 
 921 /*
 922  * Callback to enable/disable Deep C-States based on power.conf setting.
 923  */
 924 static boolean_t
 925 hpet_deep_idle_config(int code)
 926 {
 927         ulong_t         intr, dead_count = 0;
 928         hrtime_t        dead = gethrtime() + hpet_spin_timeout;
 929         boolean_t       ret = B_TRUE;
 930 
 931         mutex_enter(&hpet_state_lock);
 932         switch (code) {
 933         case PM_DEFAULT_CPU_DEEP_IDLE:
 934                 /*FALLTHROUGH*/
 935         case PM_ENABLE_CPU_DEEP_IDLE:
 936 
 937                 if (hpet_state.cpu_deep_idle == B_TRUE)
 938                         break;
 939 
 940                 if (hpet_state.proxy_installed == B_FALSE) {
 941                         ret = B_FALSE;  /* Deep C-States not supported */
 942                         break;
 943                 }
 944 
 945                 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 946                 hpet_state.cpu_deep_idle = B_TRUE;
 947                 break;
 948 
 949         case PM_DISABLE_CPU_DEEP_IDLE:
 950 
 951                 if ((hpet_state.cpu_deep_idle == B_FALSE) ||
 952                     (hpet_state.proxy_installed == B_FALSE))
 953                         break;
 954 
 955                 /*
 956                  * The order of these operations is important to avoid
 957                  * lost wakeups: Set a flag to refuse all future LAPIC Timer
 958                  * proxy requests, then wake up all CPUs from deep C-state,
 959                  * and finally disable the HPET interrupt-generating timer.
 960                  */
 961                 hpet_state.cpu_deep_idle = B_FALSE;
 962 
 963                 intr = intr_clear();
 964                 while (!mutex_tryenter(&hpet_proxy_lock)) {
 965                         /*
 966                          * spin
 967                          */
 968                         intr_restore(intr);
 969                         if (dead_count++ > hpet_spin_check) {
 970                                 dead_count = 0;
 971                                 if (gethrtime() > dead) {
 972                                         hpet_state.cpu_deep_idle = B_TRUE;
 973                                         mutex_exit(&hpet_state_lock);
 974                                         cmn_err(CE_NOTE,
 975                                             "!hpet_deep_idle_config: deadman");
 976                                         return (B_FALSE);
 977                                 }
 978                         }
 979                         intr = intr_clear();
 980                 }
 981                 hpet_expire_all();
 982                 mutex_exit(&hpet_proxy_lock);
 983                 intr_restore(intr);
 984 
 985                 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 986                 break;
 987 
 988         default:
 989                 cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
 990                     code);
 991                 ret = B_FALSE;
 992                 break;
 993         }
 994         mutex_exit(&hpet_state_lock);
 995 
 996         return (ret);
 997 }
 998 
 999 /*
1000  * Callback for _CST c-state change notifications.
1001  */
1002 static void
1003 hpet_cst_callback(uint32_t code)
1004 {
1005         ulong_t         intr, dead_count = 0;
1006         hrtime_t        dead = gethrtime() + hpet_spin_timeout;
1007 
1008         switch (code) {
1009         case CST_EVENT_ONE_CSTATE:
1010                 hpet_state.uni_cstate = B_TRUE;
1011                 intr = intr_clear();
1012                 while (!mutex_tryenter(&hpet_proxy_lock)) {
1013                         /*
1014                          * spin
1015                          */
1016                         intr_restore(intr);
1017                         if (dead_count++ > hpet_spin_check) {
1018                                 dead_count = 0;
1019                                 if (gethrtime() > dead) {
1020                                         hpet_expire_all();
1021                                         cmn_err(CE_NOTE,
1022                                             "!hpet_cst_callback: deadman");
1023                                         return;
1024                                 }
1025                         }
1026                         intr = intr_clear();
1027                 }
1028                 hpet_expire_all();
1029                 mutex_exit(&hpet_proxy_lock);
1030                 intr_restore(intr);
1031                 break;
1032 
1033         case CST_EVENT_MULTIPLE_CSTATES:
1034                 hpet_state.uni_cstate = B_FALSE;
1035                 break;
1036 
1037         default:
1038                 cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
1039                 break;
1040         }
1041 }
1042 
1043 /*
1044  * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
 1045  * Used to wake up CPUs from Deep C-state when their Local APIC Timer stops.
1046  * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1047  * needed.
1048  */
1049 /* ARGSUSED */
1050 static uint_t
1051 hpet_isr(char *arg)
1052 {
1053         uint64_t        timer_status;
1054         uint64_t        timer_mask;
1055         ulong_t         intr, dead_count = 0;
1056         hrtime_t        dead = gethrtime() + hpet_isr_spin_timeout;
1057 
1058         timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1059 
1060         /*
1061          * We are using a level-triggered interrupt.
1062          * HPET sets timer's General Interrupt Status Register bit N.
1063          * ISR checks this bit to see if it needs servicing.
1064          * ISR then clears this bit by writing 1 to that bit.
1065          */
1066         timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1067         if (!(timer_status & timer_mask))
1068                 return (DDI_INTR_UNCLAIMED);
1069         hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1070 
1071         /*
1072          * Do not touch ISR data structures before checking the HPET's General
1073          * Interrupt Status register.  The General Interrupt Status register
1074          * will not be set by hardware until after timer interrupt generation
1075          * is enabled by software.  Software allocates necessary data
1076          * structures before enabling timer interrupts.  ASSERT the software
1077          * data structures required to handle this interrupt are initialized.
1078          */
1079         ASSERT(hpet_proxy_users != NULL);
1080 
1081         /*
1082          * CPUs in deep c-states do not enable interrupts until after
1083          * performing idle cleanup which includes descheduling themselves from
1084          * the HPET.  The CPU running this ISR will NEVER find itself in the
1085          * proxy list.  A lost wakeup may occur if this is false.
1086          */
1087         ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1088 
1089         /*
1090          * Higher level interrupts may deadlock with CPUs going idle if this
 1091  * ISR is preempted while holding hpet_proxy_lock.
1092          */
1093         intr = intr_clear();
1094         while (!mutex_tryenter(&hpet_proxy_lock)) {
1095                 /*
1096                  * spin
1097                  */
1098                 intr_restore(intr);
1099                 if (dead_count++ > hpet_spin_check) {
1100                         dead_count = 0;
1101                         if (gethrtime() > dead) {
1102                                 hpet_expire_all();
1103                                 return (DDI_INTR_CLAIMED);
1104                         }
1105                 }
1106                 intr = intr_clear();
1107         }
1108         (void) hpet_guaranteed_schedule(HPET_INFINITY);
1109         mutex_exit(&hpet_proxy_lock);
1110         intr_restore(intr);
1111 
1112         return (DDI_INTR_CLAIMED);
1113 }
1114 
1115 /*
1116  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
1117  * woken up because they can no longer rely on the HPET's Timer to wake them.
 1118  * We do not need to wait for CPUs to wake up.
1119  */
1120 static void
1121 hpet_expire_all(void)
1122 {
1123         processorid_t   id;
1124 
1125         for (id = 0; id < max_ncpus; ++id) {
1126                 if (hpet_proxy_users[id] != HPET_INFINITY) {
1127                         hpet_proxy_users[id] = HPET_INFINITY;
1128                         if (id != CPU->cpu_id)
1129                                 poke_cpu(id);
1130                 }
1131         }
1132 }
1133 
1134 /*
1135  * To avoid missed wakeups this function must guarantee either the HPET timer
1136  * was successfully programmed to the next expire time or there are no waiting
1137  * CPUs.
1138  *
1139  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1140  * generate its next interrupt to happen at required_wakeup_time or sooner.
1141  * Returns B_TRUE if the HPET was programmed to interrupt by
1142  * required_wakeup_time, B_FALSE if not.
1143  */
1144 static boolean_t
1145 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1146 {
1147         hrtime_t        now, next_proxy_time;
1148         processorid_t   id, next_proxy_id;
1149         int             proxy_timer = hpet_info.cstate_timer.timer;
1150         boolean_t       done = B_FALSE;
1151 
1152         ASSERT(mutex_owned(&hpet_proxy_lock));
1153 
1154         /*
1155          * Loop until we successfully program the HPET,
1156          * or no CPUs are scheduled to use the HPET as a proxy.
1157          */
1158         do {
1159                 /*
1160                  * Wake all CPUs that expired before now.
1161                  * Find the next CPU to wake up and next HPET program time.
1162                  */
1163                 now = gethrtime();
1164                 next_proxy_time = HPET_INFINITY;
1165                 next_proxy_id = CPU->cpu_id;
1166                 for (id = 0; id < max_ncpus; ++id) {
1167                         if (hpet_proxy_users[id] < now) {
1168                                 hpet_proxy_users[id] = HPET_INFINITY;
1169                                 if (id != CPU->cpu_id)
1170                                         poke_cpu(id);
1171                         } else if (hpet_proxy_users[id] < next_proxy_time) {
1172                                 next_proxy_time = hpet_proxy_users[id];
1173                                 next_proxy_id = id;
1174                         }
1175                 }
1176 
1177                 if (next_proxy_time == HPET_INFINITY) {
1178                         done = B_TRUE;
1179                         /*
1180                          * There are currently no CPUs using the HPET's Timer
1181                          * as a proxy for their LAPIC Timer.  The HPET's Timer
1182                          * does not need to be programmed.
1183                          *
1184                          * Letting the HPET timer wrap around to the current
1185                          * time is the longest possible timeout.
1186                          * A 64-bit timer will wrap around in ~ 2^44 seconds.
1187                          * A 32-bit timer will wrap around in ~ 2^12 seconds.
1188                          *
1189                          * Disabling the HPET's timer interrupt requires a
1190                          * (relatively expensive) write to the HPET.
1191                          * Instead we do nothing.
1192                          *
1193                          * We are gambling some CPU will attempt to enter a
1194                          * deep c-state before the timer wraps around.
1195                          * We assume one spurious interrupt in a little over an
1196                          * hour has less performance impact than writing to the
 1197  * HPET's timer disable bit every time all CPUs wake up
1198                          * from deep c-state.
1199                          */
1200 
1201                 } else {
1202                         /*
1203                          * Idle CPUs disable interrupts before programming the
1204                          * HPET to prevent a lost wakeup if the HPET
1205                          * interrupts the idle cpu before it can enter a
1206                          * Deep C-State.
1207                          */
1208                         if (hpet_timer_program(&hpet_info, proxy_timer,
1209                             HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1210                             != AE_OK) {
1211                                 /*
1212                                  * We could not program the HPET to wakeup the
1213                                  * next CPU.  We must wake the CPU ourself to
1214                                  * avoid a lost wakeup.
1215                                  */
1216                                 hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1217                                 if (next_proxy_id != CPU->cpu_id)
1218                                         poke_cpu(next_proxy_id);
1219                         } else {
1220                                 done = B_TRUE;
1221                         }
1222                 }
1223 
1224         } while (!done);
1225 
1226         return (next_proxy_time <= required_wakeup_time);
1227 }
1228 
1229 /*
1230  * Use an HPET timer to act as this CPU's proxy local APIC timer.
1231  * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
 1232  * Called by the idle thread with interrupts disabled.
1233  * Always returns with interrupts disabled.
1234  *
1235  * There are 3 possible outcomes from this function:
1236  * 1. The Local APIC Timer was already disabled before this function was called.
1237  *      LAPIC TIMER     : disabled
1238  *      HPET            : not scheduled to wake this CPU
1239  *      *lapic_expire   : (hrtime_t)HPET_INFINITY
1240  *      Returns         : B_TRUE
1241  * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1242  *      LAPIC TIMER     : disabled
1243  *      HPET            : scheduled to wake this CPU
1244  *      *lapic_expire   : hrtime_t when LAPIC timer would have expired
1245  *      Returns         : B_TRUE
 1246  * 3. Failed to program the HPET to act as a LAPIC Timer proxy.
1247  *      LAPIC TIMER     : enabled
1248  *      HPET            : not scheduled to wake this CPU
1249  *      *lapic_expire   : (hrtime_t)HPET_INFINITY
1250  *      Returns         : B_FALSE
1251  *
1252  * The idle thread cannot enter Deep C-State in case 3.
1253  * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1254  */
1255 static boolean_t
1256 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1257 {
1258         extern hrtime_t apic_timer_stop_count(void);
1259         extern void     apic_timer_restart(hrtime_t);
1260         hrtime_t        now, expire, dead;
1261         uint64_t        lapic_count, dead_count;
1262         cpupart_t       *cpu_part;
1263         processorid_t   cpu_sid;
1264         processorid_t   cpu_id = CPU->cpu_id;
1265         processorid_t   id;
1266         boolean_t       rslt;
1267         boolean_t       hset_update;
1268 
1269         cpu_part = CPU->cpu_part;
1270         cpu_sid = CPU->cpu_seqid;
1271 
1272         ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1273 
1274         /*
1275          * A critical section exists between when the HPET is programmed
1276          * to interrupt the CPU and when this CPU enters an idle state.
1277          * Interrupts must be blocked during that time to prevent lost
1278          * CBE wakeup interrupts from either LAPIC or HPET.
1279          *
1280          * Must block interrupts before acquiring hpet_proxy_lock to prevent
1281          * a deadlock with the ISR if the ISR runs on this CPU after the
1282          * idle thread acquires the mutex but before it clears interrupts.
1283          */
1284         ASSERT(!interrupts_enabled());
1285         lapic_count = apic_timer_stop_count();
1286         now = gethrtime();
1287         dead = now + hpet_idle_spin_timeout;
1288         *lapic_expire = expire = now + lapic_count;
1289         if (lapic_count == (hrtime_t)-1) {
1290                 /*
1291                  * LAPIC timer is currently disabled.
1292                  * Will not use the HPET as a LAPIC Timer proxy.
1293                  */
1294                 *lapic_expire = (hrtime_t)HPET_INFINITY;
1295                 return (B_TRUE);
1296         }
1297 
1298         /*
1299          * Serialize hpet_proxy data structure manipulation.
1300          */
1301         dead_count = 0;
1302         while (!mutex_tryenter(&hpet_proxy_lock)) {
1303                 /*
1304                  * spin
1305                  */
1306                 apic_timer_restart(expire);
1307                 sti();
1308                 cli();
1309 
1310                 if (dead_count++ > hpet_spin_check) {
1311                         dead_count = 0;
1312                         hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1313                             (ncpus > 1));
1314                         if (hset_update &&
1315                             !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1316                                 *lapic_expire = (hrtime_t)HPET_INFINITY;
1317                                 return (B_FALSE);
1318                         }
1319                 }
1320 
1321                 lapic_count = apic_timer_stop_count();
1322                 now = gethrtime();
1323                 *lapic_expire = expire = now + lapic_count;
1324                 if (lapic_count == (hrtime_t)-1) {
1325                         /*
1326                          * LAPIC timer is currently disabled.
1327                          * Will not use the HPET as a LAPIC Timer proxy.
1328                          */
1329                         *lapic_expire = (hrtime_t)HPET_INFINITY;
1330                         return (B_TRUE);
1331                 }
1332                 if (now > dead) {
1333                         apic_timer_restart(expire);
1334                         *lapic_expire = (hrtime_t)HPET_INFINITY;
1335                         return (B_FALSE);
1336                 }
1337         }
1338 
1339         if ((hpet_state.cpr == B_TRUE) ||
1340             (hpet_state.cpu_deep_idle == B_FALSE) ||
1341             (hpet_state.proxy_installed == B_FALSE) ||
1342             (hpet_state.uni_cstate == B_TRUE)) {
1343                 mutex_exit(&hpet_proxy_lock);
1344                 apic_timer_restart(expire);
1345                 *lapic_expire = (hrtime_t)HPET_INFINITY;
1346                 return (B_FALSE);
1347         }
1348 
1349         hpet_proxy_users[cpu_id] = expire;
1350 
1351         /*
1352          * We are done if another cpu is scheduled on the HPET with an
1353          * expire time before us.  The next HPET interrupt has been programmed
1354          * to fire before our expire time.
1355          */
1356         for (id = 0; id < max_ncpus; ++id) {
1357                 if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1358                         mutex_exit(&hpet_proxy_lock);
1359                         return (B_TRUE);
1360                 }
1361         }
1362 
1363         /*
1364          * We are the next lAPIC to expire.
1365          * Program the HPET with our expire time.
1366          */
1367         rslt = hpet_guaranteed_schedule(expire);
1368         mutex_exit(&hpet_proxy_lock);
1369 
1370         if (rslt == B_FALSE) {
1371                 apic_timer_restart(expire);
1372                 *lapic_expire = (hrtime_t)HPET_INFINITY;
1373         }
1374 
1375         return (rslt);
1376 }
1377 
1378 /*
1379  * Called by the idle thread when waking up from Deep C-state before enabling
1380  * interrupts.  With an array data structure it is faster to always remove
 1381  * ourselves from the array without checking whether the HPET ISR already did.
1382  *
1383  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1384  * We do not reprogram the HPET here because this CPU has real work to do.
 1385  * On an idle system the CPU was probably woken up by the HPET's ISR.
1386  * On a heavily loaded system CPUs are not going into Deep C-state.
1387  * On a moderately loaded system another CPU will usually enter Deep C-state
1388  * and reprogram the HPET before the HPET fires with our wakeup.
1389  */
1390 static void
1391 hpet_use_lapic_timer(hrtime_t expire)
1392 {
1393         extern void     apic_timer_restart(hrtime_t);
1394         processorid_t   cpu_id = CPU->cpu_id;
1395 
1396         ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1397         ASSERT(!interrupts_enabled());
1398 
1399         hpet_proxy_users[cpu_id] = HPET_INFINITY;
1400 
1401         /*
1402          * Do not enable a LAPIC Timer that was initially disabled.
1403          */
1404         if (expire != HPET_INFINITY)
1405                 apic_timer_restart(expire);
1406 }
1407 
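/*
 * Illustrative sketch, not part of this driver: the calling pattern the idle
 * thread is expected to follow around the two routines above, based on their
 * block comments.  The Deep C-state entry itself is elided and the helper
 * name is hypothetical.
 */
/* LINTED E_STATIC_UNUSED */
static void
hpet_example_idle_sequence(void)
{
        hrtime_t        lapic_expire;

        /* Interrupts are assumed to be disabled already. */
        if (hpet_use_hpet_timer(&lapic_expire)) {
                /* ... enter a Deep C-state here (elided) ... */

                /*
                 * On wakeup, before enabling interrupts, drop this CPU from
                 * the proxy array and restart the LAPIC timer if it was
                 * running when we entered.
                 */
                hpet_use_lapic_timer(lapic_expire);
        }
        /* Otherwise stay in a shallow idle state; the LAPIC timer still runs. */
}
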
1408 /*
1409  * Initialize data structure to keep track of CPUs using HPET as a proxy for
1410  * their stalled local APIC timer.  For now this is just an array.
1411  */
1412 static void
1413 hpet_init_proxy_data(void)
1414 {
1415         processorid_t   id;
1416 
1417         /*
1418          * Use max_ncpus for hot plug compliance.
1419          */
1420         hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1421             KM_SLEEP);
1422 
1423         /*
1424          * Unused entries always contain HPET_INFINITY.
1425          */
1426         for (id = 0; id < max_ncpus; ++id)
1427                 hpet_proxy_users[id] = HPET_INFINITY;
1428 }