/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All Rights Reserved.
 */

/*
 * CPU Device driver. The driver is not DDI-compliant.
 *
 * The driver supports the following features:
 *      - Power management.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sdt.h>
#include <sys/epm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/cpudrv_mach.h>
#include <sys/msacct.h>

/*
 * CPU power management
 *
 * The supported power saving model is to slow down the CPU (on SPARC by
 * dividing the CPU clock and on x86 by dropping to a lower P-state).
 * Periodically we determine how much time the CPU spent running the idle
 * thread and threads in user mode during the last quantum.  If the idle
 * thread ran less than its low water mark for the current speed for a
 * number of consecutive sampling periods, or the number of threads running
 * in user mode is above its high water mark, we arrange to go to a higher
 * speed.  If the idle thread ran more than its high water mark, without
 * dropping below the mark a number of consecutive times, and the number of
 * threads running in user mode is below its low water mark, we arrange to
 * go to the next lower speed.  While going down, we step through all the
 * speeds.  While going up, we go straight to the maximum speed to minimize
 * impact on the user, but the driver has provisions to go to other speeds.
 *
 * The driver has no knowledge of any particular implementation of this
 * scheme and works with all CPUs supporting this model.  On SPARC, the
 * driver determines the supported speeds from the 'clock-divisors' property
 * created by OBP.  On x86, the driver retrieves the supported speeds from
 * ACPI.
 */
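/*
 * Example (illustrative thresholds only; the real values come from the
 * CPUDRV_IDLE_* tunables below): with an idle low water mark of 20% and a
 * high water mark of 80% of a quantum, a CPU that ran the idle thread less
 * than 20% of the time for several consecutive quanta is raised back to
 * full speed, while one that stayed above 80% idle for several consecutive
 * quanta is stepped down to the next slower speed.
 */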

/*
 * Configuration function prototypes and data structures
 */
static int cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int cpudrv_power(dev_info_t *dip, int comp, int level);

struct dev_ops cpudrv_ops = {
        DEVO_REV,               /* rev */
        0,                      /* refcnt */
        nodev,                  /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        cpudrv_attach,          /* attach */
        cpudrv_detach,          /* detach */
        nodev,                  /* reset */
        (struct cb_ops *)NULL,  /* cb_ops */
        (struct bus_ops *)NULL, /* bus_ops */
        cpudrv_power,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};

static struct modldrv modldrv = {
        &mod_driverops,         /* modops */
        "CPU Driver",           /* linkinfo */
        &cpudrv_ops,            /* dev_ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,               /* rev */
        &modldrv,               /* linkage */
        NULL
};

/*
 * Function prototypes
 */
static int cpudrv_init(cpudrv_devstate_t *cpudsp);
static void cpudrv_free(cpudrv_devstate_t *cpudsp);
static int cpudrv_comp_create(cpudrv_devstate_t *cpudsp);
static void cpudrv_monitor_disp(void *arg);
static void cpudrv_monitor(void *arg);

/*
 * Driver global variables
 */
uint_t cpudrv_debug = 0;
void *cpudrv_state;
static uint_t cpudrv_idle_hwm = CPUDRV_IDLE_HWM;
static uint_t cpudrv_idle_lwm = CPUDRV_IDLE_LWM;
static uint_t cpudrv_idle_buf_zone = CPUDRV_IDLE_BUF_ZONE;
static uint_t cpudrv_idle_bhwm_cnt_max = CPUDRV_IDLE_BHWM_CNT_MAX;
static uint_t cpudrv_idle_blwm_cnt_max = CPUDRV_IDLE_BLWM_CNT_MAX;
static uint_t cpudrv_user_hwm = CPUDRV_USER_HWM;

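/*
 * Cleared on any attach failure (see cpudrv_attach()) so that CPU power
 * management is disabled driver-wide rather than left half-configured.
 */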
boolean_t cpudrv_enabled = B_TRUE;

/*
 * cpudrv_direct_pm allows user applications to directly control the
 * power state transitions (direct pm) without following the normal
 * direct pm protocol. This is needed because the normal protocol
 * requires that a device only be lowered when it is idle, and be
 * brought up when it requests to do so by calling pm_raise_power().
 * Ignoring this protocol is harmless for the CPU (other than speed).
 * Moreover, it might be the case that the CPU is never idle, or wants
 * to be at a higher speed because of the additional CPU cycles required
 * to run the user application.
 *
 * The driver will still report idle/busy status to the framework. Although
 * the framework will ignore this information for direct pm devices and not
 * try to bring them down when idle, user applications can still use this
 * information if they want.
 *
 * In the future, provide an ioctl to control setting of this mode. In
 * that case, this variable should move to the state structure and
 * be protected by the lock in the state structure.
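 *
 * Until such an ioctl exists, the variable can be set at boot through the
 * standard module-tunable mechanism (assuming the driver module is named
 * "cpudrv"), e.g. in /etc/system:
 *
 *      set cpudrv:cpudrv_direct_pm = 1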
 */
int cpudrv_direct_pm = 0;

/*
 * Arranges for the handler function to be called at the interval suitable
 * for the current speed.
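 * While the speed is still unknown (cur_spd == NULL), the
 * CPUDRV_QUANT_CNT_OTHR interval is used.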
 */
#define CPUDRV_MONITOR_INIT(cpudsp) { \
        if (cpudrv_is_enabled(cpudsp)) { \
                ASSERT(mutex_owned(&(cpudsp)->lock)); \
                (cpudsp)->cpudrv_pm.timeout_id = \
                    timeout(cpudrv_monitor_disp, \
                    (cpudsp), (((cpudsp)->cpudrv_pm.cur_spd == NULL) ? \
                    CPUDRV_QUANT_CNT_OTHR : \
                    (cpudsp)->cpudrv_pm.cur_spd->quant_cnt)); \
        } \
}

/*
 * Arranges for the handler function not to be called back.
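 *
 * Note that cpudsp->lock is dropped around the untimeout() call: the
 * timeout/taskq path (cpudrv_monitor_disp() and cpudrv_monitor()) takes
 * that lock itself, so holding it across untimeout() could deadlock.
 * The timeout_count/timeout_cv handshake then waits for any already
 * dispatched monitor task to drain before the lock is reacquired.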
 */
#define CPUDRV_MONITOR_FINI(cpudsp) { \
        timeout_id_t tmp_tid; \
        ASSERT(mutex_owned(&(cpudsp)->lock)); \
        tmp_tid = (cpudsp)->cpudrv_pm.timeout_id; \
        (cpudsp)->cpudrv_pm.timeout_id = 0; \
        mutex_exit(&(cpudsp)->lock); \
        if (tmp_tid != 0) { \
                (void) untimeout(tmp_tid); \
                mutex_enter(&(cpudsp)->cpudrv_pm.timeout_lock); \
                while ((cpudsp)->cpudrv_pm.timeout_count != 0) \
                        cv_wait(&(cpudsp)->cpudrv_pm.timeout_cv, \
                            &(cpudsp)->cpudrv_pm.timeout_lock); \
                mutex_exit(&(cpudsp)->cpudrv_pm.timeout_lock); \
        } \
        mutex_enter(&(cpudsp)->lock); \
}

int
_init(void)
{
        int     error;

        DPRINTF(D_INIT, (" _init: function called\n"));
        if ((error = ddi_soft_state_init(&cpudrv_state,
            sizeof (cpudrv_devstate_t), 0)) != 0) {
                return (error);
        }

        if ((error = mod_install(&modlinkage)) != 0) {
                ddi_soft_state_fini(&cpudrv_state);
        }

        /*
         * Callbacks used by the PPM driver.
         */
        CPUDRV_SET_PPM_CALLBACKS();
        return (error);
}

int
_fini(void)
{
        int     error;

        DPRINTF(D_FINI, (" _fini: function called\n"));
        if ((error = mod_remove(&modlinkage)) == 0) {
                ddi_soft_state_fini(&cpudrv_state);
        }

        return (error);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver attach(9e) entry point.
 */
static int
cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        int                     instance;
        cpudrv_devstate_t       *cpudsp;

        instance = ddi_get_instance(dip);

        switch (cmd) {
        case DDI_ATTACH:
                DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
                    "DDI_ATTACH called\n", instance));
                if (!cpudrv_is_enabled(NULL))
                        return (DDI_FAILURE);
                if (ddi_soft_state_zalloc(cpudrv_state, instance) !=
                    DDI_SUCCESS) {
                        cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
                            "can't allocate state", instance);
                        cpudrv_enabled = B_FALSE;
                        return (DDI_FAILURE);
                }
                if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
                    NULL) {
                        cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
                            "can't get state", instance);
                        ddi_soft_state_free(cpudrv_state, instance);
                        cpudrv_enabled = B_FALSE;
                        return (DDI_FAILURE);
                }
                cpudsp->dip = dip;

                /*
                 * Find CPU number for this dev_info node.
                 */
                if (!cpudrv_get_cpu_id(dip, &(cpudsp->cpu_id))) {
                        cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
                            "can't convert dip to cpu_id", instance);
                        ddi_soft_state_free(cpudrv_state, instance);
                        cpudrv_enabled = B_FALSE;
                        return (DDI_FAILURE);
                }

                mutex_init(&cpudsp->lock, NULL, MUTEX_DRIVER, NULL);
                if (cpudrv_is_enabled(cpudsp)) {
                        if (cpudrv_init(cpudsp) != DDI_SUCCESS) {
                                cpudrv_enabled = B_FALSE;
                                cpudrv_free(cpudsp);
                                ddi_soft_state_free(cpudrv_state, instance);
                                return (DDI_FAILURE);
                        }
                        if (cpudrv_comp_create(cpudsp) != DDI_SUCCESS) {
                                cpudrv_enabled = B_FALSE;
                                cpudrv_free(cpudsp);
                                ddi_soft_state_free(cpudrv_state, instance);
                                return (DDI_FAILURE);
                        }
                        if (ddi_prop_update_string(DDI_DEV_T_NONE,
                            dip, "pm-class", "CPU") != DDI_PROP_SUCCESS) {
                                cpudrv_enabled = B_FALSE;
                                cpudrv_free(cpudsp);
                                ddi_soft_state_free(cpudrv_state, instance);
                                return (DDI_FAILURE);
                        }

                        /*
                         * A taskq is used to dispatch the routine that
                         * monitors CPU activity.
                         */
                        cpudsp->cpudrv_pm.tq = ddi_taskq_create(dip,
                            "cpudrv_monitor", CPUDRV_TASKQ_THREADS,
                            TASKQ_DEFAULTPRI, 0);

                        mutex_init(&cpudsp->cpudrv_pm.timeout_lock, NULL,
                            MUTEX_DRIVER, NULL);
                        cv_init(&cpudsp->cpudrv_pm.timeout_cv, NULL,
                            CV_DEFAULT, NULL);

                        /*
                         * The driver needs to assume that the CPU is running
                         * at an unknown speed at DDI_ATTACH and switch it to
                         * the needed speed. We assume that the initial needed
                         * speed is full speed.
                         */
                        /*
                         * We need to take the lock because cpudrv_monitor()
                         * will start running in parallel with attach().
                         */
                        mutex_enter(&cpudsp->lock);
                        cpudsp->cpudrv_pm.cur_spd = NULL;
                        cpudsp->cpudrv_pm.pm_started = B_FALSE;
                        /*
                         * We don't call pm_raise_power() directly from attach
                         * because driver attach for a slave CPU node can
                         * happen before the CPU is even initialized. We just
                         * start the monitoring system, which understands
                         * unknown speed and moves the CPU to top speed once
                         * it has been initialized.
                         */
                        CPUDRV_MONITOR_INIT(cpudsp);
                        mutex_exit(&cpudsp->lock);

                }

                if (!cpudrv_mach_init(cpudsp)) {
                        cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
                            "cpudrv_mach_init failed", instance);
                        cpudrv_enabled = B_FALSE;
                        cpudrv_free(cpudsp);
                        ddi_soft_state_free(cpudrv_state, instance);
                        return (DDI_FAILURE);
                }

                CPUDRV_INSTALL_MAX_CHANGE_HANDLER(cpudsp);

                (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip,
                    DDI_NO_AUTODETACH, 1);
                ddi_report_dev(dip);
                return (DDI_SUCCESS);

        case DDI_RESUME:
                DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
                    "DDI_RESUME called\n", instance));

                cpudsp = ddi_get_soft_state(cpudrv_state, instance);
                ASSERT(cpudsp != NULL);

                /*
                 * Nothing to do for resume if not doing active PM.
                 */
                if (!cpudrv_is_enabled(cpudsp))
                        return (DDI_SUCCESS);

                mutex_enter(&cpudsp->lock);
                /*
                 * The driver needs to assume that the CPU is running at an
                 * unknown speed at DDI_RESUME and switch it to the needed
                 * speed. We assume that the needed speed is full speed.
                 */
                cpudsp->cpudrv_pm.cur_spd = NULL;
                CPUDRV_MONITOR_INIT(cpudsp);
                mutex_exit(&cpudsp->lock);
                CPUDRV_REDEFINE_TOPSPEED(dip);
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }
}

/*
 * Driver detach(9e) entry point.
 */
static int
cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        int                     instance;
        cpudrv_devstate_t       *cpudsp;
        cpudrv_pm_t             *cpupm;

        instance = ddi_get_instance(dip);

        switch (cmd) {
        case DDI_DETACH:
                DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
                    "DDI_DETACH called\n", instance));

#if defined(__x86)
                cpudsp = ddi_get_soft_state(cpudrv_state, instance);
                ASSERT(cpudsp != NULL);

                /*
                 * Nothing to do for detach if not doing active PM.
                 */
                if (!cpudrv_is_enabled(cpudsp))
                        return (DDI_SUCCESS);

                /*
                 * Uninstall the PPC/_TPC change notification handler.
                 */
                CPUDRV_UNINSTALL_MAX_CHANGE_HANDLER(cpudsp);

                /*
                 * Destroy platform-specific resources.
                 */
                if (!cpudrv_mach_fini(cpudsp))
                        return (DDI_FAILURE);

                mutex_enter(&cpudsp->lock);
                CPUDRV_MONITOR_FINI(cpudsp);
                cv_destroy(&cpudsp->cpudrv_pm.timeout_cv);
                mutex_destroy(&cpudsp->cpudrv_pm.timeout_lock);
                ddi_taskq_destroy(cpudsp->cpudrv_pm.tq);
                cpudrv_free(cpudsp);
                mutex_exit(&cpudsp->lock);
                mutex_destroy(&cpudsp->lock);
                ddi_soft_state_free(cpudrv_state, instance);
                (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip,
                    DDI_NO_AUTODETACH, 0);
                return (DDI_SUCCESS);

#else
                /*
                 * If the only thing supported by the driver is power
                 * management, we can in the future enhance the driver and
                 * the framework that loads it to unload the driver when the
                 * user has disabled CPU power management.
                 */
                return (DDI_FAILURE);
#endif

        case DDI_SUSPEND:
                DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
                    "DDI_SUSPEND called\n", instance));

                cpudsp = ddi_get_soft_state(cpudrv_state, instance);
                ASSERT(cpudsp != NULL);

                /*
                 * Nothing to do for suspend if not doing active PM.
                 */
                if (!cpudrv_is_enabled(cpudsp))
                        return (DDI_SUCCESS);

                /*
                 * During a checkpoint-resume sequence, the framework will
                 * stop interrupts to quiesce kernel activity. This will
                 * leave our monitoring system ineffective. Handle this
                 * by stopping our monitoring system and bringing the CPU
                 * to full speed. In case we are in special direct pm
                 * mode, we leave the CPU at whatever speed it is. This
                 * is harmless other than speed.
                 */
                mutex_enter(&cpudsp->lock);
                cpupm = &(cpudsp->cpudrv_pm);

                DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: DDI_SUSPEND - "
                    "cur_spd %d, topspeed %d\n", instance,
                    cpupm->cur_spd->pm_level,
                    CPUDRV_TOPSPEED(cpupm)->pm_level));

                CPUDRV_MONITOR_FINI(cpudsp);

                if (!cpudrv_direct_pm && (cpupm->cur_spd !=
                    CPUDRV_TOPSPEED(cpupm))) {
                        if (cpupm->pm_busycnt < 1) {
                                if ((pm_busy_component(dip, CPUDRV_COMP_NUM)
                                    == DDI_SUCCESS)) {
                                        cpupm->pm_busycnt++;
                                } else {
                                        CPUDRV_MONITOR_INIT(cpudsp);
                                        mutex_exit(&cpudsp->lock);
                                        cmn_err(CE_WARN, "cpudrv_detach: "
                                            "instance %d: can't busy CPU "
                                            "component", instance);
                                        return (DDI_FAILURE);
                                }
                        }
                        mutex_exit(&cpudsp->lock);
                        if (pm_raise_power(dip, CPUDRV_COMP_NUM,
                            CPUDRV_TOPSPEED(cpupm)->pm_level) !=
                            DDI_SUCCESS) {
                                mutex_enter(&cpudsp->lock);
                                CPUDRV_MONITOR_INIT(cpudsp);
                                mutex_exit(&cpudsp->lock);
                                cmn_err(CE_WARN, "cpudrv_detach: instance %d: "
                                    "can't raise CPU power level to %d",
                                    instance,
                                    CPUDRV_TOPSPEED(cpupm)->pm_level);
                                return (DDI_FAILURE);
                        } else {
                                return (DDI_SUCCESS);
                        }
                } else {
                        mutex_exit(&cpudsp->lock);
                        return (DDI_SUCCESS);
                }

        default:
                return (DDI_FAILURE);
        }
}

/*
 * Driver power(9e) entry point.
 *
 * The driver's notion of the current power level is set *only* in the
 * power(9e) entry point, after the actual power change operation has
 * successfully completed.
 */
/* ARGSUSED */
static int
cpudrv_power(dev_info_t *dip, int comp, int level)
{
        int                     instance;
        cpudrv_devstate_t       *cpudsp;
        cpudrv_pm_t             *cpudrvpm;
        cpudrv_pm_spd_t         *new_spd;
        boolean_t               is_ready;
        int                     ret;

        instance = ddi_get_instance(dip);

        DPRINTF(D_POWER, ("cpudrv_power: instance %d: level %d\n",
            instance, level));

        if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) == NULL) {
                cmn_err(CE_WARN, "cpudrv_power: instance %d: can't "
                    "get state", instance);
                return (DDI_FAILURE);
        }

        /*
         * We're not ready until we can get a cpu_t.
         */
        is_ready = (cpudrv_get_cpu(cpudsp) == DDI_SUCCESS);

        mutex_enter(&cpudsp->lock);
        cpudrvpm = &(cpudsp->cpudrv_pm);

        /*
         * In normal operation, we fail if we are busy and the request is
         * to lower the power level. We let this go through if the driver
         * is in special direct pm mode. On x86, we also let this through
         * if the change is due to a request to govern the max speed.
         */
        if (!cpudrv_direct_pm && (cpudrvpm->pm_busycnt >= 1) &&
            !cpudrv_is_governor_thread(cpudrvpm)) {
                if ((cpudrvpm->cur_spd != NULL) &&
                    (level < cpudrvpm->cur_spd->pm_level)) {
                        mutex_exit(&cpudsp->lock);
                        return (DDI_FAILURE);
                }
        }

        for (new_spd = cpudrvpm->head_spd; new_spd; new_spd =
            new_spd->down_spd) {
                if (new_spd->pm_level == level)
                        break;
        }
        if (!new_spd) {
                CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
                mutex_exit(&cpudsp->lock);
                cmn_err(CE_WARN, "cpudrv_power: instance %d: "
                    "can't locate new CPU speed", instance);
                return (DDI_FAILURE);
        }

        /*
         * We currently refuse to power manage if the CPU is not ready to
         * take cross calls (cross calls fail silently if the CPU is not
         * ready for them).
         *
         * Additionally, for x86 platforms we cannot power manage an
         * instance until it has been initialized.
         */
        if (is_ready) {
                is_ready = CPUDRV_XCALL_IS_READY(cpudsp->cpu_id);
                if (!is_ready) {
                        DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
                            "CPU not ready for x-calls\n", instance));
                } else if (!(is_ready = cpudrv_power_ready(cpudsp->cp))) {
                        DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
                            "waiting for all CPUs to be power manageable\n",
                            instance));
                }
        }
        if (!is_ready) {
                CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
                mutex_exit(&cpudsp->lock);
                return (DDI_FAILURE);
        }

        /*
         * Execute the CPU-specific routine on the requested CPU to
         * change its speed to normal-speed/divisor.
         */
        if ((ret = cpudrv_change_speed(cpudsp, new_spd)) != DDI_SUCCESS) {
                cmn_err(CE_WARN, "cpudrv_power: "
                    "cpudrv_change_speed() return = %d", ret);
                mutex_exit(&cpudsp->lock);
                return (DDI_FAILURE);
        }

        /*
         * Reset idle threshold time for the new power level.
         */
        if ((cpudrvpm->cur_spd != NULL) && (level <
            cpudrvpm->cur_spd->pm_level)) {
                if (pm_idle_component(dip, CPUDRV_COMP_NUM) ==
                    DDI_SUCCESS) {
                        if (cpudrvpm->pm_busycnt >= 1)
                                cpudrvpm->pm_busycnt--;
                } else {
                        cmn_err(CE_WARN, "cpudrv_power: instance %d: "
                            "can't idle CPU component",
                            ddi_get_instance(dip));
                }
        }
        /*
         * Reset various parameters because we are now running at the new
         * speed.
         */
        cpudrvpm->lastquan_mstate[CMS_IDLE] = 0;
        cpudrvpm->lastquan_mstate[CMS_SYSTEM] = 0;
        cpudrvpm->lastquan_mstate[CMS_USER] = 0;
        cpudrvpm->lastquan_ticks = 0;
        cpudrvpm->cur_spd = new_spd;
        CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
        mutex_exit(&cpudsp->lock);

        return (DDI_SUCCESS);
}

/*
 * Initialize power management data.
 */
static int
cpudrv_init(cpudrv_devstate_t *cpudsp)
{
        cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
        cpudrv_pm_spd_t *cur_spd;
        cpudrv_pm_spd_t *prev_spd = NULL;
        int             *speeds;
        uint_t          nspeeds;
        int             idle_cnt_percent;
        int             user_cnt_percent;
        int             i;

        CPUDRV_GET_SPEEDS(cpudsp, speeds, nspeeds);
        if (nspeeds < 2) {
                /* Need at least two speeds to power manage */
                CPUDRV_FREE_SPEEDS(speeds, nspeeds);
                return (DDI_FAILURE);
        }
        cpupm->num_spd = nspeeds;

        /*
         * Calculate the watermarks and other parameters based on the
         * supplied speeds.
         *
         * One of the basic assumptions is that for X amount of CPU work,
         * if the CPU is slowed down by a factor of N, the time it takes to
         * do the same work will be N * X.
         *
         * The driver declares that a CPU is idle and ready to be slowed
         * down if the idle thread ran more than the current speed's
         * idle_hwm, without dropping below idle_hwm for a number of
         * consecutive sampling intervals, and the number of threads running
         * in user mode is below user_lwm.  We want to set the current
         * user_lwm such that, if we just switched to the next slower speed
         * with no change in the real work load, the amount of user threads
         * at the slower speed falls below the slower speed's user_hwm.  If
         * we didn't do that, we would just come back to the higher speed as
         * soon as we go down, even with no change in work load.
         * The user_hwm is a fixed percentage and not calculated dynamically.
         *
         * We bring the CPU up if the idle thread at the current speed ran
         * less than the current speed's idle_lwm for a number of consecutive
         * sampling intervals, or user threads are above the user_hwm for the
         * current speed.
         */
        for (i = 0; i < nspeeds; i++) {
                cur_spd = kmem_zalloc(sizeof (cpudrv_pm_spd_t), KM_SLEEP);
                cur_spd->speed = speeds[i];
                if (i == 0) {   /* normal speed */
                        cpupm->head_spd = cur_spd;
                        CPUDRV_TOPSPEED(cpupm) = cur_spd;
                        cur_spd->quant_cnt = CPUDRV_QUANT_CNT_NORMAL;
                        cur_spd->idle_hwm =
                            (cpudrv_idle_hwm * cur_spd->quant_cnt) / 100;
                        /* can't speed up anymore */
                        cur_spd->idle_lwm = 0;
                        cur_spd->user_hwm = UINT_MAX;
                } else {
                        cur_spd->quant_cnt = CPUDRV_QUANT_CNT_OTHR;
                        ASSERT(prev_spd != NULL);
                        prev_spd->down_spd = cur_spd;
                        cur_spd->up_spd = cpupm->head_spd;

                        /*
                         * Let's assume a CPU is considered idle at full
                         * speed when it is spending I% of its time running
                         * the idle thread.  At full speed, the CPU will be
                         * busy (100 - I)% of the time.  This busy percentage
                         * increases by a factor of N as the CPU slows down.
                         * A CPU that is idle I% of the time at full speed is
                         * idle (100 - ((100 - I) * N))% of the time at speed
                         * 1/N.  The idle_lwm is a fixed percentage.  A large
                         * value of N may result in idle_hwm going below
                         * idle_lwm.  We need to make sure that there is at
                         * least a buffer zone separation between the
                         * idle_lwm and idle_hwm values.
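                         *
                         * For instance, a CPU that is idle I = 90% of the
                         * time at full speed is busy 10% of the time; at
                         * half speed (N = 2) the busy share doubles to 20%,
                         * leaving the CPU idle 100 - ((100 - 90) * 2) = 80%
                         * of the time.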
                         */
                        idle_cnt_percent = CPUDRV_IDLE_CNT_PERCENT(
                            cpudrv_idle_hwm, speeds, i);
                        idle_cnt_percent = max(idle_cnt_percent,
                            (cpudrv_idle_lwm + cpudrv_idle_buf_zone));
                        cur_spd->idle_hwm =
                            (idle_cnt_percent * cur_spd->quant_cnt) / 100;
                        cur_spd->idle_lwm =
                            (cpudrv_idle_lwm * cur_spd->quant_cnt) / 100;

                        /*
                         * The lwm for user threads is determined such that,
                         * if the CPU slows down, the load of work at the
                         * new speed would still keep the CPU at or below the
                         * user_hwm of the new speed.  This is to prevent
                         * a quick jump back up to the higher speed.
                         */
                        cur_spd->user_hwm = (cpudrv_user_hwm *
                            cur_spd->quant_cnt) / 100;
                        user_cnt_percent = CPUDRV_USER_CNT_PERCENT(
                            cpudrv_user_hwm, speeds, i);
                        prev_spd->user_lwm =
                            (user_cnt_percent * prev_spd->quant_cnt) / 100;
                }
                prev_spd = cur_spd;
        }
        /* Slowest speed. Can't slow down anymore */
        cur_spd->idle_hwm = UINT_MAX;
        cur_spd->user_lwm = -1;
#ifdef  DEBUG
        DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: head_spd spd %d, "
            "num_spd %d\n", ddi_get_instance(cpudsp->dip),
            cpupm->head_spd->speed, cpupm->num_spd));
        for (cur_spd = cpupm->head_spd; cur_spd; cur_spd = cur_spd->down_spd) {
                DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: speed %d, "
                    "down_spd spd %d, idle_hwm %d, user_lwm %d, "
                    "up_spd spd %d, idle_lwm %d, user_hwm %d, "
                    "quant_cnt %d\n", ddi_get_instance(cpudsp->dip),
                    cur_spd->speed,
                    (cur_spd->down_spd ? cur_spd->down_spd->speed : 0),
                    cur_spd->idle_hwm, cur_spd->user_lwm,
                    (cur_spd->up_spd ? cur_spd->up_spd->speed : 0),
                    cur_spd->idle_lwm, cur_spd->user_hwm,
                    cur_spd->quant_cnt));
        }
#endif  /* DEBUG */
        CPUDRV_FREE_SPEEDS(speeds, nspeeds);
        return (DDI_SUCCESS);
}

/*
 * Free CPU power management data.
 */
static void
cpudrv_free(cpudrv_devstate_t *cpudsp)
{
        cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
        cpudrv_pm_spd_t *cur_spd, *next_spd;

        cur_spd = cpupm->head_spd;
        while (cur_spd) {
                next_spd = cur_spd->down_spd;
                kmem_free(cur_spd, sizeof (cpudrv_pm_spd_t));
                cur_spd = next_spd;
        }
        bzero(cpupm, sizeof (cpudrv_pm_t));
}

/*
 * Create pm-components property.
 */
static int
cpudrv_comp_create(cpudrv_devstate_t *cpudsp)
{
        cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
        cpudrv_pm_spd_t *cur_spd;
        char            **pmc;
        int             size;
        char            name[] = "NAME=CPU Speed";
        int             i, j;
        uint_t          comp_spd;
        int             result = DDI_FAILURE;

        pmc = kmem_zalloc((cpupm->num_spd + 1) * sizeof (char *), KM_SLEEP);
        size = CPUDRV_COMP_SIZE();
        if (cpupm->num_spd > CPUDRV_COMP_MAX_VAL) {
                cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
                    "number of speeds exceeded limits",
                    ddi_get_instance(cpudsp->dip));
                kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
                return (result);
        }

        for (i = cpupm->num_spd, cur_spd = cpupm->head_spd; i > 0;
            i--, cur_spd = cur_spd->down_spd) {
                cur_spd->pm_level = i;
                pmc[i] = kmem_zalloc((size * sizeof (char)), KM_SLEEP);
                comp_spd = CPUDRV_COMP_SPEED(cpupm, cur_spd);
                if (comp_spd > CPUDRV_COMP_MAX_VAL) {
                        cmn_err(CE_WARN, "cpudrv_comp_create: "
                            "instance %d: speed exceeded limits",
                            ddi_get_instance(cpudsp->dip));
                        for (j = cpupm->num_spd; j >= i; j--) {
                                kmem_free(pmc[j], size * sizeof (char));
                        }
                        kmem_free(pmc, (cpupm->num_spd + 1) *
                            sizeof (char *));
                        return (result);
                }
                CPUDRV_COMP_SPRINT(pmc[i], cpupm, cur_spd, comp_spd)
                DPRINTF(D_PM_COMP_CREATE, ("cpudrv_comp_create: "
                    "instance %d: pm-components power level %d string '%s'\n",
                    ddi_get_instance(cpudsp->dip), i, pmc[i]));
        }
        pmc[0] = kmem_zalloc(sizeof (name), KM_SLEEP);
        (void) strcat(pmc[0], name);
        DPRINTF(D_PM_COMP_CREATE, ("cpudrv_comp_create: instance %d: "
            "pm-components component name '%s'\n",
            ddi_get_instance(cpudsp->dip), pmc[0]));

        if (ddi_prop_update_string_array(DDI_DEV_T_NONE, cpudsp->dip,
            "pm-components", pmc, cpupm->num_spd + 1) == DDI_PROP_SUCCESS) {
                result = DDI_SUCCESS;
        } else {
                cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
                    "can't create pm-components property",
                    ddi_get_instance(cpudsp->dip));
        }

        for (i = cpupm->num_spd; i > 0; i--) {
                kmem_free(pmc[i], size * sizeof (char));
        }
        kmem_free(pmc[0], sizeof (name));
        kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
        return (result);
}
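
/*
 * For a CPU with two speeds, the resulting property would look roughly
 * like this (illustrative values only; the exact entry strings are
 * produced by CPUDRV_COMP_SPRINT() on each platform):
 *
 *      pm-components="NAME=CPU Speed", "1=2000", "2=2800"
 *
 * Each numbered entry maps a power level to a speed, with the highest
 * level representing full speed.
 */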

/*
 * Mark a component idle.
 */
#define CPUDRV_MONITOR_PM_IDLE_COMP(dip, cpupm) { \
        if ((cpupm)->pm_busycnt >= 1) { \
                if (pm_idle_component((dip), CPUDRV_COMP_NUM) == \
                    DDI_SUCCESS) { \
                        DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: " \
                            "instance %d: pm_idle_component called\n", \
                            ddi_get_instance((dip)))); \
                        (cpupm)->pm_busycnt--; \
                } else { \
                        cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
                            "can't idle CPU component", \
                            ddi_get_instance((dip))); \
                } \
        } \
}

/*
 * Marks a component busy in both PM framework and driver state structure.
 */
#define CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm) { \
        if ((cpupm)->pm_busycnt < 1) { \
                if (pm_busy_component((dip), CPUDRV_COMP_NUM) == \
                    DDI_SUCCESS) { \
                        DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: " \
                            "instance %d: pm_busy_component called\n", \
                            ddi_get_instance((dip)))); \
                        (cpupm)->pm_busycnt++; \
                } else { \
                        cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
                            "can't busy CPU component", \
                            ddi_get_instance((dip))); \
                } \
        } \
}

/*
 * Marks a component busy and calls pm_raise_power().
 */
#define CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm, new_spd) { \
        int ret; \
        /* \
         * Mark driver and PM framework busy first so framework doesn't try \
         * to bring CPU to lower speed when we need to be at higher speed. \
         */ \
        CPUDRV_MONITOR_PM_BUSY_COMP((dip), (cpupm)); \
        mutex_exit(&(cpudsp)->lock); \
        DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: " \
            "pm_raise_power called to %d\n", ddi_get_instance((dip)), \
            (new_spd->pm_level))); \
        ret = pm_raise_power((dip), CPUDRV_COMP_NUM, (new_spd->pm_level)); \
        if (ret != DDI_SUCCESS) { \
                cmn_err(CE_WARN, "cpudrv_monitor: instance %d: can't " \
                    "raise CPU power level", ddi_get_instance((dip))); \
        } \
        mutex_enter(&(cpudsp)->lock); \
        if (ret == DDI_SUCCESS && cpudsp->cpudrv_pm.cur_spd == NULL) { \
                cpudsp->cpudrv_pm.cur_spd = new_spd; \
        } \
}

/*
 * In order to monitor a CPU, we need to hold cpu_lock to access CPU
 * statistics. Holding cpu_lock is not allowed from a callout routine.
 * We dispatch a taskq to do that job.
 */
static void
cpudrv_monitor_disp(void *arg)
{
        cpudrv_devstate_t       *cpudsp = (cpudrv_devstate_t *)arg;

        /*
         * We are here because the last task has scheduled a timeout.
         * The queue should be empty at this time.
         */
        mutex_enter(&cpudsp->cpudrv_pm.timeout_lock);
        if ((ddi_taskq_dispatch(cpudsp->cpudrv_pm.tq, cpudrv_monitor, arg,
            DDI_NOSLEEP)) != DDI_SUCCESS) {
                mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
                DPRINTF(D_PM_MONITOR, ("cpudrv_monitor_disp: failed to "
                    "dispatch the cpudrv_monitor taskq\n"));
                mutex_enter(&cpudsp->lock);
                CPUDRV_MONITOR_INIT(cpudsp);
                mutex_exit(&cpudsp->lock);
                return;
        }
        cpudsp->cpudrv_pm.timeout_count++;
        mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
}

/*
 * Monitors each CPU for the amount of time the idle thread was running in
 * the last quantum and arranges for the CPU to go to the lower or higher
 * speed. Called at the time interval appropriate for the current speed. The
 * time interval for normal speed is CPUDRV_QUANT_CNT_NORMAL. The time
 * interval for other speeds (including unknown speed) is
 * CPUDRV_QUANT_CNT_OTHR.
 */
static void
cpudrv_monitor(void *arg)
{
        cpudrv_devstate_t       *cpudsp = (cpudrv_devstate_t *)arg;
        cpudrv_pm_t             *cpupm;
        cpudrv_pm_spd_t         *cur_spd, *new_spd;
        dev_info_t              *dip;
        uint_t                  idle_cnt, user_cnt, system_cnt;
        clock_t                 ticks;
        uint_t                  tick_cnt;
        hrtime_t                msnsecs[NCMSTATES];
        boolean_t               is_ready;

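/*
 * Compute, per microstate, the number of ticks accumulated since the last
 * sample. The clamp against lastquan_mstate guards against the microstate
 * time appearing to move backwards, which would otherwise produce a huge
 * unsigned delta.
 */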
#define GET_CPU_MSTATE_CNT(state, cnt) \
        msnsecs[state] = NSEC_TO_TICK(msnsecs[state]); \
        if (cpupm->lastquan_mstate[state] > msnsecs[state]) \
                msnsecs[state] = cpupm->lastquan_mstate[state]; \
        cnt = msnsecs[state] - cpupm->lastquan_mstate[state]; \
        cpupm->lastquan_mstate[state] = msnsecs[state]

        /*
         * We're not ready until we can get a cpu_t.
         */
        is_ready = (cpudrv_get_cpu(cpudsp) == DDI_SUCCESS);

        mutex_enter(&cpudsp->lock);
        cpupm = &(cpudsp->cpudrv_pm);
        if (cpupm->timeout_id == 0) {
                mutex_exit(&cpudsp->lock);
                goto do_return;
        }
        cur_spd = cpupm->cur_spd;
        dip = cpudsp->dip;

        /*
         * We assume that a CPU is initialized and has a valid cpu_t
         * structure, if it is ready for cross calls. If this changes,
         * additional checks might be needed.
         *
         * Additionally, for x86 platforms we cannot power manage an
         * instance until it has been initialized.
         */
        if (is_ready) {
                is_ready = CPUDRV_XCALL_IS_READY(cpudsp->cpu_id);
                if (!is_ready) {
                        DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
                            "CPU not ready for x-calls\n",
                            ddi_get_instance(dip)));
                } else if (!(is_ready = cpudrv_power_ready(cpudsp->cp))) {
                        DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
                            "waiting for all CPUs to be power manageable\n",
                            ddi_get_instance(dip)));
                }
        }
        if (!is_ready) {
                /*
                 * Make sure that we are busy so that the framework doesn't
                 * try to bring us down in this situation.
                 */
                CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
                CPUDRV_MONITOR_INIT(cpudsp);
                mutex_exit(&cpudsp->lock);
                goto do_return;
        }

        /*
         * Make sure that we are still not at unknown power level.
         */
        if (cur_spd == NULL) {
                DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
                    "cur_spd is unknown\n", ddi_get_instance(dip)));
                CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
                    CPUDRV_TOPSPEED(cpupm));
                /*
                 * We just changed the speed. Wait till at least the next
                 * call to this routine before proceeding ahead.
                 */
                CPUDRV_MONITOR_INIT(cpudsp);
                mutex_exit(&cpudsp->lock);
                goto do_return;
        }

        if (!cpupm->pm_started) {
                cpupm->pm_started = B_TRUE;
                cpudrv_set_supp_freqs(cpudsp);
        }

        get_cpu_mstate(cpudsp->cp, msnsecs);
        GET_CPU_MSTATE_CNT(CMS_IDLE, idle_cnt);
        GET_CPU_MSTATE_CNT(CMS_USER, user_cnt);
        GET_CPU_MSTATE_CNT(CMS_SYSTEM, system_cnt);

        /*
         * We can't do anything when we have just switched to a state
         * because there is no valid timestamp.
         */
        if (cpupm->lastquan_ticks == 0) {
                cpupm->lastquan_ticks = NSEC_TO_TICK(gethrtime());
                CPUDRV_MONITOR_INIT(cpudsp);
                mutex_exit(&cpudsp->lock);
                goto do_return;
        }

        /*
         * Various watermarks are based on this routine being called back
         * exactly at the requested period. This is not guaranteed
         * because this routine is called from a taskq that is dispatched
         * from a timeout routine.  Handle this by finding out how many
         * ticks have elapsed since the last call and adjusting
         * the idle_cnt based on the delay added to the requested period
         * by timeout and taskq.
         */
        ticks = NSEC_TO_TICK(gethrtime());
        tick_cnt = ticks - cpupm->lastquan_ticks;
        ASSERT(tick_cnt != 0);
        cpupm->lastquan_ticks = ticks;

        /*
         * The time taken between recording the current counts and
         * arranging the next call of this routine is an error in our
         * calculation. We minimize the error by calling
         * CPUDRV_MONITOR_INIT() here instead of at the end of this routine.
         */
        CPUDRV_MONITOR_INIT(cpudsp);
        DPRINTF(D_PM_MONITOR_VERBOSE, ("cpudrv_monitor: instance %d: "
            "idle count %d, user count %d, system count %d, pm_level %d, "
            "pm_busycnt %d\n", ddi_get_instance(dip), idle_cnt, user_cnt,
            system_cnt, cur_spd->pm_level, cpupm->pm_busycnt));

#ifdef  DEBUG
        /*
         * Notify that timeout and taskq have caused delays and we need to
         * scale our parameters accordingly.
         *
         * To get an accurate result, don't turn on other DPRINTFs with
         * the following DPRINTF. PROM calls generated by other
         * DPRINTFs change the timing.
         */
        if (tick_cnt > cur_spd->quant_cnt) {
                DPRINTF(D_PM_MONITOR_DELAY, ("cpudrv_monitor: instance %d: "
                    "tick count %d > quantum_count %u\n",
                    ddi_get_instance(dip), tick_cnt, cur_spd->quant_cnt));
        }
#endif  /* DEBUG */

        /*
         * Adjust counts based on the delay added by timeout and taskq.
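         * For example, if we were called after twice the requested period
         * (tick_cnt == 2 * quant_cnt), the raw counts are scaled down by
         * half.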
         */
        idle_cnt = (idle_cnt * cur_spd->quant_cnt) / tick_cnt;
        user_cnt = (user_cnt * cur_spd->quant_cnt) / tick_cnt;

        if ((user_cnt > cur_spd->user_hwm) || (idle_cnt < cur_spd->idle_lwm &&
            cur_spd->idle_blwm_cnt >= cpudrv_idle_blwm_cnt_max)) {
                cur_spd->idle_blwm_cnt = 0;
                cur_spd->idle_bhwm_cnt = 0;
                /*
                 * In a normal situation, arrange to go to the next higher
                 * speed. If we are running in special direct pm mode, we
                 * just stay at the current speed.
                 */
                if (cur_spd == cur_spd->up_spd || cpudrv_direct_pm) {
                        CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
                } else {
                        new_spd = cur_spd->up_spd;
                        CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
                            new_spd);
                }
        } else if ((user_cnt <= cur_spd->user_lwm) &&
            (idle_cnt >= cur_spd->idle_hwm) || !CPU_ACTIVE(cpudsp->cp)) {
                cur_spd->idle_blwm_cnt = 0;
                cur_spd->idle_bhwm_cnt = 0;
                /*
                 * Arrange to go to the next lower speed by reporting our
                 * idle status to the power management framework.
                 */
                CPUDRV_MONITOR_PM_IDLE_COMP(dip, cpupm);
        } else {
                /*
                 * If we are between the idle water marks and have not
                 * been here enough consecutive times to be considered
                 * busy, just increment the count and return.
                 */
                if ((idle_cnt < cur_spd->idle_hwm) &&
                    (idle_cnt >= cur_spd->idle_lwm) &&
                    (cur_spd->idle_bhwm_cnt < cpudrv_idle_bhwm_cnt_max)) {
                        cur_spd->idle_blwm_cnt = 0;
                        cur_spd->idle_bhwm_cnt++;
                        mutex_exit(&cpudsp->lock);
                        goto do_return;
                }
                if (idle_cnt < cur_spd->idle_lwm) {
                        cur_spd->idle_blwm_cnt++;
                        cur_spd->idle_bhwm_cnt = 0;
                }
                /*
                 * Arrange to stay at the current speed.
                 */
                CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
        }
        mutex_exit(&cpudsp->lock);
do_return:
        mutex_enter(&cpupm->timeout_lock);
        ASSERT(cpupm->timeout_count > 0);
        cpupm->timeout_count--;
        cv_signal(&cpupm->timeout_cv);
        mutex_exit(&cpupm->timeout_lock);
}

/*
 * Get the cpu_t structure for a cpudrv_devstate_t.
 */
int
cpudrv_get_cpu(cpudrv_devstate_t *cpudsp)
{
        ASSERT(cpudsp != NULL);

        /*
         * Return DDI_SUCCESS if the cpudrv_devstate_t already
         * contains the cpu_t structure.
         */
        if (cpudsp->cp != NULL)
                return (DDI_SUCCESS);

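        /*
         * cpu_get() must be called with cpu_lock held; take it here
         * unless the current thread already holds it.
         */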
        if (MUTEX_HELD(&cpu_lock)) {
                cpudsp->cp = cpu_get(cpudsp->cpu_id);
        } else {
                mutex_enter(&cpu_lock);
                cpudsp->cp = cpu_get(cpudsp->cpu_id);
                mutex_exit(&cpu_lock);
        }

        if (cpudsp->cp == NULL)
                return (DDI_FAILURE);

        return (DDI_SUCCESS);
}