6583 remove whole-process swapping
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 */ 25 26 /* 27 * Architecture-independent CPU control functions. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/var.h> 33 #include <sys/thread.h> 34 #include <sys/cpuvar.h> 35 #include <sys/cpu_event.h> 36 #include <sys/kstat.h> 37 #include <sys/uadmin.h> 38 #include <sys/systm.h> 39 #include <sys/errno.h> 40 #include <sys/cmn_err.h> 41 #include <sys/procset.h> 42 #include <sys/processor.h> 43 #include <sys/debug.h> 44 #include <sys/cpupart.h> 45 #include <sys/lgrp.h> 46 #include <sys/pset.h> 47 #include <sys/pghw.h> 48 #include <sys/kmem.h> 49 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ 50 #include <sys/atomic.h> 51 #include <sys/callb.h> 52 #include <sys/vtrace.h> 53 #include <sys/cyclic.h> 54 #include <sys/bitmap.h> 55 #include <sys/nvpair.h> 56 #include <sys/pool_pset.h> 57 #include <sys/msacct.h> 58 #include <sys/time.h> 59 #include <sys/archsystm.h> 60 #include <sys/sdt.h> 61 #if defined(__x86) || defined(__amd64) 62 #include <sys/x86_archext.h> 63 #endif 64 #include <sys/callo.h> 65 66 extern int mp_cpu_start(cpu_t *); 67 extern int mp_cpu_stop(cpu_t *); 68 extern int mp_cpu_poweron(cpu_t *); 69 extern int mp_cpu_poweroff(cpu_t *); 70 extern int mp_cpu_configure(int); 71 extern int mp_cpu_unconfigure(int); 72 extern void mp_cpu_faulted_enter(cpu_t *); 73 extern void mp_cpu_faulted_exit(cpu_t *); 74 75 extern int cmp_cpu_to_chip(processorid_t cpuid); 76 #ifdef __sparcv9 77 extern char *cpu_fru_fmri(cpu_t *cp); 78 #endif 79 80 static void cpu_add_active_internal(cpu_t *cp); 81 static void cpu_remove_active(cpu_t *cp); 82 static void cpu_info_kstat_create(cpu_t *cp); 83 static void cpu_info_kstat_destroy(cpu_t *cp); 84 static void cpu_stats_kstat_create(cpu_t *cp); 85 static void cpu_stats_kstat_destroy(cpu_t *cp); 86 87 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw); 88 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw); 89 static int cpu_stat_ks_update(kstat_t *ksp, int rw); 90 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t); 91 92 /* 93 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active, 94 * max_cpu_seqid_ever, and dispatch queue reallocations. The lock ordering with 95 * respect to related locks is: 96 * 97 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock() 98 * 99 * Warning: Certain sections of code do not use the cpu_lock when 100 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). 
 * Since all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no case can any cached
 * copies of the cpu pointers be kept, as they may become invalid.
 */
kmutex_t cpu_lock;
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */

/*
 * max_ncpus keeps the max cpus the system can have.  Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;
/*
 * Platforms that set max_ncpus to the maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.
 */
int boot_max_ncpus = -1;
int boot_ncpus = -1;
/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

/*
 * Maximum cpu_seqid that was ever given out.  This number can only grow
 * and never shrink.  It can be used to optimize NCPU loops to avoid going
 * through CPUs which were never on-line.
 */
processorid_t max_cpu_seqid_ever = 0;

int ncpus = 1;
int ncpus_online = 1;

/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbindings, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronization is done under thread
 * lock.
 */
int weakbindingbarrier;

/*
 * Variables used in pause_cpus().
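 *
 * As a reading aid only (this summary is reconstructed from cpu_pause(),
 * pause_cpus(), start_cpus() and cpu_pause_free() below), the PAUSE_*
 * values stored in safe_list[] roughly track this handshake:
 *
 *	PAUSE_IDLE	initial state, restored by start_cpus()
 *	PAUSE_READY	the pause thread is running on its CPU and has
 *			posted cp_sem
 *	PAUSE_WAIT	the CPU is parked (spinning at high SPL via the
 *			machine-dependent mach_cpu_pause()), or was
 *			skipped by cpu_pause_start()
 *	PAUSE_DIE,	cpu_pause_free() asks the pause thread to exit,
 *	PAUSE_DEAD	and the thread acknowledges that it has done so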
161 */ 162 static volatile char safe_list[NCPU]; 163 164 static struct _cpu_pause_info { 165 int cp_spl; /* spl saved in pause_cpus() */ 166 volatile int cp_go; /* Go signal sent after all ready */ 167 int cp_count; /* # of CPUs to pause */ 168 ksema_t cp_sem; /* synch pause_cpus & cpu_pause */ 169 kthread_id_t cp_paused; 170 void *(*cp_func)(void *); 171 } cpu_pause_info; 172 173 static kmutex_t pause_free_mutex; 174 static kcondvar_t pause_free_cv; 175 176 177 static struct cpu_sys_stats_ks_data { 178 kstat_named_t cpu_ticks_idle; 179 kstat_named_t cpu_ticks_user; 180 kstat_named_t cpu_ticks_kernel; 181 kstat_named_t cpu_ticks_wait; 182 kstat_named_t cpu_nsec_idle; 183 kstat_named_t cpu_nsec_user; 184 kstat_named_t cpu_nsec_kernel; 185 kstat_named_t cpu_nsec_dtrace; 186 kstat_named_t cpu_nsec_intr; 187 kstat_named_t cpu_load_intr; 188 kstat_named_t wait_ticks_io; 189 kstat_named_t dtrace_probes; 190 kstat_named_t bread; 191 kstat_named_t bwrite; 192 kstat_named_t lread; 193 kstat_named_t lwrite; 194 kstat_named_t phread; 195 kstat_named_t phwrite; 196 kstat_named_t pswitch; 197 kstat_named_t trap; 198 kstat_named_t intr; 199 kstat_named_t syscall; 200 kstat_named_t sysread; 201 kstat_named_t syswrite; 202 kstat_named_t sysfork; 203 kstat_named_t sysvfork; 204 kstat_named_t sysexec; 205 kstat_named_t readch; 206 kstat_named_t writech; 207 kstat_named_t rcvint; 208 kstat_named_t xmtint; 209 kstat_named_t mdmint; 210 kstat_named_t rawch; 211 kstat_named_t canch; 212 kstat_named_t outch; 213 kstat_named_t msg; 214 kstat_named_t sema; 215 kstat_named_t namei; 216 kstat_named_t ufsiget; 217 kstat_named_t ufsdirblk; 218 kstat_named_t ufsipage; 219 kstat_named_t ufsinopage; 220 kstat_named_t procovf; 221 kstat_named_t intrthread; 222 kstat_named_t intrblk; 223 kstat_named_t intrunpin; 224 kstat_named_t idlethread; 225 kstat_named_t inv_swtch; 226 kstat_named_t nthreads; 227 kstat_named_t cpumigrate; 228 kstat_named_t xcalls; 229 kstat_named_t mutex_adenters; 230 kstat_named_t rw_rdfails; 231 kstat_named_t rw_wrfails; 232 kstat_named_t modload; 233 kstat_named_t modunload; 234 kstat_named_t bawrite; 235 kstat_named_t iowait; 236 } cpu_sys_stats_ks_data_template = { 237 { "cpu_ticks_idle", KSTAT_DATA_UINT64 }, 238 { "cpu_ticks_user", KSTAT_DATA_UINT64 }, 239 { "cpu_ticks_kernel", KSTAT_DATA_UINT64 }, 240 { "cpu_ticks_wait", KSTAT_DATA_UINT64 }, 241 { "cpu_nsec_idle", KSTAT_DATA_UINT64 }, 242 { "cpu_nsec_user", KSTAT_DATA_UINT64 }, 243 { "cpu_nsec_kernel", KSTAT_DATA_UINT64 }, 244 { "cpu_nsec_dtrace", KSTAT_DATA_UINT64 }, 245 { "cpu_nsec_intr", KSTAT_DATA_UINT64 }, 246 { "cpu_load_intr", KSTAT_DATA_UINT64 }, 247 { "wait_ticks_io", KSTAT_DATA_UINT64 }, 248 { "dtrace_probes", KSTAT_DATA_UINT64 }, 249 { "bread", KSTAT_DATA_UINT64 }, 250 { "bwrite", KSTAT_DATA_UINT64 }, 251 { "lread", KSTAT_DATA_UINT64 }, 252 { "lwrite", KSTAT_DATA_UINT64 }, 253 { "phread", KSTAT_DATA_UINT64 }, 254 { "phwrite", KSTAT_DATA_UINT64 }, 255 { "pswitch", KSTAT_DATA_UINT64 }, 256 { "trap", KSTAT_DATA_UINT64 }, 257 { "intr", KSTAT_DATA_UINT64 }, 258 { "syscall", KSTAT_DATA_UINT64 }, 259 { "sysread", KSTAT_DATA_UINT64 }, 260 { "syswrite", KSTAT_DATA_UINT64 }, 261 { "sysfork", KSTAT_DATA_UINT64 }, 262 { "sysvfork", KSTAT_DATA_UINT64 }, 263 { "sysexec", KSTAT_DATA_UINT64 }, 264 { "readch", KSTAT_DATA_UINT64 }, 265 { "writech", KSTAT_DATA_UINT64 }, 266 { "rcvint", KSTAT_DATA_UINT64 }, 267 { "xmtint", KSTAT_DATA_UINT64 }, 268 { "mdmint", KSTAT_DATA_UINT64 }, 269 { "rawch", KSTAT_DATA_UINT64 }, 270 { "canch", KSTAT_DATA_UINT64 }, 
271 { "outch", KSTAT_DATA_UINT64 }, 272 { "msg", KSTAT_DATA_UINT64 }, 273 { "sema", KSTAT_DATA_UINT64 }, 274 { "namei", KSTAT_DATA_UINT64 }, 275 { "ufsiget", KSTAT_DATA_UINT64 }, 276 { "ufsdirblk", KSTAT_DATA_UINT64 }, 277 { "ufsipage", KSTAT_DATA_UINT64 }, 278 { "ufsinopage", KSTAT_DATA_UINT64 }, 279 { "procovf", KSTAT_DATA_UINT64 }, 280 { "intrthread", KSTAT_DATA_UINT64 }, 281 { "intrblk", KSTAT_DATA_UINT64 }, 282 { "intrunpin", KSTAT_DATA_UINT64 }, 283 { "idlethread", KSTAT_DATA_UINT64 }, 284 { "inv_swtch", KSTAT_DATA_UINT64 }, 285 { "nthreads", KSTAT_DATA_UINT64 }, 286 { "cpumigrate", KSTAT_DATA_UINT64 }, 287 { "xcalls", KSTAT_DATA_UINT64 }, 288 { "mutex_adenters", KSTAT_DATA_UINT64 }, 289 { "rw_rdfails", KSTAT_DATA_UINT64 }, 290 { "rw_wrfails", KSTAT_DATA_UINT64 }, 291 { "modload", KSTAT_DATA_UINT64 }, 292 { "modunload", KSTAT_DATA_UINT64 }, 293 { "bawrite", KSTAT_DATA_UINT64 }, 294 { "iowait", KSTAT_DATA_UINT64 }, 295 }; 296 297 static struct cpu_vm_stats_ks_data { 298 kstat_named_t pgrec; 299 kstat_named_t pgfrec; 300 kstat_named_t pgin; 301 kstat_named_t pgpgin; 302 kstat_named_t pgout; 303 kstat_named_t pgpgout; 304 kstat_named_t zfod; 305 kstat_named_t dfree; 306 kstat_named_t scan; 307 kstat_named_t rev; 308 kstat_named_t hat_fault; 309 kstat_named_t as_fault; 310 kstat_named_t maj_fault; 311 kstat_named_t cow_fault; 312 kstat_named_t prot_fault; 313 kstat_named_t softlock; 314 kstat_named_t kernel_asflt; 315 kstat_named_t pgrrun; 316 kstat_named_t execpgin; 317 kstat_named_t execpgout; 318 kstat_named_t execfree; 319 kstat_named_t anonpgin; 320 kstat_named_t anonpgout; 321 kstat_named_t anonfree; 322 kstat_named_t fspgin; 323 kstat_named_t fspgout; 324 kstat_named_t fsfree; 325 } cpu_vm_stats_ks_data_template = { 326 { "pgrec", KSTAT_DATA_UINT64 }, 327 { "pgfrec", KSTAT_DATA_UINT64 }, 328 { "pgin", KSTAT_DATA_UINT64 }, 329 { "pgpgin", KSTAT_DATA_UINT64 }, 330 { "pgout", KSTAT_DATA_UINT64 }, 331 { "pgpgout", KSTAT_DATA_UINT64 }, 332 { "zfod", KSTAT_DATA_UINT64 }, 333 { "dfree", KSTAT_DATA_UINT64 }, 334 { "scan", KSTAT_DATA_UINT64 }, 335 { "rev", KSTAT_DATA_UINT64 }, 336 { "hat_fault", KSTAT_DATA_UINT64 }, 337 { "as_fault", KSTAT_DATA_UINT64 }, 338 { "maj_fault", KSTAT_DATA_UINT64 }, 339 { "cow_fault", KSTAT_DATA_UINT64 }, 340 { "prot_fault", KSTAT_DATA_UINT64 }, 341 { "softlock", KSTAT_DATA_UINT64 }, 342 { "kernel_asflt", KSTAT_DATA_UINT64 }, 343 { "pgrrun", KSTAT_DATA_UINT64 }, 344 { "execpgin", KSTAT_DATA_UINT64 }, 345 { "execpgout", KSTAT_DATA_UINT64 }, 346 { "execfree", KSTAT_DATA_UINT64 }, 347 { "anonpgin", KSTAT_DATA_UINT64 }, 348 { "anonpgout", KSTAT_DATA_UINT64 }, 349 { "anonfree", KSTAT_DATA_UINT64 }, 350 { "fspgin", KSTAT_DATA_UINT64 }, 351 { "fspgout", KSTAT_DATA_UINT64 }, 352 { "fsfree", KSTAT_DATA_UINT64 }, 353 }; 354 355 /* 356 * Force the specified thread to migrate to the appropriate processor. 357 * Called with thread lock held, returns with it dropped. 358 */ 359 static void 360 force_thread_migrate(kthread_id_t tp) 361 { 362 ASSERT(THREAD_LOCK_HELD(tp)); 363 if (tp == curthread) { 364 THREAD_TRANSITION(tp); 365 CL_SETRUN(tp); 366 thread_unlock_nopreempt(tp); 367 swtch(); 368 } else { 369 if (tp->t_state == TS_ONPROC) { 370 cpu_surrender(tp); 371 } else if (tp->t_state == TS_RUN) { 372 (void) dispdeq(tp); 373 setbackdq(tp); 374 } 375 thread_unlock(tp); 376 } 377 } 378 379 /* 380 * Set affinity for a specified CPU. 381 * A reference count is incremented and the affinity is held until the 382 * reference count is decremented to zero by thread_affinity_clear(). 
383 * This is so regions of code requiring affinity can be nested. 384 * Caller needs to ensure that cpu_id remains valid, which can be 385 * done by holding cpu_lock across this call, unless the caller 386 * specifies CPU_CURRENT in which case the cpu_lock will be acquired 387 * by thread_affinity_set and CPU->cpu_id will be the target CPU. 388 */ 389 void 390 thread_affinity_set(kthread_id_t t, int cpu_id) 391 { 392 cpu_t *cp; 393 int c; 394 395 ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL)); 396 397 if ((c = cpu_id) == CPU_CURRENT) { 398 mutex_enter(&cpu_lock); 399 cpu_id = CPU->cpu_id; 400 } 401 /* 402 * We should be asserting that cpu_lock is held here, but 403 * the NCA code doesn't acquire it. The following assert 404 * should be uncommented when the NCA code is fixed. 405 * 406 * ASSERT(MUTEX_HELD(&cpu_lock)); 407 */ 408 ASSERT((cpu_id >= 0) && (cpu_id < NCPU)); 409 cp = cpu[cpu_id]; 410 ASSERT(cp != NULL); /* user must provide a good cpu_id */ 411 /* 412 * If there is already a hard affinity requested, and this affinity 413 * conflicts with that, panic. 414 */ 415 thread_lock(t); 416 if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) { 417 panic("affinity_set: setting %p but already bound to %p", 418 (void *)cp, (void *)t->t_bound_cpu); 419 } 420 t->t_affinitycnt++; 421 t->t_bound_cpu = cp; 422 423 /* 424 * Make sure we're running on the right CPU. 425 */ 426 if (cp != t->t_cpu || t != curthread) { 427 force_thread_migrate(t); /* drops thread lock */ 428 } else { 429 thread_unlock(t); 430 } 431 432 if (c == CPU_CURRENT) 433 mutex_exit(&cpu_lock); 434 } 435 436 /* 437 * Wrapper for backward compatibility. 438 */ 439 void 440 affinity_set(int cpu_id) 441 { 442 thread_affinity_set(curthread, cpu_id); 443 } 444 445 /* 446 * Decrement the affinity reservation count and if it becomes zero, 447 * clear the CPU affinity for the current thread, or set it to the user's 448 * software binding request. 449 */ 450 void 451 thread_affinity_clear(kthread_id_t t) 452 { 453 register processorid_t binding; 454 455 thread_lock(t); 456 if (--t->t_affinitycnt == 0) { 457 if ((binding = t->t_bind_cpu) == PBIND_NONE) { 458 /* 459 * Adjust disp_max_unbound_pri if necessary. 460 */ 461 disp_adjust_unbound_pri(t); 462 t->t_bound_cpu = NULL; 463 if (t->t_cpu->cpu_part != t->t_cpupart) { 464 force_thread_migrate(t); 465 return; 466 } 467 } else { 468 t->t_bound_cpu = cpu[binding]; 469 /* 470 * Make sure the thread is running on the bound CPU. 471 */ 472 if (t->t_cpu != t->t_bound_cpu) { 473 force_thread_migrate(t); 474 return; /* already dropped lock */ 475 } 476 } 477 } 478 thread_unlock(t); 479 } 480 481 /* 482 * Wrapper for backward compatibility. 483 */ 484 void 485 affinity_clear(void) 486 { 487 thread_affinity_clear(curthread); 488 } 489 490 /* 491 * Weak cpu affinity. Bind to the "current" cpu for short periods 492 * of time during which the thread must not block (but may be preempted). 493 * Use this instead of kpreempt_disable() when it is only "no migration" 494 * rather than "no preemption" semantics that are required - disabling 495 * preemption holds higher priority threads off of cpu and if the 496 * operation that is protected is more than momentary this is not good 497 * for realtime etc. 
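 *
 * As an illustration only (this sketch is not taken from any particular
 * caller in this file), a short critical section that must stay on one
 * cpu but may be preempted would look roughly like:
 *
 *	thread_nomigrate();
 *	... touch per-cpu state that must not move between cpus ...
 *	thread_allowmigrate();
 *
 * whereas kpreempt_disable()/kpreempt_enable() would additionally keep
 * higher priority threads off the cpu for the whole section.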
498 * 499 * Weakly bound threads will not prevent a cpu from being offlined - 500 * we'll only run them on the cpu to which they are weakly bound but 501 * (because they do not block) we'll always be able to move them on to 502 * another cpu at offline time if we give them just a short moment to 503 * run during which they will unbind. To give a cpu a chance of offlining, 504 * however, we require a barrier to weak bindings that may be raised for a 505 * given cpu (offline/move code may set this and then wait a short time for 506 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier. 507 * 508 * There are few restrictions on the calling context of thread_nomigrate. 509 * The caller must not hold the thread lock. Calls may be nested. 510 * 511 * After weakbinding a thread must not perform actions that may block. 512 * In particular it must not call thread_affinity_set; calling that when 513 * already weakbound is nonsensical anyway. 514 * 515 * If curthread is prevented from migrating for other reasons 516 * (kernel preemption disabled; high pil; strongly bound; interrupt thread) 517 * then the weak binding will succeed even if this cpu is the target of an 518 * offline/move request. 519 */ 520 void 521 thread_nomigrate(void) 522 { 523 cpu_t *cp; 524 kthread_id_t t = curthread; 525 526 again: 527 kpreempt_disable(); 528 cp = CPU; 529 530 /* 531 * A highlevel interrupt must not modify t_nomigrate or 532 * t_weakbound_cpu of the thread it has interrupted. A lowlevel 533 * interrupt thread cannot migrate and we can avoid the 534 * thread_lock call below by short-circuiting here. In either 535 * case we can just return since no migration is possible and 536 * the condition will persist (ie, when we test for these again 537 * in thread_allowmigrate they can't have changed). Migration 538 * is also impossible if we're at or above DISP_LEVEL pil. 539 */ 540 if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD || 541 getpil() >= DISP_LEVEL) { 542 kpreempt_enable(); 543 return; 544 } 545 546 /* 547 * We must be consistent with existing weak bindings. Since we 548 * may be interrupted between the increment of t_nomigrate and 549 * the store to t_weakbound_cpu below we cannot assume that 550 * t_weakbound_cpu will be set if t_nomigrate is. Note that we 551 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not 552 * always the case. 553 */ 554 if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) { 555 if (!panicstr) 556 panic("thread_nomigrate: binding to %p but already " 557 "bound to %p", (void *)cp, 558 (void *)t->t_weakbound_cpu); 559 } 560 561 /* 562 * At this point we have preemption disabled and we don't yet hold 563 * the thread lock. So it's possible that somebody else could 564 * set t_bind_cpu here and not be able to force us across to the 565 * new cpu (since we have preemption disabled). 566 */ 567 thread_lock(curthread); 568 569 /* 570 * If further weak bindings are being (temporarily) suppressed then 571 * we'll settle for disabling kernel preemption (which assures 572 * no migration provided the thread does not block which it is 573 * not allowed to if using thread_nomigrate). We must remember 574 * this disposition so we can take appropriate action in 575 * thread_allowmigrate. If this is a nested call and the 576 * thread is already weakbound then fall through as normal. 577 * We remember the decision to settle for kpreempt_disable through 578 * negative nesting counting in t_nomigrate. 
	 * Once a thread has had one weakbinding request satisfied in this
	 * way any further (nested) requests will continue to be satisfied
	 * in the same way, even if weak bindings have recommenced.
	 */
	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
		--t->t_nomigrate;
		thread_unlock(curthread);
		return;		/* with kpreempt_disable still active */
	}

	/*
	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
	 * however, be running on a cpu other than our t_bound_cpu
	 * (as explained above).  If we grant the weak binding request
	 * in that case then the dispatcher must favour our weak binding
	 * over our strong (in which case, just as when preemption is
	 * disabled, we can continue to run on a cpu other than the one to
	 * which we are strongbound; the difference in this case is that
	 * this thread can be preempted and so can appear on the dispatch
	 * queues of a cpu other than the one it is strongbound to).
	 *
	 * If the cpu we are running on does not appear to be a current
	 * offline target (we check cpu_inmotion to determine this - since
	 * we don't hold cpu_lock we may not see a recent store to that,
	 * so it's possible that we at times can grant a weak binding to a
	 * cpu that is an offline target, but that one request will not
	 * prevent the offline from succeeding) then we will always grant
	 * the weak binding request.  This includes the case above where
	 * we grant a weakbinding not commensurate with our strong binding.
	 *
	 * If our cpu does appear to be an offline target then we're inclined
	 * not to grant the weakbinding request just yet - we'd prefer to
	 * migrate to another cpu and grant the request there.  The
	 * exceptions are those cases where going through preemption code
	 * will not result in us changing cpu:
	 *
	 *	. interrupts have already bypassed this case (see above)
	 *	. we are already weakbound to this cpu (dispatcher code will
	 *	  always return us to the weakbound cpu)
	 *	. preemption was disabled even before we disabled it above
	 *	. we are strongbound to this cpu (if we're strongbound to
	 *	  another and not yet running there the trip through the
	 *	  dispatcher will move us to the strongbound cpu and we
	 *	  will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * would fail.
		 */
		t->t_nomigrate++;
		t->t_weakbound_cpu = cp;
		membar_producer();
		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_bound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the
		 * current cpu, so again we are in the position that it is
		 * possible that our weak and strong bindings differ.  Again
		 * this is catered for by dispatcher code which will favour
		 * our weak binding.
648 */ 649 kpreempt_enable(); 650 } else { 651 /* 652 * Move to another cpu before granting the request by 653 * forcing this thread through preemption code. When we 654 * get to set{front,back}dq called from CL_PREEMPT() 655 * cpu_choose() will be used to select a cpu to queue 656 * us on - that will see cpu_inmotion and take 657 * steps to avoid returning us to this cpu. 658 */ 659 cp->cpu_kprunrun = 1; 660 thread_unlock(curthread); 661 kpreempt_enable(); /* will call preempt() */ 662 goto again; 663 } 664 } 665 666 void 667 thread_allowmigrate(void) 668 { 669 kthread_id_t t = curthread; 670 671 ASSERT(t->t_weakbound_cpu == CPU || 672 (t->t_nomigrate < 0 && t->t_preempt > 0) || 673 CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD || 674 getpil() >= DISP_LEVEL); 675 676 if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) || 677 getpil() >= DISP_LEVEL) 678 return; 679 680 if (t->t_nomigrate < 0) { 681 /* 682 * This thread was granted "weak binding" in the 683 * stronger form of kernel preemption disabling. 684 * Undo a level of nesting for both t_nomigrate 685 * and t_preempt. 686 */ 687 ++t->t_nomigrate; 688 kpreempt_enable(); 689 } else if (--t->t_nomigrate == 0) { 690 /* 691 * Time to drop the weak binding. We need to cater 692 * for the case where we're weakbound to a different 693 * cpu than that to which we're strongbound (a very 694 * temporary arrangement that must only persist until 695 * weak binding drops). We don't acquire thread_lock 696 * here so even as this code executes t_bound_cpu 697 * may be changing. So we disable preemption and 698 * a) in the case that t_bound_cpu changes while we 699 * have preemption disabled kprunrun will be set 700 * asynchronously, and b) if before disabling 701 * preemption we were already on a different cpu to 702 * our t_bound_cpu then we set kprunrun ourselves 703 * to force a trip through the dispatcher when 704 * preemption is enabled. 705 */ 706 kpreempt_disable(); 707 if (t->t_bound_cpu && 708 t->t_weakbound_cpu != t->t_bound_cpu) 709 CPU->cpu_kprunrun = 1; 710 t->t_weakbound_cpu = NULL; 711 membar_producer(); 712 kpreempt_enable(); 713 } 714 } 715 716 /* 717 * weakbinding_stop can be used to temporarily cause weakbindings made 718 * with thread_nomigrate to be satisfied through the stronger action of 719 * kpreempt_disable. weakbinding_start recommences normal weakbinding. 720 */ 721 722 void 723 weakbinding_stop(void) 724 { 725 ASSERT(MUTEX_HELD(&cpu_lock)); 726 weakbindingbarrier = 1; 727 membar_producer(); /* make visible before subsequent thread_lock */ 728 } 729 730 void 731 weakbinding_start(void) 732 { 733 ASSERT(MUTEX_HELD(&cpu_lock)); 734 weakbindingbarrier = 0; 735 } 736 737 void 738 null_xcall(void) 739 { 740 } 741 742 /* 743 * This routine is called to place the CPUs in a safe place so that 744 * one of them can be taken off line or placed on line. What we are 745 * trying to do here is prevent a thread from traversing the list 746 * of active CPUs while we are changing it or from getting placed on 747 * the run queue of a CPU that has just gone off line. We do this by 748 * creating a thread with the highest possible prio for each CPU and 749 * having it call this routine. The advantage of this method is that 750 * we can eliminate all checks for CPU_ACTIVE in the disp routines. 751 * This makes disp faster at the expense of making p_online() slower 752 * which is a good trade off. 
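 *
 * For reference, the typical caller pattern is sketched below; this is
 * only an illustration (see cpu_add_unit() and cpu_del_unit() later in
 * this file for real examples):
 *
 *	mutex_enter(&cpu_lock);
 *	pause_cpus(NULL, NULL);
 *	... modify cpu_list or other pause-protected state ...
 *	start_cpus();
 *	mutex_exit(&cpu_lock);
 *
 * As the comment above pause_cpus() below notes, no adaptive locks may
 * be taken and no blocking is allowed between pause_cpus() and
 * start_cpus().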
753 */ 754 static void 755 cpu_pause(int index) 756 { 757 int s; 758 struct _cpu_pause_info *cpi = &cpu_pause_info; 759 volatile char *safe = &safe_list[index]; 760 long lindex = index; 761 762 ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE)); 763 764 while (*safe != PAUSE_DIE) { 765 *safe = PAUSE_READY; 766 membar_enter(); /* make sure stores are flushed */ 767 sema_v(&cpi->cp_sem); /* signal requesting thread */ 768 769 /* 770 * Wait here until all pause threads are running. That 771 * indicates that it's safe to do the spl. Until 772 * cpu_pause_info.cp_go is set, we don't want to spl 773 * because that might block clock interrupts needed 774 * to preempt threads on other CPUs. 775 */ 776 while (cpi->cp_go == 0) 777 ; 778 /* 779 * Even though we are at the highest disp prio, we need 780 * to block out all interrupts below LOCK_LEVEL so that 781 * an intr doesn't come in, wake up a thread, and call 782 * setbackdq/setfrontdq. 783 */ 784 s = splhigh(); 785 /* 786 * if cp_func has been set then call it using index as the 787 * argument, currently only used by cpr_suspend_cpus(). 788 * This function is used as the code to execute on the 789 * "paused" cpu's when a machine comes out of a sleep state 790 * and CPU's were powered off. (could also be used for 791 * hotplugging CPU's). 792 */ 793 if (cpi->cp_func != NULL) 794 (*cpi->cp_func)((void *)lindex); 795 796 mach_cpu_pause(safe); 797 798 splx(s); 799 /* 800 * Waiting is at an end. Switch out of cpu_pause 801 * loop and resume useful work. 802 */ 803 swtch(); 804 } 805 806 mutex_enter(&pause_free_mutex); 807 *safe = PAUSE_DEAD; 808 cv_broadcast(&pause_free_cv); 809 mutex_exit(&pause_free_mutex); 810 } 811 812 /* 813 * Allow the cpus to start running again. 814 */ 815 void 816 start_cpus() 817 { 818 int i; 819 820 ASSERT(MUTEX_HELD(&cpu_lock)); 821 ASSERT(cpu_pause_info.cp_paused); 822 cpu_pause_info.cp_paused = NULL; 823 for (i = 0; i < NCPU; i++) 824 safe_list[i] = PAUSE_IDLE; 825 membar_enter(); /* make sure stores are flushed */ 826 affinity_clear(); 827 splx(cpu_pause_info.cp_spl); 828 kpreempt_enable(); 829 } 830 831 /* 832 * Allocate a pause thread for a CPU. 833 */ 834 static void 835 cpu_pause_alloc(cpu_t *cp) 836 { 837 kthread_id_t t; 838 long cpun = cp->cpu_id; 839 840 /* 841 * Note, v.v_nglobpris will not change value as long as I hold 842 * cpu_lock. 843 */ 844 t = thread_create(NULL, 0, cpu_pause, (void *)cpun, 845 0, &p0, TS_STOPPED, v.v_nglobpris - 1); 846 thread_lock(t); 847 t->t_bound_cpu = cp; 848 t->t_disp_queue = cp->cpu_disp; 849 t->t_affinitycnt = 1; 850 t->t_preempt = 1; 851 thread_unlock(t); 852 cp->cpu_pause_thread = t; 853 /* 854 * Registering a thread in the callback table is usually done 855 * in the initialization code of the thread. In this 856 * case, we do it right after thread creation because the 857 * thread itself may never run, and we need to register the 858 * fact that it is safe for cpr suspend. 859 */ 860 CALLB_CPR_INIT_SAFE(t, "cpu_pause"); 861 } 862 863 /* 864 * Free a pause thread for a CPU. 865 */ 866 static void 867 cpu_pause_free(cpu_t *cp) 868 { 869 kthread_id_t t; 870 int cpun = cp->cpu_id; 871 872 ASSERT(MUTEX_HELD(&cpu_lock)); 873 /* 874 * We have to get the thread and tell him to die. 875 */ 876 if ((t = cp->cpu_pause_thread) == NULL) { 877 ASSERT(safe_list[cpun] == PAUSE_IDLE); 878 return; 879 } 880 thread_lock(t); 881 t->t_cpu = CPU; /* disp gets upset if last cpu is quiesced. */ 882 t->t_bound_cpu = NULL; /* Must un-bind; cpu may not be running. 
*/ 883 t->t_pri = v.v_nglobpris - 1; 884 ASSERT(safe_list[cpun] == PAUSE_IDLE); 885 safe_list[cpun] = PAUSE_DIE; 886 THREAD_TRANSITION(t); 887 setbackdq(t); 888 thread_unlock_nopreempt(t); 889 890 /* 891 * If we don't wait for the thread to actually die, it may try to 892 * run on the wrong cpu as part of an actual call to pause_cpus(). 893 */ 894 mutex_enter(&pause_free_mutex); 895 while (safe_list[cpun] != PAUSE_DEAD) { 896 cv_wait(&pause_free_cv, &pause_free_mutex); 897 } 898 mutex_exit(&pause_free_mutex); 899 safe_list[cpun] = PAUSE_IDLE; 900 901 cp->cpu_pause_thread = NULL; 902 } 903 904 /* 905 * Initialize basic structures for pausing CPUs. 906 */ 907 void 908 cpu_pause_init() 909 { 910 sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL); 911 /* 912 * Create initial CPU pause thread. 913 */ 914 cpu_pause_alloc(CPU); 915 } 916 917 /* 918 * Start the threads used to pause another CPU. 919 */ 920 static int 921 cpu_pause_start(processorid_t cpu_id) 922 { 923 int i; 924 int cpu_count = 0; 925 926 for (i = 0; i < NCPU; i++) { 927 cpu_t *cp; 928 kthread_id_t t; 929 930 cp = cpu[i]; 931 if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) { 932 safe_list[i] = PAUSE_WAIT; 933 continue; 934 } 935 936 /* 937 * Skip CPU if it is quiesced or not yet started. 938 */ 939 if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) { 940 safe_list[i] = PAUSE_WAIT; 941 continue; 942 } 943 944 /* 945 * Start this CPU's pause thread. 946 */ 947 t = cp->cpu_pause_thread; 948 thread_lock(t); 949 /* 950 * Reset the priority, since nglobpris may have 951 * changed since the thread was created, if someone 952 * has loaded the RT (or some other) scheduling 953 * class. 954 */ 955 t->t_pri = v.v_nglobpris - 1; 956 THREAD_TRANSITION(t); 957 setbackdq(t); 958 thread_unlock_nopreempt(t); 959 ++cpu_count; 960 } 961 return (cpu_count); 962 } 963 964 965 /* 966 * Pause all of the CPUs except the one we are on by creating a high 967 * priority thread bound to those CPUs. 968 * 969 * Note that one must be extremely careful regarding code 970 * executed while CPUs are paused. Since a CPU may be paused 971 * while a thread scheduling on that CPU is holding an adaptive 972 * lock, code executed with CPUs paused must not acquire adaptive 973 * (or low-level spin) locks. Also, such code must not block, 974 * since the thread that is supposed to initiate the wakeup may 975 * never run. 976 * 977 * With a few exceptions, the restrictions on code executed with CPUs 978 * paused match those for code executed at high-level interrupt 979 * context. 980 */ 981 void 982 pause_cpus(cpu_t *off_cp, void *(*func)(void *)) 983 { 984 processorid_t cpu_id; 985 int i; 986 struct _cpu_pause_info *cpi = &cpu_pause_info; 987 988 ASSERT(MUTEX_HELD(&cpu_lock)); 989 ASSERT(cpi->cp_paused == NULL); 990 cpi->cp_count = 0; 991 cpi->cp_go = 0; 992 for (i = 0; i < NCPU; i++) 993 safe_list[i] = PAUSE_IDLE; 994 kpreempt_disable(); 995 996 cpi->cp_func = func; 997 998 /* 999 * If running on the cpu that is going offline, get off it. 1000 * This is so that it won't be necessary to rechoose a CPU 1001 * when done. 1002 */ 1003 if (CPU == off_cp) 1004 cpu_id = off_cp->cpu_next_part->cpu_id; 1005 else 1006 cpu_id = CPU->cpu_id; 1007 affinity_set(cpu_id); 1008 1009 /* 1010 * Start the pause threads and record how many were started 1011 */ 1012 cpi->cp_count = cpu_pause_start(cpu_id); 1013 1014 /* 1015 * Now wait for all CPUs to be running the pause thread. 
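	 * Each pause thread posts cp_sem once from cpu_pause() when it is
	 * up and running, so consuming cp_count posts here means every
	 * pause thread has reached PAUSE_READY.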
1016 */ 1017 while (cpi->cp_count > 0) { 1018 /* 1019 * Spin reading the count without grabbing the disp 1020 * lock to make sure we don't prevent the pause 1021 * threads from getting the lock. 1022 */ 1023 while (sema_held(&cpi->cp_sem)) 1024 ; 1025 if (sema_tryp(&cpi->cp_sem)) 1026 --cpi->cp_count; 1027 } 1028 cpi->cp_go = 1; /* all have reached cpu_pause */ 1029 1030 /* 1031 * Now wait for all CPUs to spl. (Transition from PAUSE_READY 1032 * to PAUSE_WAIT.) 1033 */ 1034 for (i = 0; i < NCPU; i++) { 1035 while (safe_list[i] != PAUSE_WAIT) 1036 ; 1037 } 1038 cpi->cp_spl = splhigh(); /* block dispatcher on this CPU */ 1039 cpi->cp_paused = curthread; 1040 } 1041 1042 /* 1043 * Check whether the current thread has CPUs paused 1044 */ 1045 int 1046 cpus_paused(void) 1047 { 1048 if (cpu_pause_info.cp_paused != NULL) { 1049 ASSERT(cpu_pause_info.cp_paused == curthread); 1050 return (1); 1051 } 1052 return (0); 1053 } 1054 1055 static cpu_t * 1056 cpu_get_all(processorid_t cpun) 1057 { 1058 ASSERT(MUTEX_HELD(&cpu_lock)); 1059 1060 if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun)) 1061 return (NULL); 1062 return (cpu[cpun]); 1063 } 1064 1065 /* 1066 * Check whether cpun is a valid processor id and whether it should be 1067 * visible from the current zone. If it is, return a pointer to the 1068 * associated CPU structure. 1069 */ 1070 cpu_t * 1071 cpu_get(processorid_t cpun) 1072 { 1073 cpu_t *c; 1074 1075 ASSERT(MUTEX_HELD(&cpu_lock)); 1076 c = cpu_get_all(cpun); 1077 if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() && 1078 zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c)) 1079 return (NULL); 1080 return (c); 1081 } 1082 1083 /* 1084 * The following functions should be used to check CPU states in the kernel. 1085 * They should be invoked with cpu_lock held. Kernel subsystems interested 1086 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc 1087 * states. Those are for user-land (and system call) use only. 1088 */ 1089 1090 /* 1091 * Determine whether the CPU is online and handling interrupts. 1092 */ 1093 int 1094 cpu_is_online(cpu_t *cpu) 1095 { 1096 ASSERT(MUTEX_HELD(&cpu_lock)); 1097 return (cpu_flagged_online(cpu->cpu_flags)); 1098 } 1099 1100 /* 1101 * Determine whether the CPU is offline (this includes spare and faulted). 1102 */ 1103 int 1104 cpu_is_offline(cpu_t *cpu) 1105 { 1106 ASSERT(MUTEX_HELD(&cpu_lock)); 1107 return (cpu_flagged_offline(cpu->cpu_flags)); 1108 } 1109 1110 /* 1111 * Determine whether the CPU is powered off. 1112 */ 1113 int 1114 cpu_is_poweredoff(cpu_t *cpu) 1115 { 1116 ASSERT(MUTEX_HELD(&cpu_lock)); 1117 return (cpu_flagged_poweredoff(cpu->cpu_flags)); 1118 } 1119 1120 /* 1121 * Determine whether the CPU is handling interrupts. 1122 */ 1123 int 1124 cpu_is_nointr(cpu_t *cpu) 1125 { 1126 ASSERT(MUTEX_HELD(&cpu_lock)); 1127 return (cpu_flagged_nointr(cpu->cpu_flags)); 1128 } 1129 1130 /* 1131 * Determine whether the CPU is active (scheduling threads). 1132 */ 1133 int 1134 cpu_is_active(cpu_t *cpu) 1135 { 1136 ASSERT(MUTEX_HELD(&cpu_lock)); 1137 return (cpu_flagged_active(cpu->cpu_flags)); 1138 } 1139 1140 /* 1141 * Same as above, but these require cpu_flags instead of cpu_t pointers. 
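 *
 * As a rough summary of the combinations tested below:
 *
 *	active		CPU_READY set; CPU_OFFLINE, CPU_POWEROFF,
 *			CPU_FAULTED and CPU_SPARE all clear
 *	online		active and taking interrupts (CPU_ENABLE set)
 *	nointr		active but not taking interrupts (CPU_ENABLE clear)
 *	offline		not powered off, and either CPU_READY clear or
 *			CPU_OFFLINE set (so spare and faulted count too)
 *	poweredoff	CPU_POWEROFF set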
1142 */ 1143 int 1144 cpu_flagged_online(cpu_flag_t cpu_flags) 1145 { 1146 return (cpu_flagged_active(cpu_flags) && 1147 (cpu_flags & CPU_ENABLE)); 1148 } 1149 1150 int 1151 cpu_flagged_offline(cpu_flag_t cpu_flags) 1152 { 1153 return (((cpu_flags & CPU_POWEROFF) == 0) && 1154 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)); 1155 } 1156 1157 int 1158 cpu_flagged_poweredoff(cpu_flag_t cpu_flags) 1159 { 1160 return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF); 1161 } 1162 1163 int 1164 cpu_flagged_nointr(cpu_flag_t cpu_flags) 1165 { 1166 return (cpu_flagged_active(cpu_flags) && 1167 (cpu_flags & CPU_ENABLE) == 0); 1168 } 1169 1170 int 1171 cpu_flagged_active(cpu_flag_t cpu_flags) 1172 { 1173 return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) && 1174 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY)); 1175 } 1176 1177 /* 1178 * Bring the indicated CPU online. 1179 */ 1180 int 1181 cpu_online(cpu_t *cp) 1182 { 1183 int error = 0; 1184 1185 /* 1186 * Handle on-line request. 1187 * This code must put the new CPU on the active list before 1188 * starting it because it will not be paused, and will start 1189 * using the active list immediately. The real start occurs 1190 * when the CPU_QUIESCED flag is turned off. 1191 */ 1192 1193 ASSERT(MUTEX_HELD(&cpu_lock)); 1194 1195 /* 1196 * Put all the cpus into a known safe place. 1197 * No mutexes can be entered while CPUs are paused. 1198 */ 1199 error = mp_cpu_start(cp); /* arch-dep hook */ 1200 if (error == 0) { 1201 pg_cpupart_in(cp, cp->cpu_part); 1202 pause_cpus(NULL, NULL); 1203 cpu_add_active_internal(cp); 1204 if (cp->cpu_flags & CPU_FAULTED) { 1205 cp->cpu_flags &= ~CPU_FAULTED; 1206 mp_cpu_faulted_exit(cp); 1207 } 1208 cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN | 1209 CPU_SPARE); 1210 CPU_NEW_GENERATION(cp); 1211 start_cpus(); 1212 cpu_stats_kstat_create(cp); 1213 cpu_create_intrstat(cp); 1214 lgrp_kstat_create(cp); 1215 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1216 cpu_intr_enable(cp); /* arch-dep hook */ 1217 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON); 1218 cpu_set_state(cp); 1219 cyclic_online(cp); 1220 /* 1221 * This has to be called only after cyclic_online(). This 1222 * function uses cyclics. 1223 */ 1224 callout_cpu_online(cp); 1225 poke_cpu(cp->cpu_id); 1226 } 1227 1228 return (error); 1229 } 1230 1231 /* 1232 * Take the indicated CPU offline. 1233 */ 1234 int 1235 cpu_offline(cpu_t *cp, int flags) 1236 { 1237 cpupart_t *pp; 1238 int error = 0; 1239 cpu_t *ncp; 1240 int intr_enable; 1241 int cyclic_off = 0; 1242 int callout_off = 0; 1243 int loop_count; 1244 int no_quiesce = 0; 1245 int (*bound_func)(struct cpu *, int); 1246 kthread_t *t; 1247 lpl_t *cpu_lpl; 1248 proc_t *p; 1249 int lgrp_diff_lpl; 1250 boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0; 1251 1252 ASSERT(MUTEX_HELD(&cpu_lock)); 1253 1254 /* 1255 * If we're going from faulted or spare to offline, just 1256 * clear these flags and update CPU state. 1257 */ 1258 if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) { 1259 if (cp->cpu_flags & CPU_FAULTED) { 1260 cp->cpu_flags &= ~CPU_FAULTED; 1261 mp_cpu_faulted_exit(cp); 1262 } 1263 cp->cpu_flags &= ~CPU_SPARE; 1264 cpu_set_state(cp); 1265 return (0); 1266 } 1267 1268 /* 1269 * Handle off-line request. 
1270 */ 1271 pp = cp->cpu_part; 1272 /* 1273 * Don't offline last online CPU in partition 1274 */ 1275 if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2) 1276 return (EBUSY); 1277 /* 1278 * Unbind all soft-bound threads bound to our CPU and hard bound threads 1279 * if we were asked to. 1280 */ 1281 error = cpu_unbind(cp->cpu_id, unbind_all_threads); 1282 if (error != 0) 1283 return (error); 1284 /* 1285 * We shouldn't be bound to this CPU ourselves. 1286 */ 1287 if (curthread->t_bound_cpu == cp) 1288 return (EBUSY); 1289 1290 /* 1291 * Tell interested parties that this CPU is going offline. 1292 */ 1293 CPU_NEW_GENERATION(cp); 1294 cpu_state_change_notify(cp->cpu_id, CPU_OFF); 1295 1296 /* 1297 * Tell the PG subsystem that the CPU is leaving the partition 1298 */ 1299 pg_cpupart_out(cp, pp); 1300 1301 /* 1302 * Take the CPU out of interrupt participation so we won't find 1303 * bound kernel threads. If the architecture cannot completely 1304 * shut off interrupts on the CPU, don't quiesce it, but don't 1305 * run anything but interrupt thread... this is indicated by 1306 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being 1307 * off. 1308 */ 1309 intr_enable = cp->cpu_flags & CPU_ENABLE; 1310 if (intr_enable) 1311 no_quiesce = cpu_intr_disable(cp); 1312 1313 /* 1314 * Record that we are aiming to offline this cpu. This acts as 1315 * a barrier to further weak binding requests in thread_nomigrate 1316 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to 1317 * lean away from this cpu. Further strong bindings are already 1318 * avoided since we hold cpu_lock. Since threads that are set 1319 * runnable around now and others coming off the target cpu are 1320 * directed away from the target, existing strong and weak bindings 1321 * (especially the latter) to the target cpu stand maximum chance of 1322 * being able to unbind during the short delay loop below (if other 1323 * unbound threads compete they may not see cpu in time to unbind 1324 * even if they would do so immediately. 1325 */ 1326 cpu_inmotion = cp; 1327 membar_enter(); 1328 1329 /* 1330 * Check for kernel threads (strong or weak) bound to that CPU. 1331 * Strongly bound threads may not unbind, and we'll have to return 1332 * EBUSY. Weakly bound threads should always disappear - we've 1333 * stopped more weak binding with cpu_inmotion and existing 1334 * bindings will drain imminently (they may not block). Nonetheless 1335 * we will wait for a fixed period for all bound threads to disappear. 1336 * Inactive interrupt threads are OK (they'll be in TS_FREE 1337 * state). If test finds some bound threads, wait a few ticks 1338 * to give short-lived threads (such as interrupts) chance to 1339 * complete. Note that if no_quiesce is set, i.e. this cpu 1340 * is required to service interrupts, then we take the route 1341 * that permits interrupt threads to be active (or bypassed). 1342 */ 1343 bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads; 1344 1345 again: for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) { 1346 if (loop_count >= 5) { 1347 error = EBUSY; /* some threads still bound */ 1348 break; 1349 } 1350 1351 /* 1352 * If some threads were assigned, give them 1353 * a chance to complete or move. 1354 * 1355 * This assumes that the clock_thread is not bound 1356 * to any CPU, because the clock_thread is needed to 1357 * do the delay(hz/100). 1358 * 1359 * Note: we still hold the cpu_lock while waiting for 1360 * the next clock tick. 
This is OK since it isn't 1361 * needed for anything else except processor_bind(2), 1362 * and system initialization. If we drop the lock, 1363 * we would risk another p_online disabling the last 1364 * processor. 1365 */ 1366 delay(hz/100); 1367 } 1368 1369 if (error == 0 && callout_off == 0) { 1370 callout_cpu_offline(cp); 1371 callout_off = 1; 1372 } 1373 1374 if (error == 0 && cyclic_off == 0) { 1375 if (!cyclic_offline(cp)) { 1376 /* 1377 * We must have bound cyclics... 1378 */ 1379 error = EBUSY; 1380 goto out; 1381 } 1382 cyclic_off = 1; 1383 } 1384 1385 /* 1386 * Call mp_cpu_stop() to perform any special operations 1387 * needed for this machine architecture to offline a CPU. 1388 */ 1389 if (error == 0) 1390 error = mp_cpu_stop(cp); /* arch-dep hook */ 1391 1392 /* 1393 * If that all worked, take the CPU offline and decrement 1394 * ncpus_online. 1395 */ 1396 if (error == 0) { 1397 /* 1398 * Put all the cpus into a known safe place. 1399 * No mutexes can be entered while CPUs are paused. 1400 */ 1401 pause_cpus(cp, NULL); 1402 /* 1403 * Repeat the operation, if necessary, to make sure that 1404 * all outstanding low-level interrupts run to completion 1405 * before we set the CPU_QUIESCED flag. It's also possible 1406 * that a thread has weak bound to the cpu despite our raising 1407 * cpu_inmotion above since it may have loaded that 1408 * value before the barrier became visible (this would have 1409 * to be the thread that was on the target cpu at the time 1410 * we raised the barrier). 1411 */ 1412 if ((!no_quiesce && cp->cpu_intr_actv != 0) || 1413 (*bound_func)(cp, 1)) { 1414 start_cpus(); 1415 (void) mp_cpu_start(cp); 1416 goto again; 1417 } 1418 ncp = cp->cpu_next_part; 1419 cpu_lpl = cp->cpu_lpl; 1420 ASSERT(cpu_lpl != NULL); 1421 1422 /* 1423 * Remove the CPU from the list of active CPUs. 1424 */ 1425 cpu_remove_active(cp); 1426 1427 /* 1428 * Walk the active process list and look for threads 1429 * whose home lgroup needs to be updated, or 1430 * the last CPU they run on is the one being offlined now. 1431 */ 1432 1433 ASSERT(curthread->t_cpu != cp); 1434 for (p = practive; p != NULL; p = p->p_next) { 1435 1436 t = p->p_tlist; 1437 1438 if (t == NULL) 1439 continue; 1440 1441 lgrp_diff_lpl = 0; 1442 1443 do { 1444 ASSERT(t->t_lpl != NULL); 1445 /* 1446 * Taking last CPU in lpl offline 1447 * Rehome thread if it is in this lpl 1448 * Otherwise, update the count of how many 1449 * threads are in this CPU's lgroup but have 1450 * a different lpl. 1451 */ 1452 1453 if (cpu_lpl->lpl_ncpu == 0) { 1454 if (t->t_lpl == cpu_lpl) 1455 lgrp_move_thread(t, 1456 lgrp_choose(t, 1457 t->t_cpupart), 0); 1458 else if (t->t_lpl->lpl_lgrpid == 1459 cpu_lpl->lpl_lgrpid) 1460 lgrp_diff_lpl++; 1461 } 1462 ASSERT(t->t_lpl->lpl_ncpu > 0); 1463 1464 /* 1465 * Update CPU last ran on if it was this CPU 1466 */ 1467 if (t->t_cpu == cp && t->t_bound_cpu != cp) 1468 t->t_cpu = disp_lowpri_cpu(ncp, 1469 t->t_lpl, t->t_pri, NULL); 1470 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1471 t->t_weakbound_cpu == cp); 1472 1473 t = t->t_forw; 1474 } while (t != p->p_tlist); 1475 1476 /* 1477 * Didn't find any threads in the same lgroup as this 1478 * CPU with a different lpl, so remove the lgroup from 1479 * the process lgroup bitmask. 1480 */ 1481 1482 if (lgrp_diff_lpl == 0) 1483 klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid); 1484 } 1485 1486 /* 1487 * Walk thread list looking for threads that need to be 1488 * rehomed, since there are some threads that are not in 1489 * their process's p_tlist. 
1490 */ 1491 1492 t = curthread; 1493 do { 1494 ASSERT(t != NULL && t->t_lpl != NULL); 1495 1496 /* 1497 * Rehome threads with same lpl as this CPU when this 1498 * is the last CPU in the lpl. 1499 */ 1500 1501 if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl)) 1502 lgrp_move_thread(t, 1503 lgrp_choose(t, t->t_cpupart), 1); 1504 1505 ASSERT(t->t_lpl->lpl_ncpu > 0); 1506 1507 /* 1508 * Update CPU last ran on if it was this CPU 1509 */ 1510 1511 if (t->t_cpu == cp && t->t_bound_cpu != cp) { 1512 t->t_cpu = disp_lowpri_cpu(ncp, 1513 t->t_lpl, t->t_pri, NULL); 1514 } 1515 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1516 t->t_weakbound_cpu == cp); 1517 t = t->t_next; 1518 1519 } while (t != curthread); 1520 ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0); 1521 cp->cpu_flags |= CPU_OFFLINE; 1522 disp_cpu_inactive(cp); 1523 if (!no_quiesce) 1524 cp->cpu_flags |= CPU_QUIESCED; 1525 ncpus_online--; 1526 cpu_set_state(cp); 1527 cpu_inmotion = NULL; 1528 start_cpus(); 1529 cpu_stats_kstat_destroy(cp); 1530 cpu_delete_intrstat(cp); 1531 lgrp_kstat_destroy(cp); 1532 } 1533 1534 out: 1535 cpu_inmotion = NULL; 1536 1537 /* 1538 * If we failed, re-enable interrupts. 1539 * Do this even if cpu_intr_disable returned an error, because 1540 * it may have partially disabled interrupts. 1541 */ 1542 if (error && intr_enable) 1543 cpu_intr_enable(cp); 1544 1545 /* 1546 * If we failed, but managed to offline the cyclic subsystem on this 1547 * CPU, bring it back online. 1548 */ 1549 if (error && cyclic_off) 1550 cyclic_online(cp); 1551 1552 /* 1553 * If we failed, but managed to offline callouts on this CPU, 1554 * bring it back online. 1555 */ 1556 if (error && callout_off) 1557 callout_cpu_online(cp); 1558 1559 /* 1560 * If we failed, tell the PG subsystem that the CPU is back 1561 */ 1562 pg_cpupart_in(cp, pp); 1563 1564 /* 1565 * If we failed, we need to notify everyone that this CPU is back on. 1566 */ 1567 if (error != 0) { 1568 CPU_NEW_GENERATION(cp); 1569 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1570 cpu_state_change_notify(cp->cpu_id, CPU_INTR_ON); 1571 } 1572 1573 return (error); 1574 } 1575 1576 /* 1577 * Mark the indicated CPU as faulted, taking it offline. 1578 */ 1579 int 1580 cpu_faulted(cpu_t *cp, int flags) 1581 { 1582 int error = 0; 1583 1584 ASSERT(MUTEX_HELD(&cpu_lock)); 1585 ASSERT(!cpu_is_poweredoff(cp)); 1586 1587 if (cpu_is_offline(cp)) { 1588 cp->cpu_flags &= ~CPU_SPARE; 1589 cp->cpu_flags |= CPU_FAULTED; 1590 mp_cpu_faulted_enter(cp); 1591 cpu_set_state(cp); 1592 return (0); 1593 } 1594 1595 if ((error = cpu_offline(cp, flags)) == 0) { 1596 cp->cpu_flags |= CPU_FAULTED; 1597 mp_cpu_faulted_enter(cp); 1598 cpu_set_state(cp); 1599 } 1600 1601 return (error); 1602 } 1603 1604 /* 1605 * Mark the indicated CPU as a spare, taking it offline. 1606 */ 1607 int 1608 cpu_spare(cpu_t *cp, int flags) 1609 { 1610 int error = 0; 1611 1612 ASSERT(MUTEX_HELD(&cpu_lock)); 1613 ASSERT(!cpu_is_poweredoff(cp)); 1614 1615 if (cpu_is_offline(cp)) { 1616 if (cp->cpu_flags & CPU_FAULTED) { 1617 cp->cpu_flags &= ~CPU_FAULTED; 1618 mp_cpu_faulted_exit(cp); 1619 } 1620 cp->cpu_flags |= CPU_SPARE; 1621 cpu_set_state(cp); 1622 return (0); 1623 } 1624 1625 if ((error = cpu_offline(cp, flags)) == 0) { 1626 cp->cpu_flags |= CPU_SPARE; 1627 cpu_set_state(cp); 1628 } 1629 1630 return (error); 1631 } 1632 1633 /* 1634 * Take the indicated CPU from poweroff to offline. 
1635 */ 1636 int 1637 cpu_poweron(cpu_t *cp) 1638 { 1639 int error = ENOTSUP; 1640 1641 ASSERT(MUTEX_HELD(&cpu_lock)); 1642 ASSERT(cpu_is_poweredoff(cp)); 1643 1644 error = mp_cpu_poweron(cp); /* arch-dep hook */ 1645 if (error == 0) 1646 cpu_set_state(cp); 1647 1648 return (error); 1649 } 1650 1651 /* 1652 * Take the indicated CPU from any inactive state to powered off. 1653 */ 1654 int 1655 cpu_poweroff(cpu_t *cp) 1656 { 1657 int error = ENOTSUP; 1658 1659 ASSERT(MUTEX_HELD(&cpu_lock)); 1660 ASSERT(cpu_is_offline(cp)); 1661 1662 if (!(cp->cpu_flags & CPU_QUIESCED)) 1663 return (EBUSY); /* not completely idle */ 1664 1665 error = mp_cpu_poweroff(cp); /* arch-dep hook */ 1666 if (error == 0) 1667 cpu_set_state(cp); 1668 1669 return (error); 1670 } 1671 1672 /* 1673 * Initialize the Sequential CPU id lookup table 1674 */ 1675 void 1676 cpu_seq_tbl_init() 1677 { 1678 cpu_t **tbl; 1679 1680 tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP); 1681 tbl[0] = CPU; 1682 1683 cpu_seq = tbl; 1684 } 1685 1686 /* 1687 * Initialize the CPU lists for the first CPU. 1688 */ 1689 void 1690 cpu_list_init(cpu_t *cp) 1691 { 1692 cp->cpu_next = cp; 1693 cp->cpu_prev = cp; 1694 cpu_list = cp; 1695 clock_cpu_list = cp; 1696 1697 cp->cpu_next_onln = cp; 1698 cp->cpu_prev_onln = cp; 1699 cpu_active = cp; 1700 1701 cp->cpu_seqid = 0; 1702 CPUSET_ADD(cpu_seqid_inuse, 0); 1703 1704 /* 1705 * Bootstrap cpu_seq using cpu_list 1706 * The cpu_seq[] table will be dynamically allocated 1707 * when kmem later becomes available (but before going MP) 1708 */ 1709 cpu_seq = &cpu_list; 1710 1711 cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid); 1712 cp_default.cp_cpulist = cp; 1713 cp_default.cp_ncpus = 1; 1714 cp->cpu_next_part = cp; 1715 cp->cpu_prev_part = cp; 1716 cp->cpu_part = &cp_default; 1717 1718 CPUSET_ADD(cpu_available, cp->cpu_id); 1719 } 1720 1721 /* 1722 * Insert a CPU into the list of available CPUs. 1723 */ 1724 void 1725 cpu_add_unit(cpu_t *cp) 1726 { 1727 int seqid; 1728 1729 ASSERT(MUTEX_HELD(&cpu_lock)); 1730 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */ 1731 1732 lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0); 1733 1734 /* 1735 * Note: most users of the cpu_list will grab the 1736 * cpu_lock to insure that it isn't modified. However, 1737 * certain users can't or won't do that. To allow this 1738 * we pause the other cpus. Users who walk the list 1739 * without cpu_lock, must disable kernel preemption 1740 * to insure that the list isn't modified underneath 1741 * them. Also, any cached pointers to cpu structures 1742 * must be revalidated by checking to see if the 1743 * cpu_next pointer points to itself. This check must 1744 * be done with the cpu_lock held or kernel preemption 1745 * disabled. This check relies upon the fact that 1746 * old cpu structures are not free'ed or cleared after 1747 * then are removed from the cpu_list. 1748 * 1749 * Note that the clock code walks the cpu list dereferencing 1750 * the cpu_part pointer, so we need to initialize it before 1751 * adding the cpu to the list. 
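	 *
	 * As an illustrative sketch only (no such walker is added here),
	 * a traversal that follows the rules above without cpu_lock
	 * would look roughly like:
	 *
	 *	kpreempt_disable();
	 *	cp = cpu_list;
	 *	do {
	 *		... examine *cp, but don't cache the pointer ...
	 *		cp = cp->cpu_next;
	 *	} while (cp != cpu_list);
	 *	kpreempt_enable();
	 *
	 * A cached cpu_t pointer must instead be revalidated as described
	 * above before it is dereferenced again (see also cpu_del_unit()).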
1752 */ 1753 cp->cpu_part = &cp_default; 1754 pause_cpus(NULL, NULL); 1755 cp->cpu_next = cpu_list; 1756 cp->cpu_prev = cpu_list->cpu_prev; 1757 cpu_list->cpu_prev->cpu_next = cp; 1758 cpu_list->cpu_prev = cp; 1759 start_cpus(); 1760 1761 for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++) 1762 continue; 1763 CPUSET_ADD(cpu_seqid_inuse, seqid); 1764 cp->cpu_seqid = seqid; 1765 1766 if (seqid > max_cpu_seqid_ever) 1767 max_cpu_seqid_ever = seqid; 1768 1769 ASSERT(ncpus < max_ncpus); 1770 ncpus++; 1771 cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid); 1772 cpu[cp->cpu_id] = cp; 1773 CPUSET_ADD(cpu_available, cp->cpu_id); 1774 cpu_seq[cp->cpu_seqid] = cp; 1775 1776 /* 1777 * allocate a pause thread for this CPU. 1778 */ 1779 cpu_pause_alloc(cp); 1780 1781 /* 1782 * So that new CPUs won't have NULL prev_onln and next_onln pointers, 1783 * link them into a list of just that CPU. 1784 * This is so that disp_lowpri_cpu will work for thread_create in 1785 * pause_cpus() when called from the startup thread in a new CPU. 1786 */ 1787 cp->cpu_next_onln = cp; 1788 cp->cpu_prev_onln = cp; 1789 cpu_info_kstat_create(cp); 1790 cp->cpu_next_part = cp; 1791 cp->cpu_prev_part = cp; 1792 1793 init_cpu_mstate(cp, CMS_SYSTEM); 1794 1795 pool_pset_mod = gethrtime(); 1796 } 1797 1798 /* 1799 * Do the opposite of cpu_add_unit(). 1800 */ 1801 void 1802 cpu_del_unit(int cpuid) 1803 { 1804 struct cpu *cp, *cpnext; 1805 1806 ASSERT(MUTEX_HELD(&cpu_lock)); 1807 cp = cpu[cpuid]; 1808 ASSERT(cp != NULL); 1809 1810 ASSERT(cp->cpu_next_onln == cp); 1811 ASSERT(cp->cpu_prev_onln == cp); 1812 ASSERT(cp->cpu_next_part == cp); 1813 ASSERT(cp->cpu_prev_part == cp); 1814 1815 /* 1816 * Tear down the CPU's physical ID cache, and update any 1817 * processor groups 1818 */ 1819 pg_cpu_fini(cp, NULL); 1820 pghw_physid_destroy(cp); 1821 1822 /* 1823 * Destroy kstat stuff. 1824 */ 1825 cpu_info_kstat_destroy(cp); 1826 term_cpu_mstate(cp); 1827 /* 1828 * Free up pause thread. 1829 */ 1830 cpu_pause_free(cp); 1831 CPUSET_DEL(cpu_available, cp->cpu_id); 1832 cpu[cp->cpu_id] = NULL; 1833 cpu_seq[cp->cpu_seqid] = NULL; 1834 1835 /* 1836 * The clock thread and mutex_vector_enter cannot hold the 1837 * cpu_lock while traversing the cpu list, therefore we pause 1838 * all other threads by pausing the other cpus. These, and any 1839 * other routines holding cpu pointers while possibly sleeping 1840 * must be sure to call kpreempt_disable before processing the 1841 * list and be sure to check that the cpu has not been deleted 1842 * after any sleeps (check cp->cpu_next != NULL). We guarantee 1843 * to keep the deleted cpu structure around. 1844 * 1845 * Note that this MUST be done AFTER cpu_available 1846 * has been updated so that we don't waste time 1847 * trying to pause the cpu we're trying to delete. 1848 */ 1849 pause_cpus(NULL, NULL); 1850 1851 cpnext = cp->cpu_next; 1852 cp->cpu_prev->cpu_next = cp->cpu_next; 1853 cp->cpu_next->cpu_prev = cp->cpu_prev; 1854 if (cp == cpu_list) 1855 cpu_list = cpnext; 1856 1857 /* 1858 * Signals that the cpu has been deleted (see above). 1859 */ 1860 cp->cpu_next = NULL; 1861 cp->cpu_prev = NULL; 1862 1863 start_cpus(); 1864 1865 CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid); 1866 ncpus--; 1867 lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0); 1868 1869 pool_pset_mod = gethrtime(); 1870 } 1871 1872 /* 1873 * Add a CPU to the list of active CPUs. 1874 * This routine must not get any locks, because other CPUs are paused. 
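 * (Both callers, cpu_online() and cpu_add_active(), call pause_cpus()
 * before this routine and start_cpus() afterwards.)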
1875 */ 1876 static void 1877 cpu_add_active_internal(cpu_t *cp) 1878 { 1879 cpupart_t *pp = cp->cpu_part; 1880 1881 ASSERT(MUTEX_HELD(&cpu_lock)); 1882 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */ 1883 1884 ncpus_online++; 1885 cpu_set_state(cp); 1886 cp->cpu_next_onln = cpu_active; 1887 cp->cpu_prev_onln = cpu_active->cpu_prev_onln; 1888 cpu_active->cpu_prev_onln->cpu_next_onln = cp; 1889 cpu_active->cpu_prev_onln = cp; 1890 1891 if (pp->cp_cpulist) { 1892 cp->cpu_next_part = pp->cp_cpulist; 1893 cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part; 1894 pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp; 1895 pp->cp_cpulist->cpu_prev_part = cp; 1896 } else { 1897 ASSERT(pp->cp_ncpus == 0); 1898 pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp; 1899 } 1900 pp->cp_ncpus++; 1901 if (pp->cp_ncpus == 1) { 1902 cp_numparts_nonempty++; 1903 ASSERT(cp_numparts_nonempty != 0); 1904 } 1905 1906 pg_cpu_active(cp); 1907 lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); 1908 1909 bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); 1910 } 1911 1912 /* 1913 * Add a CPU to the list of active CPUs. 1914 * This is called from machine-dependent layers when a new CPU is started. 1915 */ 1916 void 1917 cpu_add_active(cpu_t *cp) 1918 { 1919 pg_cpupart_in(cp, cp->cpu_part); 1920 1921 pause_cpus(NULL, NULL); 1922 cpu_add_active_internal(cp); 1923 start_cpus(); 1924 1925 cpu_stats_kstat_create(cp); 1926 cpu_create_intrstat(cp); 1927 lgrp_kstat_create(cp); 1928 cpu_state_change_notify(cp->cpu_id, CPU_INIT); 1929 } 1930 1931 1932 /* 1933 * Remove a CPU from the list of active CPUs. 1934 * This routine must not get any locks, because other CPUs are paused. 1935 */ 1936 /* ARGSUSED */ 1937 static void 1938 cpu_remove_active(cpu_t *cp) 1939 { 1940 cpupart_t *pp = cp->cpu_part; 1941 1942 ASSERT(MUTEX_HELD(&cpu_lock)); 1943 ASSERT(cp->cpu_next_onln != cp); /* not the last one */ 1944 ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ 1945 1946 pg_cpu_inactive(cp); 1947 1948 lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); 1949 1950 if (cp == clock_cpu_list) 1951 clock_cpu_list = cp->cpu_next_onln; 1952 1953 cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln; 1954 cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln; 1955 if (cpu_active == cp) { 1956 cpu_active = cp->cpu_next_onln; 1957 } 1958 cp->cpu_next_onln = cp; 1959 cp->cpu_prev_onln = cp; 1960 1961 cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part; 1962 cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part; 1963 if (pp->cp_cpulist == cp) { 1964 pp->cp_cpulist = cp->cpu_next_part; 1965 ASSERT(pp->cp_cpulist != cp); 1966 } 1967 cp->cpu_next_part = cp; 1968 cp->cpu_prev_part = cp; 1969 pp->cp_ncpus--; 1970 if (pp->cp_ncpus == 0) { 1971 cp_numparts_nonempty--; 1972 ASSERT(cp_numparts_nonempty != 0); 1973 } 1974 } 1975 1976 /* 1977 * Routine used to setup a newly inserted CPU in preparation for starting 1978 * it running code. 1979 */ 1980 int 1981 cpu_configure(int cpuid) 1982 { 1983 int retval = 0; 1984 1985 ASSERT(MUTEX_HELD(&cpu_lock)); 1986 1987 /* 1988 * Some structures are statically allocated based upon 1989 * the maximum number of cpus the system supports. Do not 1990 * try to add anything beyond this limit. 
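 * (The cpu[] array, for instance, is indexed by processor id and is
 * sized by NCPU at compile time, which is why cpuid is range-checked
 * against NCPU below.)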
1991 */ 1992 if (cpuid < 0 || cpuid >= NCPU) { 1993 return (EINVAL); 1994 } 1995 1996 if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) { 1997 return (EALREADY); 1998 } 1999 2000 if ((retval = mp_cpu_configure(cpuid)) != 0) { 2001 return (retval); 2002 } 2003 2004 cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF; 2005 cpu_set_state(cpu[cpuid]); 2006 retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG); 2007 if (retval != 0) 2008 (void) mp_cpu_unconfigure(cpuid); 2009 2010 return (retval); 2011 } 2012 2013 /* 2014 * Routine used to cleanup a CPU that has been powered off. This will 2015 * destroy all per-cpu information related to this cpu. 2016 */ 2017 int 2018 cpu_unconfigure(int cpuid) 2019 { 2020 int error; 2021 2022 ASSERT(MUTEX_HELD(&cpu_lock)); 2023 2024 if (cpu[cpuid] == NULL) { 2025 return (ENODEV); 2026 } 2027 2028 if (cpu[cpuid]->cpu_flags == 0) { 2029 return (EALREADY); 2030 } 2031 2032 if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) { 2033 return (EBUSY); 2034 } 2035 2036 if (cpu[cpuid]->cpu_props != NULL) { 2037 (void) nvlist_free(cpu[cpuid]->cpu_props); 2038 cpu[cpuid]->cpu_props = NULL; 2039 } 2040 2041 error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG); 2042 2043 if (error != 0) 2044 return (error); 2045 2046 return (mp_cpu_unconfigure(cpuid)); 2047 } 2048 2049 /* 2050 * Routines for registering and de-registering cpu_setup callback functions. 2051 * 2052 * Caller's context 2053 * These routines must not be called from a driver's attach(9E) or 2054 * detach(9E) entry point. 2055 * 2056 * NOTE: CPU callbacks should not block. They are called with cpu_lock held. 2057 */ 2058 2059 /* 2060 * Ideally, these would be dynamically allocated and put into a linked 2061 * list; however that is not feasible because the registration routine 2062 * has to be available before the kmem allocator is working (in fact, 2063 * it is called by the kmem allocator init code). In any case, there 2064 * are quite a few extra entries for future users. 2065 */ 2066 #define NCPU_SETUPS 20 2067 2068 struct cpu_setup { 2069 cpu_setup_func_t *func; 2070 void *arg; 2071 } cpu_setups[NCPU_SETUPS]; 2072 2073 void 2074 register_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2075 { 2076 int i; 2077 2078 ASSERT(MUTEX_HELD(&cpu_lock)); 2079 2080 for (i = 0; i < NCPU_SETUPS; i++) 2081 if (cpu_setups[i].func == NULL) 2082 break; 2083 if (i >= NCPU_SETUPS) 2084 cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries"); 2085 2086 cpu_setups[i].func = func; 2087 cpu_setups[i].arg = arg; 2088 } 2089 2090 void 2091 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2092 { 2093 int i; 2094 2095 ASSERT(MUTEX_HELD(&cpu_lock)); 2096 2097 for (i = 0; i < NCPU_SETUPS; i++) 2098 if ((cpu_setups[i].func == func) && 2099 (cpu_setups[i].arg == arg)) 2100 break; 2101 if (i >= NCPU_SETUPS) 2102 cmn_err(CE_PANIC, "Could not find cpu_setup callback to " 2103 "deregister"); 2104 2105 cpu_setups[i].func = NULL; 2106 cpu_setups[i].arg = 0; 2107 } 2108 2109 /* 2110 * Call any state change hooks for this CPU, ignore any errors. 2111 */ 2112 void 2113 cpu_state_change_notify(int id, cpu_setup_t what) 2114 { 2115 int i; 2116 2117 ASSERT(MUTEX_HELD(&cpu_lock)); 2118 2119 for (i = 0; i < NCPU_SETUPS; i++) { 2120 if (cpu_setups[i].func != NULL) { 2121 cpu_setups[i].func(what, id, cpu_setups[i].arg); 2122 } 2123 } 2124 } 2125 2126 /* 2127 * Call any state change hooks for this CPU, undo it if error found. 
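 * If one of the callbacks fails, the callbacks that had already run are
 * invoked again with the `undo' event (e.g. CPU_UNCONFIG to back out a
 * partially delivered CPU_CONFIG) and the error is returned to the caller.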
2128 */ 2129 static int 2130 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo) 2131 { 2132 int i; 2133 int retval = 0; 2134 2135 ASSERT(MUTEX_HELD(&cpu_lock)); 2136 2137 for (i = 0; i < NCPU_SETUPS; i++) { 2138 if (cpu_setups[i].func != NULL) { 2139 retval = cpu_setups[i].func(what, id, 2140 cpu_setups[i].arg); 2141 if (retval) { 2142 for (i--; i >= 0; i--) { 2143 if (cpu_setups[i].func != NULL) 2144 cpu_setups[i].func(undo, 2145 id, cpu_setups[i].arg); 2146 } 2147 break; 2148 } 2149 } 2150 } 2151 return (retval); 2152 } 2153 2154 /* 2155 * Export information about this CPU via the kstat mechanism. 2156 */ 2157 static struct { 2158 kstat_named_t ci_state; 2159 kstat_named_t ci_state_begin; 2160 kstat_named_t ci_cpu_type; 2161 kstat_named_t ci_fpu_type; 2162 kstat_named_t ci_clock_MHz; 2163 kstat_named_t ci_chip_id; 2164 kstat_named_t ci_implementation; 2165 kstat_named_t ci_brandstr; 2166 kstat_named_t ci_core_id; 2167 kstat_named_t ci_curr_clock_Hz; 2168 kstat_named_t ci_supp_freq_Hz; 2169 kstat_named_t ci_pg_id; 2170 #if defined(__sparcv9) 2171 kstat_named_t ci_device_ID; 2172 kstat_named_t ci_cpu_fru; 2173 #endif 2174 #if defined(__x86) 2175 kstat_named_t ci_vendorstr; 2176 kstat_named_t ci_family; 2177 kstat_named_t ci_model; 2178 kstat_named_t ci_step; 2179 kstat_named_t ci_clogid; 2180 kstat_named_t ci_pkg_core_id; 2181 kstat_named_t ci_ncpuperchip; 2182 kstat_named_t ci_ncoreperchip; 2183 kstat_named_t ci_max_cstates; 2184 kstat_named_t ci_curr_cstate; 2185 kstat_named_t ci_cacheid; 2186 kstat_named_t ci_sktstr; 2187 #endif 2188 } cpu_info_template = { 2189 { "state", KSTAT_DATA_CHAR }, 2190 { "state_begin", KSTAT_DATA_LONG }, 2191 { "cpu_type", KSTAT_DATA_CHAR }, 2192 { "fpu_type", KSTAT_DATA_CHAR }, 2193 { "clock_MHz", KSTAT_DATA_LONG }, 2194 { "chip_id", KSTAT_DATA_LONG }, 2195 { "implementation", KSTAT_DATA_STRING }, 2196 { "brand", KSTAT_DATA_STRING }, 2197 { "core_id", KSTAT_DATA_LONG }, 2198 { "current_clock_Hz", KSTAT_DATA_UINT64 }, 2199 { "supported_frequencies_Hz", KSTAT_DATA_STRING }, 2200 { "pg_id", KSTAT_DATA_LONG }, 2201 #if defined(__sparcv9) 2202 { "device_ID", KSTAT_DATA_UINT64 }, 2203 { "cpu_fru", KSTAT_DATA_STRING }, 2204 #endif 2205 #if defined(__x86) 2206 { "vendor_id", KSTAT_DATA_STRING }, 2207 { "family", KSTAT_DATA_INT32 }, 2208 { "model", KSTAT_DATA_INT32 }, 2209 { "stepping", KSTAT_DATA_INT32 }, 2210 { "clog_id", KSTAT_DATA_INT32 }, 2211 { "pkg_core_id", KSTAT_DATA_LONG }, 2212 { "ncpu_per_chip", KSTAT_DATA_INT32 }, 2213 { "ncore_per_chip", KSTAT_DATA_INT32 }, 2214 { "supported_max_cstates", KSTAT_DATA_INT32 }, 2215 { "current_cstate", KSTAT_DATA_INT32 }, 2216 { "cache_id", KSTAT_DATA_INT32 }, 2217 { "socket_type", KSTAT_DATA_STRING }, 2218 #endif 2219 }; 2220 2221 static kmutex_t cpu_info_template_lock; 2222 2223 static int 2224 cpu_info_kstat_update(kstat_t *ksp, int rw) 2225 { 2226 cpu_t *cp = ksp->ks_private; 2227 const char *pi_state; 2228 2229 if (rw == KSTAT_WRITE) 2230 return (EACCES); 2231 2232 #if defined(__x86) 2233 /* Is the cpu still initialising itself? 
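 * That is, if the first pass of cpuid identification has not completed on
 * this cpu yet, the identification data filled in below is not valid, so
 * fail the kstat update with ENXIO.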
*/ 2234 if (cpuid_checkpass(cp, 1) == 0) 2235 return (ENXIO); 2236 #endif 2237 switch (cp->cpu_type_info.pi_state) { 2238 case P_ONLINE: 2239 pi_state = PS_ONLINE; 2240 break; 2241 case P_POWEROFF: 2242 pi_state = PS_POWEROFF; 2243 break; 2244 case P_NOINTR: 2245 pi_state = PS_NOINTR; 2246 break; 2247 case P_FAULTED: 2248 pi_state = PS_FAULTED; 2249 break; 2250 case P_SPARE: 2251 pi_state = PS_SPARE; 2252 break; 2253 case P_OFFLINE: 2254 pi_state = PS_OFFLINE; 2255 break; 2256 default: 2257 pi_state = "unknown"; 2258 } 2259 (void) strcpy(cpu_info_template.ci_state.value.c, pi_state); 2260 cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin; 2261 (void) strncpy(cpu_info_template.ci_cpu_type.value.c, 2262 cp->cpu_type_info.pi_processor_type, 15); 2263 (void) strncpy(cpu_info_template.ci_fpu_type.value.c, 2264 cp->cpu_type_info.pi_fputypes, 15); 2265 cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; 2266 cpu_info_template.ci_chip_id.value.l = 2267 pg_plat_hw_instance_id(cp, PGHW_CHIP); 2268 kstat_named_setstr(&cpu_info_template.ci_implementation, 2269 cp->cpu_idstr); 2270 kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); 2271 cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); 2272 cpu_info_template.ci_curr_clock_Hz.value.ui64 = 2273 cp->cpu_curr_clock; 2274 cpu_info_template.ci_pg_id.value.l = 2275 cp->cpu_pg && cp->cpu_pg->cmt_lineage ? 2276 cp->cpu_pg->cmt_lineage->pg_id : -1; 2277 kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz, 2278 cp->cpu_supp_freqs); 2279 #if defined(__sparcv9) 2280 cpu_info_template.ci_device_ID.value.ui64 = 2281 cpunodes[cp->cpu_id].device_id; 2282 kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp)); 2283 #endif 2284 #if defined(__x86) 2285 kstat_named_setstr(&cpu_info_template.ci_vendorstr, 2286 cpuid_getvendorstr(cp)); 2287 cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); 2288 cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); 2289 cpu_info_template.ci_step.value.l = cpuid_getstep(cp); 2290 cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); 2291 cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp); 2292 cpu_info_template.ci_ncoreperchip.value.l = 2293 cpuid_get_ncore_per_chip(cp); 2294 cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp); 2295 cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates; 2296 cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp); 2297 cpu_info_template.ci_cacheid.value.i32 = cpuid_get_cacheid(cp); 2298 kstat_named_setstr(&cpu_info_template.ci_sktstr, 2299 cpuid_getsocketstr(cp)); 2300 #endif 2301 2302 return (0); 2303 } 2304 2305 static void 2306 cpu_info_kstat_create(cpu_t *cp) 2307 { 2308 zoneid_t zoneid; 2309 2310 ASSERT(MUTEX_HELD(&cpu_lock)); 2311 2312 if (pool_pset_enabled()) 2313 zoneid = GLOBAL_ZONEID; 2314 else 2315 zoneid = ALL_ZONES; 2316 if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id, 2317 NULL, "misc", KSTAT_TYPE_NAMED, 2318 sizeof (cpu_info_template) / sizeof (kstat_named_t), 2319 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE, zoneid)) != NULL) { 2320 cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN; 2321 #if defined(__sparcv9) 2322 cp->cpu_info_kstat->ks_data_size += 2323 strlen(cpu_fru_fmri(cp)) + 1; 2324 #endif 2325 #if defined(__x86) 2326 cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN; 2327 #endif 2328 if (cp->cpu_supp_freqs != NULL) 2329 cp->cpu_info_kstat->ks_data_size += 2330 strlen(cp->cpu_supp_freqs) + 1; 2331 
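		/*
		 * All cpu_info kstats are virtual and share the single
		 * static cpu_info_template above.  Point ks_lock at
		 * cpu_info_template_lock so that updates and snapshots
		 * for different CPUs serialize on that shared buffer;
		 * cpu_set_supp_freqs() takes the same lock when it
		 * resizes the supported-frequencies string.
		 */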
cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock; 2332 cp->cpu_info_kstat->ks_data = &cpu_info_template; 2333 cp->cpu_info_kstat->ks_private = cp; 2334 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update; 2335 kstat_install(cp->cpu_info_kstat); 2336 } 2337 } 2338 2339 static void 2340 cpu_info_kstat_destroy(cpu_t *cp) 2341 { 2342 ASSERT(MUTEX_HELD(&cpu_lock)); 2343 2344 kstat_delete(cp->cpu_info_kstat); 2345 cp->cpu_info_kstat = NULL; 2346 } 2347 2348 /* 2349 * Create and install kstats for the boot CPU. 2350 */ 2351 void 2352 cpu_kstat_init(cpu_t *cp) 2353 { 2354 mutex_enter(&cpu_lock); 2355 cpu_info_kstat_create(cp); 2356 cpu_stats_kstat_create(cp); 2357 cpu_create_intrstat(cp); 2358 cpu_set_state(cp); 2359 mutex_exit(&cpu_lock); 2360 } 2361 2362 /* 2363 * Make visible to the zone that subset of the cpu information that would be 2364 * initialized when a cpu is configured (but still offline). 2365 */ 2366 void 2367 cpu_visibility_configure(cpu_t *cp, zone_t *zone) 2368 { 2369 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2370 2371 ASSERT(MUTEX_HELD(&cpu_lock)); 2372 ASSERT(pool_pset_enabled()); 2373 ASSERT(cp != NULL); 2374 2375 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2376 zone->zone_ncpus++; 2377 ASSERT(zone->zone_ncpus <= ncpus); 2378 } 2379 if (cp->cpu_info_kstat != NULL) 2380 kstat_zone_add(cp->cpu_info_kstat, zoneid); 2381 } 2382 2383 /* 2384 * Make visible to the zone that subset of the cpu information that would be 2385 * initialized when a previously configured cpu is onlined. 2386 */ 2387 void 2388 cpu_visibility_online(cpu_t *cp, zone_t *zone) 2389 { 2390 kstat_t *ksp; 2391 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2392 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2393 processorid_t cpun; 2394 2395 ASSERT(MUTEX_HELD(&cpu_lock)); 2396 ASSERT(pool_pset_enabled()); 2397 ASSERT(cp != NULL); 2398 ASSERT(cpu_is_active(cp)); 2399 2400 cpun = cp->cpu_id; 2401 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2402 zone->zone_ncpus_online++; 2403 ASSERT(zone->zone_ncpus_online <= ncpus_online); 2404 } 2405 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2406 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2407 != NULL) { 2408 kstat_zone_add(ksp, zoneid); 2409 kstat_rele(ksp); 2410 } 2411 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2412 kstat_zone_add(ksp, zoneid); 2413 kstat_rele(ksp); 2414 } 2415 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2416 kstat_zone_add(ksp, zoneid); 2417 kstat_rele(ksp); 2418 } 2419 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2420 NULL) { 2421 kstat_zone_add(ksp, zoneid); 2422 kstat_rele(ksp); 2423 } 2424 } 2425 2426 /* 2427 * Update relevant kstats such that cpu is now visible to processes 2428 * executing in specified zone. 2429 */ 2430 void 2431 cpu_visibility_add(cpu_t *cp, zone_t *zone) 2432 { 2433 cpu_visibility_configure(cp, zone); 2434 if (cpu_is_active(cp)) 2435 cpu_visibility_online(cp, zone); 2436 } 2437 2438 /* 2439 * Make invisible to the zone that subset of the cpu information that would be 2440 * torn down when a previously offlined cpu is unconfigured. 2441 */ 2442 void 2443 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone) 2444 { 2445 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2446 2447 ASSERT(MUTEX_HELD(&cpu_lock)); 2448 ASSERT(pool_pset_enabled()); 2449 ASSERT(cp != NULL); 2450 2451 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2452 ASSERT(zone->zone_ncpus != 0); 2453 zone->zone_ncpus--; 2454 } 2455 if (cp->cpu_info_kstat) 2456 kstat_zone_remove(cp->cpu_info_kstat, zoneid); 2457 } 2458 2459 /* 2460 * Make invisible to the zone that subset of the cpu information that would be 2461 * torn down when a cpu is offlined (but still configured). 2462 */ 2463 void 2464 cpu_visibility_offline(cpu_t *cp, zone_t *zone) 2465 { 2466 kstat_t *ksp; 2467 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2468 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2469 processorid_t cpun; 2470 2471 ASSERT(MUTEX_HELD(&cpu_lock)); 2472 ASSERT(pool_pset_enabled()); 2473 ASSERT(cp != NULL); 2474 ASSERT(cpu_is_active(cp)); 2475 2476 cpun = cp->cpu_id; 2477 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2478 ASSERT(zone->zone_ncpus_online != 0); 2479 zone->zone_ncpus_online--; 2480 } 2481 2482 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2483 NULL) { 2484 kstat_zone_remove(ksp, zoneid); 2485 kstat_rele(ksp); 2486 } 2487 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2488 kstat_zone_remove(ksp, zoneid); 2489 kstat_rele(ksp); 2490 } 2491 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2492 kstat_zone_remove(ksp, zoneid); 2493 kstat_rele(ksp); 2494 } 2495 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2496 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2497 != NULL) { 2498 kstat_zone_remove(ksp, zoneid); 2499 kstat_rele(ksp); 2500 } 2501 } 2502 2503 /* 2504 * Update relevant kstats such that cpu is no longer visible to processes 2505 * executing in specified zone. 2506 */ 2507 void 2508 cpu_visibility_remove(cpu_t *cp, zone_t *zone) 2509 { 2510 if (cpu_is_active(cp)) 2511 cpu_visibility_offline(cp, zone); 2512 cpu_visibility_unconfigure(cp, zone); 2513 } 2514 2515 /* 2516 * Bind a thread to a CPU as requested. 2517 */ 2518 int 2519 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind, 2520 int *error) 2521 { 2522 processorid_t binding; 2523 cpu_t *cp = NULL; 2524 2525 ASSERT(MUTEX_HELD(&cpu_lock)); 2526 ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock)); 2527 2528 thread_lock(tp); 2529 2530 /* 2531 * Record old binding, but change the obind, which was initialized 2532 * to PBIND_NONE, only if this thread has a binding. This avoids 2533 * reporting PBIND_NONE for a process when some LWPs are bound. 2534 */ 2535 binding = tp->t_bind_cpu; 2536 if (binding != PBIND_NONE) 2537 *obind = binding; /* record old binding */ 2538 2539 switch (bind) { 2540 case PBIND_QUERY: 2541 /* Just return the old binding */ 2542 thread_unlock(tp); 2543 return (0); 2544 2545 case PBIND_QUERY_TYPE: 2546 /* Return the binding type */ 2547 *obind = TB_CPU_IS_SOFT(tp) ? 
PBIND_SOFT : PBIND_HARD; 2548 thread_unlock(tp); 2549 return (0); 2550 2551 case PBIND_SOFT: 2552 /* 2553 * Set soft binding for this thread and return the actual 2554 * binding 2555 */ 2556 TB_CPU_SOFT_SET(tp); 2557 thread_unlock(tp); 2558 return (0); 2559 2560 case PBIND_HARD: 2561 /* 2562 * Set hard binding for this thread and return the actual 2563 * binding 2564 */ 2565 TB_CPU_HARD_SET(tp); 2566 thread_unlock(tp); 2567 return (0); 2568 2569 default: 2570 break; 2571 } 2572 2573 /* 2574 * If this thread/LWP cannot be bound because of permission 2575 * problems, just note that and return success so that the 2576 * other threads/LWPs will be bound. This is the way 2577 * processor_bind() is defined to work. 2578 * 2579 * Binding will get EPERM if the thread is of system class 2580 * or hasprocperm() fails. 2581 */ 2582 if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) { 2583 *error = EPERM; 2584 thread_unlock(tp); 2585 return (0); 2586 } 2587 2588 binding = bind; 2589 if (binding != PBIND_NONE) { 2590 cp = cpu_get((processorid_t)binding); 2591 /* 2592 * Make sure binding is valid and is in right partition. 2593 */ 2594 if (cp == NULL || tp->t_cpupart != cp->cpu_part) { 2595 *error = EINVAL; 2596 thread_unlock(tp); 2597 return (0); 2598 } 2599 } 2600 tp->t_bind_cpu = binding; /* set new binding */ 2601 2602 /* 2603 * If there is no system-set reason for affinity, set 2604 * the t_bound_cpu field to reflect the binding. 2605 */ 2606 if (tp->t_affinitycnt == 0) { 2607 if (binding == PBIND_NONE) { 2608 /* 2609 * We may need to adjust disp_max_unbound_pri 2610 * since we're becoming unbound. 2611 */ 2612 disp_adjust_unbound_pri(tp); 2613 2614 tp->t_bound_cpu = NULL; /* set new binding */ 2615 2616 /* 2617 * Move thread to lgroup with strongest affinity 2618 * after unbinding 2619 */ 2620 if (tp->t_lgrp_affinity) 2621 lgrp_move_thread(tp, 2622 lgrp_choose(tp, tp->t_cpupart), 1); 2623 2624 if (tp->t_state == TS_ONPROC && 2625 tp->t_cpu->cpu_part != tp->t_cpupart) 2626 cpu_surrender(tp); 2627 } else { 2628 lpl_t *lpl; 2629 2630 tp->t_bound_cpu = cp; 2631 ASSERT(cp->cpu_lpl != NULL); 2632 2633 /* 2634 * Set home to lgroup with most affinity containing CPU 2635 * that thread is being bound or minimum bounding 2636 * lgroup if no affinities set 2637 */ 2638 if (tp->t_lgrp_affinity) 2639 lpl = lgrp_affinity_best(tp, tp->t_cpupart, 2640 LGRP_NONE, B_FALSE); 2641 else 2642 lpl = cp->cpu_lpl; 2643 2644 if (tp->t_lpl != lpl) { 2645 /* can't grab cpu_lock */ 2646 lgrp_move_thread(tp, lpl, 1); 2647 } 2648 2649 /* 2650 * Make the thread switch to the bound CPU. 2651 * If the thread is runnable, we need to 2652 * requeue it even if t_cpu is already set 2653 * to the right CPU, since it may be on a 2654 * kpreempt queue and need to move to a local 2655 * queue. We could check t_disp_queue to 2656 * avoid unnecessary overhead if it's already 2657 * on the right queue, but since this isn't 2658 * a performance-critical operation it doesn't 2659 * seem worth the extra code and complexity. 2660 * 2661 * If the thread is weakbound to the cpu then it will 2662 * resist the new binding request until the weak 2663 * binding drops. The cpu_surrender or requeueing 2664 * below could be skipped in such cases (since it 2665 * will have no effect), but that would require 2666 * thread_allowmigrate to acquire thread_lock so 2667 * we'll take the very occasional hit here instead. 
2668 */ 2669 if (tp->t_state == TS_ONPROC) { 2670 cpu_surrender(tp); 2671 } else if (tp->t_state == TS_RUN) { 2672 cpu_t *ocp = tp->t_cpu; 2673 2674 (void) dispdeq(tp); 2675 setbackdq(tp); 2676 /* 2677 * On the bound CPU's disp queue now. 2678 */ 2679 ASSERT(tp->t_disp_queue == cp->cpu_disp || 2680 tp->t_weakbound_cpu == ocp); 2681 } 2682 } 2683 } 2684 2685 /* 2686 * Our binding has changed; set TP_CHANGEBIND. 2687 */ 2688 tp->t_proc_flag |= TP_CHANGEBIND; 2689 aston(tp); 2690 2691 thread_unlock(tp); 2692 2693 return (0); 2694 } 2695 2696 #if CPUSET_WORDS > 1 2697 2698 /* 2699 * Functions for implementing cpuset operations when a cpuset is more 2700 * than one word. On platforms where a cpuset is a single word these 2701 * are implemented as macros in cpuvar.h. 2702 */ 2703 2704 void 2705 cpuset_all(cpuset_t *s) 2706 { 2707 int i; 2708 2709 for (i = 0; i < CPUSET_WORDS; i++) 2710 s->cpub[i] = ~0UL; 2711 } 2712 2713 void 2714 cpuset_all_but(cpuset_t *s, uint_t cpu) 2715 { 2716 cpuset_all(s); 2717 CPUSET_DEL(*s, cpu); 2718 } 2719 2720 void 2721 cpuset_only(cpuset_t *s, uint_t cpu) 2722 { 2723 CPUSET_ZERO(*s); 2724 CPUSET_ADD(*s, cpu); 2725 } 2726 2727 int 2728 cpuset_isnull(cpuset_t *s) 2729 { 2730 int i; 2731 2732 for (i = 0; i < CPUSET_WORDS; i++) 2733 if (s->cpub[i] != 0) 2734 return (0); 2735 return (1); 2736 } 2737 2738 int 2739 cpuset_cmp(cpuset_t *s1, cpuset_t *s2) 2740 { 2741 int i; 2742 2743 for (i = 0; i < CPUSET_WORDS; i++) 2744 if (s1->cpub[i] != s2->cpub[i]) 2745 return (0); 2746 return (1); 2747 } 2748 2749 uint_t 2750 cpuset_find(cpuset_t *s) 2751 { 2752 2753 uint_t i; 2754 uint_t cpu = (uint_t)-1; 2755 2756 /* 2757 * Find a cpu in the cpuset 2758 */ 2759 for (i = 0; i < CPUSET_WORDS; i++) { 2760 cpu = (uint_t)(lowbit(s->cpub[i]) - 1); 2761 if (cpu != (uint_t)-1) { 2762 cpu += i * BT_NBIPUL; 2763 break; 2764 } 2765 } 2766 return (cpu); 2767 } 2768 2769 void 2770 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid) 2771 { 2772 int i, j; 2773 uint_t bit; 2774 2775 /* 2776 * First, find the smallest cpu id in the set. 2777 */ 2778 for (i = 0; i < CPUSET_WORDS; i++) { 2779 if (s->cpub[i] != 0) { 2780 bit = (uint_t)(lowbit(s->cpub[i]) - 1); 2781 ASSERT(bit != (uint_t)-1); 2782 *smallestid = bit + (i * BT_NBIPUL); 2783 2784 /* 2785 * Now find the largest cpu id in 2786 * the set and return immediately. 2787 * Done in an inner loop to avoid 2788 * having to break out of the first 2789 * loop. 2790 */ 2791 for (j = CPUSET_WORDS - 1; j >= i; j--) { 2792 if (s->cpub[j] != 0) { 2793 bit = (uint_t)(highbit(s->cpub[j]) - 1); 2794 ASSERT(bit != (uint_t)-1); 2795 *largestid = bit + (j * BT_NBIPUL); 2796 ASSERT(*largestid >= *smallestid); 2797 return; 2798 } 2799 } 2800 2801 /* 2802 * If this code is reached, a 2803 * smallestid was found, but not a 2804 * largestid. The cpuset must have 2805 * been changed during the course 2806 * of this function call. 2807 */ 2808 ASSERT(0); 2809 } 2810 } 2811 *smallestid = *largestid = CPUSET_NOTINSET; 2812 } 2813 2814 #endif /* CPUSET_WORDS */ 2815 2816 /* 2817 * Unbind threads bound to specified CPU. 2818 * 2819 * If `unbind_all_threads' is true, unbind all user threads bound to a given 2820 * CPU. Otherwise unbind all soft-bound user threads. 
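 * Hard (PBIND_HARD) bindings are only broken when `unbind_all_threads' is
 * true; system (SSYS) processes and processes in zones the caller cannot
 * access are always skipped.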
2821 */ 2822 int 2823 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads) 2824 { 2825 processorid_t obind; 2826 kthread_t *tp; 2827 int ret = 0; 2828 proc_t *pp; 2829 int err, berr = 0; 2830 2831 ASSERT(MUTEX_HELD(&cpu_lock)); 2832 2833 mutex_enter(&pidlock); 2834 for (pp = practive; pp != NULL; pp = pp->p_next) { 2835 mutex_enter(&pp->p_lock); 2836 tp = pp->p_tlist; 2837 /* 2838 * Skip zombies, kernel processes, and processes in 2839 * other zones, if called from a non-global zone. 2840 */ 2841 if (tp == NULL || (pp->p_flag & SSYS) || 2842 !HASZONEACCESS(curproc, pp->p_zone->zone_id)) { 2843 mutex_exit(&pp->p_lock); 2844 continue; 2845 } 2846 do { 2847 if (tp->t_bind_cpu != cpu) 2848 continue; 2849 /* 2850 * Skip threads with hard binding when 2851 * `unbind_all_threads' is not specified. 2852 */ 2853 if (!unbind_all_threads && TB_CPU_IS_HARD(tp)) 2854 continue; 2855 err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr); 2856 if (ret == 0) 2857 ret = err; 2858 } while ((tp = tp->t_forw) != pp->p_tlist); 2859 mutex_exit(&pp->p_lock); 2860 } 2861 mutex_exit(&pidlock); 2862 if (ret == 0) 2863 ret = berr; 2864 return (ret); 2865 } 2866 2867 2868 /* 2869 * Destroy all remaining bound threads on a cpu. 2870 */ 2871 void 2872 cpu_destroy_bound_threads(cpu_t *cp) 2873 { 2874 extern id_t syscid; 2875 register kthread_id_t t, tlist, tnext; 2876 2877 /* 2878 * Destroy all remaining bound threads on the cpu. This 2879 * should include both the interrupt threads and the idle thread. 2880 * This requires some care, since we need to traverse the 2881 * thread list with the pidlock mutex locked, but thread_free 2882 * also locks the pidlock mutex. So, we collect the threads 2883 * we're going to reap in a list headed by "tlist", then we 2884 * unlock the pidlock mutex and traverse that list, 2885 * calling thread_free() on each thread. Simple, n'est-ce pas? 2886 * Also, this depends on thread_free not mucking with the 2887 * t_next and t_prev links of the thread. 2888 */ 2889 2890 if ((t = curthread) != NULL) { 2891 2892 tlist = NULL; 2893 mutex_enter(&pidlock); 2894 do { 2895 tnext = t->t_next; 2896 if (t->t_bound_cpu == cp) { 2897 2898 /* 2899 * We've found a bound thread, carefully unlink 2900 * it out of the thread list, and add it to 2901 * our "tlist". We "know" we don't have to 2902 * worry about unlinking curthread (the thread 2903 * that is executing this code). 2904 */ 2905 t->t_next->t_prev = t->t_prev; 2906 t->t_prev->t_next = t->t_next; 2907 t->t_next = tlist; 2908 tlist = t; 2909 ASSERT(t->t_cid == syscid); 2910 /* wake up anyone blocked in thread_join */ 2911 cv_broadcast(&t->t_joincv); 2912 /* 2913 * t_lwp set by interrupt threads and not 2914 * cleared. 2915 */ 2916 t->t_lwp = NULL; 2917 /* 2918 * Pause and idle threads always have 2919 * t_state set to TS_ONPROC. 2920 */ 2921 t->t_state = TS_FREE; 2922 t->t_prev = NULL; /* Just in case */ 2923 } 2924 2925 } while ((t = tnext) != curthread); 2926 2927 mutex_exit(&pidlock); 2928 2929 mutex_sync(); 2930 for (t = tlist; t != NULL; t = tnext) { 2931 tnext = t->t_next; 2932 thread_free(t); 2933 } 2934 } 2935 } 2936 2937 /* 2938 * Update the cpu_supp_freqs of this cpu. This information is returned 2939 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then 2940 * maintain the kstat data size.
2941 */ 2942 void 2943 cpu_set_supp_freqs(cpu_t *cp, const char *freqs) 2944 { 2945 char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */ 2946 const char *lfreqs = clkstr; 2947 boolean_t kstat_exists = B_FALSE; 2948 kstat_t *ksp; 2949 size_t len; 2950 2951 /* 2952 * A NULL pointer means we only support one speed. 2953 */ 2954 if (freqs == NULL) 2955 (void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64, 2956 cp->cpu_curr_clock); 2957 else 2958 lfreqs = freqs; 2959 2960 /* 2961 * Make sure the frequency doesn't change while a snapshot is 2962 * going on. Of course, we only need to worry about this if 2963 * the kstat exists. 2964 */ 2965 if ((ksp = cp->cpu_info_kstat) != NULL) { 2966 mutex_enter(ksp->ks_lock); 2967 kstat_exists = B_TRUE; 2968 } 2969 2970 /* 2971 * Free any previously allocated string and if the kstat 2972 * already exists, then update its data size. 2973 */ 2974 if (cp->cpu_supp_freqs != NULL) { 2975 len = strlen(cp->cpu_supp_freqs) + 1; 2976 kmem_free(cp->cpu_supp_freqs, len); 2977 if (kstat_exists) 2978 ksp->ks_data_size -= len; 2979 } 2980 2981 /* 2982 * Allocate the new string and set the pointer. 2983 */ 2984 len = strlen(lfreqs) + 1; 2985 cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP); 2986 (void) strcpy(cp->cpu_supp_freqs, lfreqs); 2987 2988 /* 2989 * If the kstat already exists then update the data size and 2990 * drop the lock. 2991 */ 2992 if (kstat_exists) { 2993 ksp->ks_data_size += len; 2994 mutex_exit(ksp->ks_lock); 2995 } 2996 } 2997 2998 /* 2999 * Indicate the current CPU's clock frequency (in Hz). 3000 * The calling context must be such that CPU references are safe. 3001 */ 3002 void 3003 cpu_set_curr_clock(uint64_t new_clk) 3004 { 3005 uint64_t old_clk; 3006 3007 old_clk = CPU->cpu_curr_clock; 3008 CPU->cpu_curr_clock = new_clk; 3009 3010 /* 3011 * The cpu-change-speed DTrace probe exports the frequency in Hz. 3012 */ 3013 DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id, 3014 uint64_t, old_clk, uint64_t, new_clk); 3015 } 3016 3017 /* 3018 * processor_info(2) and p_online(2) status support functions. 3019 * The constants returned by cpu_get_state() and cpu_get_state_str() are 3020 * for use in communicating processor state information to userland. Kernel 3021 * subsystems should only be using the cpu_flags value directly. Subsystems 3022 * modifying cpu_flags should record the state change via a call to 3023 * cpu_set_state(). 3024 */ 3025 3026 /* 3027 * Update the pi_state of this CPU. This function provides the CPU status for 3028 * the information returned by processor_info(2). 3029 */ 3030 void 3031 cpu_set_state(cpu_t *cpu) 3032 { 3033 ASSERT(MUTEX_HELD(&cpu_lock)); 3034 cpu->cpu_type_info.pi_state = cpu_get_state(cpu); 3035 cpu->cpu_state_begin = gethrestime_sec(); 3036 pool_cpu_mod = gethrtime(); 3037 } 3038 3039 /* 3040 * Return offline/online/other status for the indicated CPU. Use only for 3041 * communication with user applications; cpu_flags provides the in-kernel 3042 * interface.
3043 */ 3044 int 3045 cpu_get_state(cpu_t *cpu) 3046 { 3047 ASSERT(MUTEX_HELD(&cpu_lock)); 3048 if (cpu->cpu_flags & CPU_POWEROFF) 3049 return (P_POWEROFF); 3050 else if (cpu->cpu_flags & CPU_FAULTED) 3051 return (P_FAULTED); 3052 else if (cpu->cpu_flags & CPU_SPARE) 3053 return (P_SPARE); 3054 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) 3055 return (P_OFFLINE); 3056 else if (cpu->cpu_flags & CPU_ENABLE) 3057 return (P_ONLINE); 3058 else 3059 return (P_NOINTR); 3060 } 3061 3062 /* 3063 * Return processor_info(2) state as a string. 3064 */ 3065 const char * 3066 cpu_get_state_str(cpu_t *cpu) 3067 { 3068 const char *string; 3069 3070 switch (cpu_get_state(cpu)) { 3071 case P_ONLINE: 3072 string = PS_ONLINE; 3073 break; 3074 case P_POWEROFF: 3075 string = PS_POWEROFF; 3076 break; 3077 case P_NOINTR: 3078 string = PS_NOINTR; 3079 break; 3080 case P_SPARE: 3081 string = PS_SPARE; 3082 break; 3083 case P_FAULTED: 3084 string = PS_FAULTED; 3085 break; 3086 case P_OFFLINE: 3087 string = PS_OFFLINE; 3088 break; 3089 default: 3090 string = "unknown"; 3091 break; 3092 } 3093 return (string); 3094 } 3095 3096 /* 3097 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named 3098 * kstats, respectively. This is done when a CPU is initialized or placed 3099 * online via p_online(2). 3100 */ 3101 static void 3102 cpu_stats_kstat_create(cpu_t *cp) 3103 { 3104 int instance = cp->cpu_id; 3105 char *module = "cpu"; 3106 char *class = "misc"; 3107 kstat_t *ksp; 3108 zoneid_t zoneid; 3109 3110 ASSERT(MUTEX_HELD(&cpu_lock)); 3111 3112 if (pool_pset_enabled()) 3113 zoneid = GLOBAL_ZONEID; 3114 else 3115 zoneid = ALL_ZONES; 3116 /* 3117 * Create named kstats 3118 */ 3119 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \ 3120 ksp = kstat_create_zone(module, instance, (name), class, \ 3121 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \ 3122 zoneid); \ 3123 if (ksp != NULL) { \ 3124 ksp->ks_private = cp; \ 3125 ksp->ks_update = (update_func); \ 3126 kstat_install(ksp); \ 3127 } else \ 3128 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \ 3129 module, instance, (name)); 3130 3131 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template), 3132 cpu_sys_stats_ks_update); 3133 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template), 3134 cpu_vm_stats_ks_update); 3135 3136 /* 3137 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat. 3138 */ 3139 ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL, 3140 "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid); 3141 if (ksp != NULL) { 3142 ksp->ks_update = cpu_stat_ks_update; 3143 ksp->ks_private = cp; 3144 kstat_install(ksp); 3145 } 3146 } 3147 3148 static void 3149 cpu_stats_kstat_destroy(cpu_t *cp) 3150 { 3151 char ks_name[KSTAT_STRLEN]; 3152 3153 (void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id); 3154 kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name); 3155 3156 kstat_delete_byname("cpu", cp->cpu_id, "sys"); 3157 kstat_delete_byname("cpu", cp->cpu_id, "vm"); 3158 } 3159 3160 static int 3161 cpu_sys_stats_ks_update(kstat_t *ksp, int rw) 3162 { 3163 cpu_t *cp = (cpu_t *)ksp->ks_private; 3164 struct cpu_sys_stats_ks_data *csskd; 3165 cpu_sys_stats_t *css; 3166 hrtime_t msnsecs[NCMSTATES]; 3167 int i; 3168 3169 if (rw == KSTAT_WRITE) 3170 return (EACCES); 3171 3172 csskd = ksp->ks_data; 3173 css = &cp->cpu_stats.sys; 3174 3175 /* 3176 * Read CPU mstate, but compare with the last values we 3177 * received to make sure that the returned kstats never 3178 * decrease. 
3179 */ 3180 3181 get_cpu_mstate(cp, msnsecs); 3182 if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE]) 3183 msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64; 3184 if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER]) 3185 msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64; 3186 if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM]) 3187 msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64; 3188 3189 bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data, 3190 sizeof (cpu_sys_stats_ks_data_template)); 3191 3192 csskd->cpu_ticks_wait.value.ui64 = 0; 3193 csskd->wait_ticks_io.value.ui64 = 0; 3194 3195 csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE]; 3196 csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER]; 3197 csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM]; 3198 csskd->cpu_ticks_idle.value.ui64 = 3199 NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64); 3200 csskd->cpu_ticks_user.value.ui64 = 3201 NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64); 3202 csskd->cpu_ticks_kernel.value.ui64 = 3203 NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64); 3204 csskd->cpu_nsec_dtrace.value.ui64 = cp->cpu_dtrace_nsec; 3205 csskd->dtrace_probes.value.ui64 = cp->cpu_dtrace_probes; 3206 csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast; 3207 csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload; 3208 csskd->bread.value.ui64 = css->bread; 3209 csskd->bwrite.value.ui64 = css->bwrite; 3210 csskd->lread.value.ui64 = css->lread; 3211 csskd->lwrite.value.ui64 = css->lwrite; 3212 csskd->phread.value.ui64 = css->phread; 3213 csskd->phwrite.value.ui64 = css->phwrite; 3214 csskd->pswitch.value.ui64 = css->pswitch; 3215 csskd->trap.value.ui64 = css->trap; 3216 csskd->intr.value.ui64 = 0; 3217 for (i = 0; i < PIL_MAX; i++) 3218 csskd->intr.value.ui64 += css->intr[i]; 3219 csskd->syscall.value.ui64 = css->syscall; 3220 csskd->sysread.value.ui64 = css->sysread; 3221 csskd->syswrite.value.ui64 = css->syswrite; 3222 csskd->sysfork.value.ui64 = css->sysfork; 3223 csskd->sysvfork.value.ui64 = css->sysvfork; 3224 csskd->sysexec.value.ui64 = css->sysexec; 3225 csskd->readch.value.ui64 = css->readch; 3226 csskd->writech.value.ui64 = css->writech; 3227 csskd->rcvint.value.ui64 = css->rcvint; 3228 csskd->xmtint.value.ui64 = css->xmtint; 3229 csskd->mdmint.value.ui64 = css->mdmint; 3230 csskd->rawch.value.ui64 = css->rawch; 3231 csskd->canch.value.ui64 = css->canch; 3232 csskd->outch.value.ui64 = css->outch; 3233 csskd->msg.value.ui64 = css->msg; 3234 csskd->sema.value.ui64 = css->sema; 3235 csskd->namei.value.ui64 = css->namei; 3236 csskd->ufsiget.value.ui64 = css->ufsiget; 3237 csskd->ufsdirblk.value.ui64 = css->ufsdirblk; 3238 csskd->ufsipage.value.ui64 = css->ufsipage; 3239 csskd->ufsinopage.value.ui64 = css->ufsinopage; 3240 csskd->procovf.value.ui64 = css->procovf; 3241 csskd->intrthread.value.ui64 = 0; 3242 for (i = 0; i < LOCK_LEVEL - 1; i++) 3243 csskd->intrthread.value.ui64 += css->intr[i]; 3244 csskd->intrblk.value.ui64 = css->intrblk; 3245 csskd->intrunpin.value.ui64 = css->intrunpin; 3246 csskd->idlethread.value.ui64 = css->idlethread; 3247 csskd->inv_swtch.value.ui64 = css->inv_swtch; 3248 csskd->nthreads.value.ui64 = css->nthreads; 3249 csskd->cpumigrate.value.ui64 = css->cpumigrate; 3250 csskd->xcalls.value.ui64 = css->xcalls; 3251 csskd->mutex_adenters.value.ui64 = css->mutex_adenters; 3252 csskd->rw_rdfails.value.ui64 = css->rw_rdfails; 3253 csskd->rw_wrfails.value.ui64 = css->rw_wrfails; 3254 csskd->modload.value.ui64 = css->modload; 3255 csskd->modunload.value.ui64 = css->modunload; 3256 
csskd->bawrite.value.ui64 = css->bawrite; 3257 csskd->iowait.value.ui64 = css->iowait; 3258 3259 return (0); 3260 } 3261 3262 static int 3263 cpu_vm_stats_ks_update(kstat_t *ksp, int rw) 3264 { 3265 cpu_t *cp = (cpu_t *)ksp->ks_private; 3266 struct cpu_vm_stats_ks_data *cvskd; 3267 cpu_vm_stats_t *cvs; 3268 3269 if (rw == KSTAT_WRITE) 3270 return (EACCES); 3271 3272 cvs = &cp->cpu_stats.vm; 3273 cvskd = ksp->ks_data; 3274 3275 bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data, 3276 sizeof (cpu_vm_stats_ks_data_template)); 3277 cvskd->pgrec.value.ui64 = cvs->pgrec; 3278 cvskd->pgfrec.value.ui64 = cvs->pgfrec; 3279 cvskd->pgin.value.ui64 = cvs->pgin; 3280 cvskd->pgpgin.value.ui64 = cvs->pgpgin; 3281 cvskd->pgout.value.ui64 = cvs->pgout; 3282 cvskd->pgpgout.value.ui64 = cvs->pgpgout; 3283 cvskd->zfod.value.ui64 = cvs->zfod; 3284 cvskd->dfree.value.ui64 = cvs->dfree; 3285 cvskd->scan.value.ui64 = cvs->scan; 3286 cvskd->rev.value.ui64 = cvs->rev; 3287 cvskd->hat_fault.value.ui64 = cvs->hat_fault; 3288 cvskd->as_fault.value.ui64 = cvs->as_fault; 3289 cvskd->maj_fault.value.ui64 = cvs->maj_fault; 3290 cvskd->cow_fault.value.ui64 = cvs->cow_fault; 3291 cvskd->prot_fault.value.ui64 = cvs->prot_fault; 3292 cvskd->softlock.value.ui64 = cvs->softlock; 3293 cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt; 3294 cvskd->pgrrun.value.ui64 = cvs->pgrrun; 3295 cvskd->execpgin.value.ui64 = cvs->execpgin; 3296 cvskd->execpgout.value.ui64 = cvs->execpgout; 3297 cvskd->execfree.value.ui64 = cvs->execfree; 3298 cvskd->anonpgin.value.ui64 = cvs->anonpgin; 3299 cvskd->anonpgout.value.ui64 = cvs->anonpgout; 3300 cvskd->anonfree.value.ui64 = cvs->anonfree; 3301 cvskd->fspgin.value.ui64 = cvs->fspgin; 3302 cvskd->fspgout.value.ui64 = cvs->fspgout; 3303 cvskd->fsfree.value.ui64 = cvs->fsfree; 3304 3305 return (0); 3306 } 3307 3308 static int 3309 cpu_stat_ks_update(kstat_t *ksp, int rw) 3310 { 3311 cpu_stat_t *cso; 3312 cpu_t *cp; 3313 int i; 3314 hrtime_t msnsecs[NCMSTATES]; 3315 3316 cso = (cpu_stat_t *)ksp->ks_data; 3317 cp = (cpu_t *)ksp->ks_private; 3318 3319 if (rw == KSTAT_WRITE) 3320 return (EACCES); 3321 3322 /* 3323 * Read CPU mstate, but compare with the last values we 3324 * received to make sure that the returned kstats never 3325 * decrease. 
3326 */ 3327 3328 get_cpu_mstate(cp, msnsecs); 3329 msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3330 msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]); 3331 msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3332 if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE]) 3333 cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE]; 3334 if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER]) 3335 cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER]; 3336 if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM]) 3337 cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM]; 3338 cso->cpu_sysinfo.cpu[CPU_WAIT] = 0; 3339 cso->cpu_sysinfo.wait[W_IO] = 0; 3340 cso->cpu_sysinfo.wait[W_SWAP] = 0; 3341 cso->cpu_sysinfo.wait[W_PIO] = 0; 3342 cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread); 3343 cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite); 3344 cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread); 3345 cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite); 3346 cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread); 3347 cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite); 3348 cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch); 3349 cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap); 3350 cso->cpu_sysinfo.intr = 0; 3351 for (i = 0; i < PIL_MAX; i++) 3352 cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]); 3353 cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall); 3354 cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread); 3355 cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite); 3356 cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork); 3357 cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork); 3358 cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec); 3359 cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch); 3360 cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech); 3361 cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint); 3362 cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint); 3363 cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint); 3364 cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch); 3365 cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch); 3366 cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch); 3367 cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg); 3368 cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema); 3369 cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei); 3370 cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget); 3371 cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk); 3372 cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage); 3373 cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage); 3374 cso->cpu_sysinfo.inodeovf = 0; 3375 cso->cpu_sysinfo.fileovf = 0; 3376 cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf); 3377 cso->cpu_sysinfo.intrthread = 0; 3378 for (i = 0; i < LOCK_LEVEL - 1; i++) 3379 cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]); 3380 cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk); 3381 cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread); 3382 cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch); 3383 cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads); 3384 cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate); 3385 cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls); 3386 cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters); 3387 cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails); 3388 cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails); 3389 cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload); 3390 cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload); 3391 
cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite); 3392 cso->cpu_sysinfo.rw_enters = 0; 3393 cso->cpu_sysinfo.win_uo_cnt = 0; 3394 cso->cpu_sysinfo.win_uu_cnt = 0; 3395 cso->cpu_sysinfo.win_so_cnt = 0; 3396 cso->cpu_sysinfo.win_su_cnt = 0; 3397 cso->cpu_sysinfo.win_suo_cnt = 0; 3398 3399 cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait); 3400 cso->cpu_syswait.swap = 0; 3401 cso->cpu_syswait.physio = 0; 3402 3403 cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec); 3404 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec); 3405 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin); 3406 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin); 3407 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout); 3408 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout); 3409 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod); 3410 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree); 3411 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan); 3412 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev); 3413 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault); 3414 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault); 3415 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault); 3416 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault); 3417 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault); 3418 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock); 3419 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt); 3420 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun); 3421 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin); 3422 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout); 3423 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree); 3424 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin); 3425 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout); 3426 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree); 3427 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin); 3428 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout); 3429 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree); 3430 3431 return (0); 3432 }
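/*
 * Editorial note -- illustrative example, not part of the original source:
 * a kernel subsystem that needs to react to CPU state changes registers a
 * callback with register_cpu_setup_func() while holding cpu_lock.  The
 * callback is invoked with cpu_lock held and must not block.  The names
 * "mysub_cpu_setup" and "mysub_init" below are hypothetical.
 *
 *	static int
 *	mysub_cpu_setup(cpu_setup_t what, int id, void *arg)
 *	{
 *		switch (what) {
 *		case CPU_CONFIG:
 *		case CPU_INIT:
 *			(set up this subsystem's per-cpu state for cpu "id")
 *			break;
 *		case CPU_UNCONFIG:
 *			(tear down the per-cpu state for cpu "id")
 *			break;
 *		default:
 *			break;
 *		}
 *		return (0);
 *	}
 *
 *	void
 *	mysub_init(void)
 *	{
 *		mutex_enter(&cpu_lock);
 *		register_cpu_setup_func(mysub_cpu_setup, NULL);
 *		mutex_exit(&cpu_lock);
 *	}
 *
 * Returning non-zero from the callback makes cpu_state_change_hooks()
 * undo the notification and causes the requesting operation (for example
 * cpu_configure()) to fail with that error.
 */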