1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/signal.h> 34 #include <sys/user.h> 35 #include <sys/systm.h> 36 #include <sys/sysinfo.h> 37 #include <sys/var.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/debug.h> 41 #include <sys/inline.h> 42 #include <sys/disp.h> 43 #include <sys/class.h> 44 #include <sys/bitmap.h> 45 #include <sys/kmem.h> 46 #include <sys/cpuvar.h> 47 #include <sys/vtrace.h> 48 #include <sys/tnf.h> 49 #include <sys/cpupart.h> 50 #include <sys/lgrp.h> 51 #include <sys/pg.h> 52 #include <sys/cmt.h> 53 #include <sys/bitset.h> 54 #include <sys/schedctl.h> 55 #include <sys/atomic.h> 56 #include <sys/dtrace.h> 57 #include <sys/sdt.h> 58 #include <sys/archsystm.h> 59 60 #include <vm/as.h> 61 62 #define BOUND_CPU 0x1 63 #define BOUND_PARTITION 0x2 64 #define BOUND_INTR 0x4 65 66 /* Dispatch queue allocation structure and functions */ 67 
/*
 * Dispatch queue allocation bookkeeping.  One of these is filled in per
 * disp_t being resized: disp_dq_alloc() populates the "new" fields,
 * disp_dq_assign() swaps them in (saving the "old" fields), and
 * disp_dq_free() releases the old storage.
 */
struct disp_queue_info {
	disp_t	*dp;		/* dispatch queue being grown */
	dispq_t	*olddispq;	/* previous queue array (freed after swap) */
	dispq_t	*newdispq;	/* replacement queue array */
	ulong_t	*olddqactmap;	/* previous active-queue bitmap */
	ulong_t	*newdqactmap;	/* replacement active-queue bitmap */
	int	oldnglobpris;	/* previous number of priorities (disp_npri) */
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void (*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void (*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;		/* priority where kernel preemption applies */
pri_t	upreemptpri = 0;	/* priority where normal preemption applies */
pri_t	intr_pri;		/* interrupt thread priority base level */

#define	KPQPRI	-1		/* pri where cpu affinity is dropped for kpq */
pri_t	kpqpri = KPQPRI;	/* can be set in /etc/system */
disp_t	cpu0_disp;		/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;		/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri + 1 or the min interrupt pri.
 */
int	only_intr_kpreempt;

extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0	/* enqueue at tail of kp queue */
#define	SETKP_FRONT	1	/* enqueue at head of kp queue */
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in hertz.
 */
#define	RECHOOSE_INTERVAL	3
int	rechoose_interval = RECHOOSE_INTERVAL;

/*
 * Parameter that determines how long (in nanoseconds) a thread must
 * be sitting on a run queue before it can be stolen by another CPU
 * to reduce migrations.  The interval is in nanoseconds.
 *
 * The nosteal_nsec should be set by platform code cmp_set_nosteal_interval()
 * to an appropriate value.  nosteal_nsec is set to NOSTEAL_UNINITIALIZED
 * here indicating it is uninitialized.
 * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'.
 */
#define	NOSTEAL_UNINITIALIZED	(-1)
hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED;
extern void cmp_set_nosteal_interval(void);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void	cpu_dispqalloc(int numpris);
/*
 * This gets returned by disp_getwork/disp_getbest if we couldn't steal
 * a thread because it was sitting on its run queue for a very short
 * period of time.
 */
#define	T_DONTSTEAL	(kthread_t *)(-1) /* returned by disp_getwork/getbest */

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 *
 * Grows the per-partition kp queues and the per-CPU dispatch queues if
 * the new global priority count exceeds the old one, then updates
 * intr_pri and v.v_nglobpris.  Caller must hold cpu_lock.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}

/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);

	mutex_enter(&cpu_lock);
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;

	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news. Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}
	kpreemptpri = (pri_t)v.v_maxsyspri + 1;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Platform specific sticky scheduler setup.
	 */
	if (nosteal_nsec == NOSTEAL_UNINITIALIZED)
		cmp_set_nosteal_interval();

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}

/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	      for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the scheduler class.
	 */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a
	 * new scheduling class which has just been loaded, then
	 * the size of the dispq may have changed.  We need to handle
	 * that here.
	 */
	disp_setup(maxglobpri, v.v_nglobpris);

	mutex_exit(&cpu_lock);
}


/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the cpus because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be freed until they
	 * are restarted.
	 */
	i = 0;
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	pause_cpus(NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * I must free all of the memory after starting the cpus because
	 * I can not risk sleeping in kmem_free while the cpus are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}

/*
 * Allocate (sleeping) the new queue array and active-queue bitmap for dp,
 * recording them in *dptr.  Called before the CPUs are paused.
 */
static void
disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
{
	dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
	dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dptr->dp = dp;
}

/*
 * Swap the pre-allocated queue storage into dp, copying over the old
 * contents and saving the old pointers in *dptr for later freeing.
 * Runs while the CPUs are paused, so it must not block.
 */
static void
disp_dq_assign(struct disp_queue_info *dptr, int numpris)
{
	disp_t	*dp;

	dp = dptr->dp;
	dptr->olddispq = dp->disp_q;
	dptr->olddqactmap = dp->disp_qactmap;
	dptr->oldnglobpris = dp->disp_npri;

	ASSERT(dptr->oldnglobpris < numpris);

	if (dptr->olddispq != NULL) {
		/*
		 * Use kcopy because bcopy is platform-specific
		 * and could block while we might have paused the cpus.
		 */
		(void) kcopy(dptr->olddispq, dptr->newdispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
		(void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
		    sizeof (long));
	}
	dp->disp_q = dptr->newdispq;
	dp->disp_qactmap = dptr->newdqactmap;
	dp->disp_q_limit = &dptr->newdispq[numpris];
	dp->disp_npri = numpris;
}

/*
 * Free the old (pre-resize) queue storage saved in *dptr.
 * Called after the CPUs have been restarted, so sleeping is safe.
 */
static void
disp_dq_free(struct disp_queue_info *dptr)
{
	if (dptr->olddispq != NULL)
		kmem_free(dptr->olddispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
	if (dptr->olddqactmap != NULL)
		kmem_free(dptr->olddqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
}
/*
 * For a newly created CPU, initialize the dispatch queue.
 * This is called before the CPU is known through cpu[] or on any lists.
 */
void
disp_cpu_init(cpu_t *cp)
{
	disp_t	*dp;
	dispq_t	*newdispq;
	ulong_t	*newdqactmap;

	ASSERT(MUTEX_HELD(&cpu_lock));	/* protect dispatcher queue sizes */

	/* the boot CPU reuses the statically allocated cpu0_disp */
	if (cp == cpu0_disp.disp_cpu)
		dp = &cpu0_disp;
	else
		dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
	bzero(dp, sizeof (disp_t));
	cp->cpu_disp = dp;
	dp->disp_cpu = cp;
	dp->disp_maxrunpri = -1;
	dp->disp_max_unbound_pri = -1;
	DISP_LOCK_INIT(&cp->cpu_thread_lock);
	/*
	 * Allocate memory for the dispatcher queue headers
	 * and the active queue bitmap.
	 */
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}

/*
 * Tear down a CPU's dispatch queue: free the queue storage and, unless
 * it is the statically allocated boot-CPU queue, the disp_t itself.
 */
void
disp_cpu_fini(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_kp_free(cp->cpu_disp);
	if (cp->cpu_disp != &cpu0_disp)
		kmem_free(cp->cpu_disp, sizeof (disp_t));
}

/*
 * Allocate new, larger kpreempt dispatch queue to replace the old one.
 */
void
disp_kp_alloc(disp_t *dq, pri_t npri)
{
	struct disp_queue_info	mem_info;

	if (npri > dq->disp_npri) {
		/*
		 * Allocate memory for the new array.
		 */
		disp_dq_alloc(&mem_info, npri, dq);

		/*
		 * We need to copy the old structures to the new
		 * and free the old.
		 */
		disp_dq_assign(&mem_info, npri);
		disp_dq_free(&mem_info);
	}
}

/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	/* reuse disp_dq_free() by presenting dq's storage as the "old" set */
	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}

/*
 * End dispatcher and scheduler initialization.
 */

/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 *
 * This is essentially a simpler version of disp_getwork()
 * to be called by CPUs preparing to "halt".
 */
int
disp_anywork(void)
{
	cpu_t		*cp = CPU;
	cpu_t		*ocp;
	volatile int	*local_nrunnable = &cp->cpu_disp->disp_nrunnable;

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			/*
			 * Something has appeared on the local run queue.
			 */
			if (*local_nrunnable > 0)
				return (1);
			/*
			 * If we encounter another idle CPU that will
			 * soon be trolling around through disp_anywork()
			 * terminate our walk here and let this other CPU
			 * patrol the next part of the list.
			 */
			if (ocp->cpu_dispatch_pri == -1 &&
			    (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0)
				return (0);
			/*
			 * Work can be taken from another CPU if:
			 *	- There is unbound work on the run queue
			 *	- That work isn't a thread undergoing a
			 *	  context switch on an otherwise empty queue.
			 *	- The CPU isn't running the idle loop.
			 */
			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}

/*
 * Called when CPU enters the idle loop
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(CMS_IDLE, gethrtime_unscaled());
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Called when CPU exits the idle loop
 */
static void
idle_exit()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled());
	unset_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Idle loop.
 */
void
idle()
{
	struct cpu	*cp = CPU;		/* pointer to this CPU */
	kthread_t	*t;			/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			idle_exit();
			swtch();
		} else {
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			/*
			 * If there was a thread but we couldn't steal
			 * it, then keep trying.
			 */
			if (t == T_DONTSTEAL)
				continue;
			idle_exit();
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}


/*
 * Preempt the currently running thread in favor of the highest
 * priority thread.  The class of the current thread controls
 * where it goes on the dispatcher queues. If panicking, turn
 * preemption off.
 */
void
preempt()
{
	kthread_t 	*t = curthread;
	klwp_t 		*lwp = ttolwp(curthread);

	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * this thread has already been chosen to be run on
		 * another CPU.  Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}

extern kthread_t *thread_unpin();
/*
 * disp() - find the highest priority thread for this processor to run, and
 * set it in TS_ONPROC state so that resume() can be called to run it.
 */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL ||
			    tp == T_DONTSTEAL) {
				/* nothing stealable; run the idle thread */
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0, we just took
		 * the last runnable thread off the highest queue, so
		 * recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	/* if a higher-priority choice appeared meanwhile, start over */
	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	return (tp);
}

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state,
 *	on a sleep queue, run queue, stopped, and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			hrtime_t now;

			now = gethrtime_unscaled();
			pg_ev_thread_swtch(cp, now, t, next);

			/*
			 * If t was previously in the TS_ONPROC state,
			 * setfrontdq and setbackdq won't have set its t_waitrq.
			 * Since we now finally know that we're switching away
			 * from this thread, set its t_waitrq if it is on a run
			 * queue.
			 */
			if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) {
				t->t_waitrq = now;
			}

			/*
			 * restore mstate of thread that we are switching to
			 */
			restore_mstate(next);

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt();
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);
			/*
			 * Threads that enqueue themselves on a run queue defer
			 * setting t_waitrq. It is then either set in swtch()
			 * when the CPU is actually yielded, or not at all if it
			 * is remaining on the CPU.
			 * There is however a window between where the thread
			 * placed itself on a run queue, and where it selects
			 * itself in disp(), where a third party (eg. clock()
			 * doing tick processing) may have re-enqueued this
			 * thread, setting t_waitrq in the process. We detect
			 * this race by noticing that despite switching to
			 * ourself, our t_waitrq has been set, and should be
			 * cleared.
			 */
			if (t->t_waitrq != 0)
				t->t_waitrq = 0;

			pg_ev_thread_remain(cp, t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}
/*
 * swtch_from_zombie()
 *	Special case of swtch(), which allows checks for TS_ZOMB to be
 *	eliminated from normal resume.
 *	Find best runnable thread and run it.
 *	Called with the current thread zombied.
 *	Zombies cannot migrate, so CPU references are safe.
 */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next);

	restore_mstate(next);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here
	 */
}

#if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))

/*
 * search_disp_queues()
 *	Search the given dispatch queues for thread tp.
 *	Return 1 if tp is found, otherwise return 0.
 */
static int
search_disp_queues(disp_t *dp, kthread_t *tp)
{
	dispq_t	*dq;
	dispq_t	*eq;

	disp_lock_enter_high(&dp->disp_lock);

	for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
		kthread_t	*rp;

		ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);

		for (rp = dq->dq_first; rp; rp = rp->t_link)
			if (tp == rp) {
				disp_lock_exit_high(&dp->disp_lock);
				return (1);
			}
	}
	disp_lock_exit_high(&dp->disp_lock);

	return (0);
}

/*
 * thread_on_queue()
 *	Search all per-CPU dispatch queues and all partition-wide kpreempt
 *	queues for thread tp. Return 1 if tp is found, otherwise return 0.
 */
static int
thread_on_queue(kthread_t *tp)
{
	cpu_t		*cp;
	struct cpupart	*part;

	ASSERT(getpil() >= DISP_LEVEL);

	/*
	 * Search the per-CPU dispatch queues for tp.
	 */
	cp = CPU;
	do {
		if (search_disp_queues(cp->cpu_disp, tp))
			return (1);
	} while ((cp = cp->cpu_next_onln) != CPU);

	/*
	 * Search the partition-wide kpreempt queues for tp.
	 */
	part = CPU->cpu_part;
	do {
		if (search_disp_queues(&part->cp_kp_queue, tp))
			return (1);
	} while ((part = part->cp_next) != CPU->cpu_part);

	return (0);
}

#else

#define	thread_on_queue(tp)	0	/* ASSERT must be !thread_on_queue */

#endif  /* DEBUG */

/*
 * like swtch(), but switch to a specified thread taken from another CPU.
 *	called with spl high..
 */
void
swtch_to(kthread_t *next)
{
	cpu_t			*cp = CPU;
	hrtime_t		now;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	/*
	 * Update context switch statistics.
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	now = gethrtime_unscaled();
	pg_ev_thread_swtch(cp, now, curthread, next);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt();

	/*
	 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq
	 * won't have set its t_waitrq. Since we now finally know that we're
	 * switching away from this thread, set its t_waitrq if it is on a run
	 * queue.
	 */
	if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) {
		curthread->t_waitrq = now;
	}

	/* restore next thread to previously running microstate */
	restore_mstate(next);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we may not
	 * return here
	 */
}

#define	CPU_IDLING(pri)	((pri) == -1)

/*
 * Notify CPU cp that a thread of priority tpri has become runnable for it:
 * set cpu_runrun (user-level preemption) and/or cpu_kprunrun (kernel
 * preemption) as appropriate, and poke the CPU if it is not the current one.
 * Does nothing if cp is idling or already running at >= tpri.
 */
static void
cpu_resched(cpu_t *cp, pri_t tpri)
{
	int	call_poke_cpu = 0;
	pri_t   cpupri = cp->cpu_dispatch_pri;

	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
			cp->cpu_runrun = 1;
			aston(cp->cpu_dispthread);
			if (tpri < kpreemptpri && cp != CPU)
				call_poke_cpu = 1;
		}
		if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
			cp->cpu_kprunrun = 1;
			if (cp != CPU)
				call_poke_cpu = 1;
		}
	}

	/*
	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
	 */
	membar_enter();

	if (call_poke_cpu)
		poke_cpu(cp->cpu_id);
}
1157 */ 1158 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 1159 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 1160 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 1161 1162 /* 1163 * Macro that evaluates to true if it is likely that the thread has cache 1164 * warmth. This is based on the amount of time that has elapsed since the 1165 * thread last ran. If that amount of time is less than "rechoose_interval" 1166 * ticks, then we decide that the thread has enough cache warmth to warrant 1167 * some affinity for t->t_cpu. 1168 */ 1169 #define THREAD_HAS_CACHE_WARMTH(thread) \ 1170 ((thread == curthread) || \ 1171 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval)) 1172 /* 1173 * Put the specified thread on the back of the dispatcher 1174 * queue corresponding to its current priority. 1175 * 1176 * Called with the thread in transition, onproc or stopped state 1177 * and locked (transition implies locked) and at high spl. 1178 * Returns with the thread in TS_RUN state and still locked. 1179 */ 1180 void 1181 setbackdq(kthread_t *tp) 1182 { 1183 dispq_t *dq; 1184 disp_t *dp; 1185 cpu_t *cp; 1186 pri_t tpri; 1187 int bound; 1188 boolean_t self; 1189 1190 ASSERT(THREAD_LOCK_HELD(tp)); 1191 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1192 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1193 1194 /* 1195 * If thread is "swapped" or on the swap queue don't 1196 * queue it, but wake sched.
1197 */ 1198 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1199 disp_swapped_setrun(tp); 1200 return; 1201 } 1202 1203 self = (tp == curthread); 1204 1205 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1206 bound = 1; 1207 else 1208 bound = 0; 1209 1210 tpri = DISP_PRIO(tp); 1211 if (ncpus == 1) 1212 cp = tp->t_cpu; 1213 else if (!bound) { 1214 if (tpri >= kpqpri) { 1215 setkpdq(tp, SETKP_BACK); 1216 return; 1217 } 1218 1219 /* 1220 * We'll generally let this thread continue to run where 1221 * it last ran...but will consider migration if: 1222 * - The thread probably doesn't have much cache warmth. 1223 * - The CPU where it last ran is the target of an offline 1224 * request. 1225 * - The thread last ran outside its home lgroup. 1226 */ 1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) || 1228 (tp->t_cpu == cpu_inmotion)) { 1229 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL); 1230 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { 1231 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, 1232 self ? tp->t_cpu : NULL); 1233 } else { 1234 cp = tp->t_cpu; 1235 } 1236 1237 if (tp->t_cpupart == cp->cpu_part) { 1238 int qlen; 1239 1240 /* 1241 * Perform any CMT load balancing 1242 */ 1243 cp = cmt_balance(tp, cp); 1244 1245 /* 1246 * Balance across the run queues 1247 */ 1248 qlen = RUNQ_LEN(cp, tpri); 1249 if (tpri >= RUNQ_MATCH_PRI && 1250 !(tp->t_schedflag & TS_RUNQMATCH)) 1251 qlen -= RUNQ_MAX_DIFF; 1252 if (qlen > 0) { 1253 cpu_t *newcp; 1254 1255 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) { 1256 newcp = cp->cpu_next_part; 1257 } else if ((newcp = cp->cpu_next_lpl) == cp) { 1258 newcp = cp->cpu_next_part; 1259 } 1260 1261 if (RUNQ_LEN(newcp, tpri) < qlen) { 1262 DTRACE_PROBE3(runq__balance, 1263 kthread_t *, tp, 1264 cpu_t *, cp, cpu_t *, newcp); 1265 cp = newcp; 1266 } 1267 } 1268 } else { 1269 /* 1270 * Migrate to a cpu in the new partition.
1271 */ 1272 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1273 tp->t_lpl, tp->t_pri, NULL); 1274 } 1275 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1276 } else { 1277 /* 1278 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1279 * a short time until weak binding that existed when the 1280 * strong binding was established has dropped) so we must 1281 * favour weak binding over strong. 1282 */ 1283 cp = tp->t_weakbound_cpu ? 1284 tp->t_weakbound_cpu : tp->t_bound_cpu; 1285 } 1286 /* 1287 * A thread that is ONPROC may be temporarily placed on the run queue 1288 * but then chosen to run again by disp. If the thread we're placing on 1289 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1290 * replacement process is actually scheduled in swtch(). In this 1291 * situation, curthread is the only thread that could be in the ONPROC 1292 * state. 1293 */ 1294 if ((!self) && (tp->t_waitrq == 0)) { 1295 hrtime_t curtime; 1296 1297 curtime = gethrtime_unscaled(); 1298 (void) cpu_update_pct(tp, curtime); 1299 tp->t_waitrq = curtime; 1300 } else { 1301 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1302 } 1303 1304 dp = cp->cpu_disp; 1305 disp_lock_enter_high(&dp->disp_lock); 1306 1307 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1308 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1309 tpri, cp, tp); 1310 1311 #ifndef NPROBE 1312 /* Kernel probe */ 1313 if (tnf_tracing_active) 1314 tnf_thread_queue(tp, cp, tpri); 1315 #endif /* NPROBE */ 1316 1317 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1318 1319 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1320 tp->t_disp_queue = dp; 1321 tp->t_link = NULL; 1322 1323 dq = &dp->disp_q[tpri]; 1324 dp->disp_nrunnable++; 1325 if (!bound) 1326 dp->disp_steal = 0; 1327 membar_enter(); 1328 1329 if (dq->dq_sruncnt++ != 0) { 1330 ASSERT(dq->dq_first != NULL); 1331 dq->dq_last->t_link = tp; 1332 dq->dq_last = tp; 1333 } else { 1334 ASSERT(dq->dq_first == NULL); 1335
ASSERT(dq->dq_last == NULL); 1336 dq->dq_first = dq->dq_last = tp; 1337 BT_SET(dp->disp_qactmap, tpri); 1338 if (tpri > dp->disp_maxrunpri) { 1339 dp->disp_maxrunpri = tpri; 1340 membar_enter(); 1341 cpu_resched(cp, tpri); 1342 } 1343 } 1344 1345 if (!bound && tpri > dp->disp_max_unbound_pri) { 1346 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) { 1347 /* 1348 * If there are no other unbound threads on the 1349 * run queue, don't allow other CPUs to steal 1350 * this thread while we are in the middle of a 1351 * context switch. We may just switch to it 1352 * again right away. CPU_DISP_DONTSTEAL is cleared 1353 * in swtch and swtch_to. 1354 */ 1355 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1356 } 1357 dp->disp_max_unbound_pri = tpri; 1358 } 1359 (*disp_enq_thread)(cp, bound); 1360 } 1361 1362 /* 1363 * Put the specified thread on the front of the dispatcher 1364 * queue corresponding to its current priority. 1365 * 1366 * Called with the thread in transition, onproc or stopped state 1367 * and locked (transition implies locked) and at high spl. 1368 * Returns with the thread in TS_RUN state and still locked. 1369 */ 1370 void 1371 setfrontdq(kthread_t *tp) 1372 { 1373 disp_t *dp; 1374 dispq_t *dq; 1375 cpu_t *cp; 1376 pri_t tpri; 1377 int bound; 1378 1379 ASSERT(THREAD_LOCK_HELD(tp)); 1380 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1381 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1382 1383 /* 1384 * If thread is "swapped" or on the swap queue don't 1385 * queue it, but wake sched.
1386 */ 1387 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1388 disp_swapped_setrun(tp); 1389 return; 1390 } 1391 1392 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1393 bound = 1; 1394 else 1395 bound = 0; 1396 1397 tpri = DISP_PRIO(tp); 1398 if (ncpus == 1) 1399 cp = tp->t_cpu; 1400 else if (!bound) { 1401 if (tpri >= kpqpri) { 1402 setkpdq(tp, SETKP_FRONT); 1403 return; 1404 } 1405 cp = tp->t_cpu; 1406 if (tp->t_cpupart == cp->cpu_part) { 1407 /* 1408 * We'll generally let this thread continue to run 1409 * where it last ran, but will consider migration if: 1410 * - The thread last ran outside its home lgroup. 1411 * - The CPU where it last ran is the target of an 1412 * offline request (a thread_nomigrate() on the in 1413 * motion CPU relies on this when forcing a preempt). 1414 * - The thread isn't the highest priority thread where 1415 * it last ran, and it is considered not likely to 1416 * have significant cache warmth. 1417 */ 1418 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) || 1419 (cp == cpu_inmotion)) { 1420 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, 1421 (tp == curthread) ? cp : NULL); 1422 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) && 1423 (!THREAD_HAS_CACHE_WARMTH(tp))) { 1424 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, 1425 NULL); 1426 } 1427 } else { 1428 /* 1429 * Migrate to a cpu in the new partition. 1430 */ 1431 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1432 tp->t_lpl, tp->t_pri, NULL); 1433 } 1434 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1435 } else { 1436 /* 1437 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1438 * a short time until weak binding that existed when the 1439 * strong binding was established has dropped) so we must 1440 * favour weak binding over strong. 1441 */ 1442 cp = tp->t_weakbound_cpu ?
1443 tp->t_weakbound_cpu : tp->t_bound_cpu; 1444 } 1445 1446 /* 1447 * A thread that is ONPROC may be temporarily placed on the run queue 1448 * but then chosen to run again by disp. If the thread we're placing on 1449 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1450 * replacement process is actually scheduled in swtch(). In this 1451 * situation, curthread is the only thread that could be in the ONPROC 1452 * state. 1453 */ 1454 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1455 hrtime_t curtime; 1456 1457 curtime = gethrtime_unscaled(); 1458 (void) cpu_update_pct(tp, curtime); 1459 tp->t_waitrq = curtime; 1460 } else { 1461 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1462 } 1463 1464 dp = cp->cpu_disp; 1465 disp_lock_enter_high(&dp->disp_lock); 1466 1467 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1468 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1469 1470 #ifndef NPROBE 1471 /* Kernel probe */ 1472 if (tnf_tracing_active) 1473 tnf_thread_queue(tp, cp, tpri); 1474 #endif /* NPROBE */ 1475 1476 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1477 1478 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1479 tp->t_disp_queue = dp; 1480 1481 dq = &dp->disp_q[tpri]; 1482 dp->disp_nrunnable++; 1483 if (!bound) 1484 dp->disp_steal = 0; 1485 membar_enter(); 1486 1487 if (dq->dq_sruncnt++ != 0) { 1488 ASSERT(dq->dq_last != NULL); 1489 tp->t_link = dq->dq_first; 1490 dq->dq_first = tp; 1491 } else { 1492 ASSERT(dq->dq_last == NULL); 1493 ASSERT(dq->dq_first == NULL); 1494 tp->t_link = NULL; 1495 dq->dq_first = dq->dq_last = tp; 1496 BT_SET(dp->disp_qactmap, tpri); 1497 if (tpri > dp->disp_maxrunpri) { 1498 dp->disp_maxrunpri = tpri; 1499 membar_enter(); 1500 cpu_resched(cp, tpri); 1501 } 1502 } 1503 1504 if (!bound && tpri > dp->disp_max_unbound_pri) { 1505 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1506 cp == CPU) { 1507 /* 1508 * If there are no other unbound threads on the 1509 *
run queue, don't allow other CPUs to steal 1510 * this thread while we are in the middle of a 1511 * context switch. We may just switch to it 1512 * again right away. CPU_DISP_DONTSTEAL is cleared 1513 * in swtch and swtch_to. 1514 */ 1515 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1516 } 1517 dp->disp_max_unbound_pri = tpri; 1518 } 1519 (*disp_enq_thread)(cp, bound); 1520 } 1521 1522 /* 1523 * Put a high-priority unbound thread on the kp queue 1524 */ 1525 static void 1526 setkpdq(kthread_t *tp, int borf) 1527 { 1528 dispq_t *dq; 1529 disp_t *dp; 1530 cpu_t *cp; 1531 pri_t tpri; 1532 1533 tpri = DISP_PRIO(tp); 1534 1535 dp = &tp->t_cpupart->cp_kp_queue; 1536 disp_lock_enter_high(&dp->disp_lock); 1537 1538 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); /* NOTE(review): TR_FRONTQ trace label is emitted for both front and back kp-queue insertions */ 1539 1540 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1541 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1542 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1543 tp->t_disp_queue = dp; 1544 dp->disp_nrunnable++; 1545 dq = &dp->disp_q[tpri]; 1546 1547 if (dq->dq_sruncnt++ != 0) { 1548 if (borf == SETKP_BACK) { 1549 ASSERT(dq->dq_first != NULL); 1550 tp->t_link = NULL; 1551 dq->dq_last->t_link = tp; 1552 dq->dq_last = tp; 1553 } else { 1554 ASSERT(dq->dq_last != NULL); 1555 tp->t_link = dq->dq_first; 1556 dq->dq_first = tp; 1557 } 1558 } else { 1559 if (borf == SETKP_BACK) { 1560 ASSERT(dq->dq_first == NULL); 1561 ASSERT(dq->dq_last == NULL); 1562 dq->dq_first = dq->dq_last = tp; 1563 } else { 1564 ASSERT(dq->dq_last == NULL); 1565 ASSERT(dq->dq_first == NULL); 1566 tp->t_link = NULL; 1567 dq->dq_first = dq->dq_last = tp; 1568 } 1569 BT_SET(dp->disp_qactmap, tpri); 1570 if (tpri > dp->disp_max_unbound_pri) 1571 dp->disp_max_unbound_pri = tpri; 1572 if (tpri > dp->disp_maxrunpri) { 1573 dp->disp_maxrunpri = tpri; 1574 membar_enter(); 1575 } 1576 } 1577 1578 cp = tp->t_cpu; 1579 if (tp->t_cpupart != cp->cpu_part) { 1580 /* migrate to a cpu in the new partition */ 1581 cp
= tp->t_cpupart->cp_cpulist; 1582 } 1583 cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); 1584 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 1585 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1586 1587 #ifndef NPROBE 1588 /* Kernel probe */ 1589 if (tnf_tracing_active) 1590 tnf_thread_queue(tp, cp, tpri); 1591 #endif /* NPROBE */ 1592 1593 if (cp->cpu_chosen_level < tpri) 1594 cp->cpu_chosen_level = tpri; 1595 cpu_resched(cp, tpri); 1596 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1597 (*disp_enq_thread)(cp, 0); 1598 } 1599 1600 /* 1601 * Remove a thread from the dispatcher queue if it is on it. 1602 * It is not an error if it is not found but we return whether 1603 * or not it was found in case the caller wants to check. (A loaded 1604 * TS_RUN thread that is missing from its queue indicates corruption 1605 * and triggers a panic below.) */ 1605 int 1606 dispdeq(kthread_t *tp) 1607 { 1608 disp_t *dp; 1609 dispq_t *dq; 1610 kthread_t *rp; 1611 kthread_t *trp; 1612 kthread_t **ptp; 1613 int tpri; 1614 1615 ASSERT(THREAD_LOCK_HELD(tp)); 1616 1617 if (tp->t_state != TS_RUN) 1618 return (0); 1619 1620 /* 1621 * The thread is "swapped" or is on the swap queue and 1622 * hence no longer on the run queue, so return true. 1623 */ 1624 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1625 return (1); 1626 1627 tpri = DISP_PRIO(tp); 1628 dp = tp->t_disp_queue; 1629 ASSERT(tpri < dp->disp_npri); 1630 dq = &dp->disp_q[tpri]; 1631 ptp = &dq->dq_first; 1632 rp = *ptp; 1633 trp = NULL; 1634 1635 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1636 1637 /* 1638 * Search for thread in queue. 1639 * Double links would simplify this at the expense of disp/setrun. 1640 */ 1641 while (rp != tp && rp != NULL) { 1642 trp = rp; 1643 ptp = &trp->t_link; 1644 rp = trp->t_link; 1645 } 1646 1647 if (rp == NULL) { 1648 panic("dispdeq: thread not on queue"); 1649 } 1650 1651 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1652 1653 /* 1654 * Found it so remove it from queue.
1655 */ 1656 if ((*ptp = rp->t_link) == NULL) 1657 dq->dq_last = trp; 1658 1659 dp->disp_nrunnable--; 1660 if (--dq->dq_sruncnt == 0) { 1661 dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1662 if (dp->disp_nrunnable == 0) { 1663 dp->disp_max_unbound_pri = -1; 1664 dp->disp_maxrunpri = -1; 1665 } else if (tpri == dp->disp_maxrunpri) { 1666 int ipri; 1667 1668 ipri = bt_gethighbit(dp->disp_qactmap, 1669 dp->disp_maxrunpri >> BT_ULSHIFT); 1670 if (ipri < dp->disp_max_unbound_pri) 1671 dp->disp_max_unbound_pri = ipri; 1672 dp->disp_maxrunpri = ipri; 1673 } 1674 } 1675 tp->t_link = NULL; 1676 THREAD_TRANSITION(tp); /* put in intermediate state */ 1677 return (1); 1678 } 1679 1680 1681 /* 1682 * dq_sruninc and dq_srundec are public functions for 1683 * incrementing/decrementing the sruncnts when a thread on 1684 * a dispatcher queue is made schedulable/unschedulable by 1685 * resetting the TS_LOAD flag. 1686 * 1687 * The caller MUST have the thread lock and therefore the dispatcher 1688 * queue lock so that the operation which changes 1689 * the flag, the operation that checks the status of the thread to 1690 * determine if it's on a disp queue AND the call to this function 1691 * are one atomic operation with respect to interrupts. 1692 */ 1693 1694 /* 1695 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 1696 */ 1697 void 1698 dq_sruninc(kthread_t *t) 1699 { 1700 ASSERT(t->t_state == TS_RUN); 1701 ASSERT(t->t_schedflag & TS_LOAD); 1702 1703 THREAD_TRANSITION(t); 1704 setfrontdq(t); 1705 } 1706 1707 /* 1708 * See comment on calling conventions above. 1709 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 1710 */ 1711 void 1712 dq_srundec(kthread_t *t) 1713 { 1714 ASSERT(t->t_schedflag & TS_LOAD); 1715 1716 (void) dispdeq(t); 1717 disp_swapped_enq(t); 1718 } 1719 1720 /* 1721 * Change the dispatcher lock of thread to the "swapped_lock" 1722 * and return with thread lock still held.
1723 * 1724 * Called with thread_lock held, in transition state, and at high spl. 1725 */ 1726 void 1727 disp_swapped_enq(kthread_t *tp) 1728 { 1729 ASSERT(THREAD_LOCK_HELD(tp)); 1730 ASSERT(tp->t_schedflag & TS_LOAD); 1731 1732 switch (tp->t_state) { 1733 case TS_RUN: 1734 disp_lock_enter_high(&swapped_lock); 1735 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1736 break; 1737 case TS_ONPROC: 1738 disp_lock_enter_high(&swapped_lock); 1739 THREAD_TRANSITION(tp); 1740 wake_sched_sec = 1; /* tell clock to wake sched */ 1741 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1742 break; 1743 default: 1744 panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1745 } 1746 } 1747 1748 /* 1749 * This routine is called by setbackdq/setfrontdq if the thread is 1750 * not loaded or loaded and on the swap queue. 1751 * 1752 * Thread state TS_SLEEP implies that a swapped thread 1753 * has been woken up and needs to be swapped in by the swapper. 1754 * 1755 * Thread state TS_RUN implies that the priority of a swapped 1756 * thread is being increased by scheduling class (e.g. ts_update). 1757 */ 1758 static void 1759 disp_swapped_setrun(kthread_t *tp) 1760 { 1761 ASSERT(THREAD_LOCK_HELD(tp)); 1762 ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1763 1764 switch (tp->t_state) { 1765 case TS_SLEEP: 1766 disp_lock_enter_high(&swapped_lock); 1767 /* 1768 * Wakeup sched immediately (i.e., next tick) if the 1769 * thread priority is above maxclsyspri. 1770 */ 1771 if (DISP_PRIO(tp) > maxclsyspri) 1772 wake_sched = 1; 1773 else 1774 wake_sched_sec = 1; 1775 THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1776 break; 1777 case TS_RUN: /* called from ts_update */ 1778 break; 1779 default: 1780 panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 1781 } 1782 } 1783 1784 /* 1785 * Make a thread give up its processor. Find the processor on 1786 * which this thread is executing, and have that processor 1787 * preempt.
1788 * 1789 * We allow System Duty Cycle (SDC) threads to be preempted even if 1790 * they are running at kernel priorities. To implement this, we always 1791 * set cpu_kprunrun; this ensures preempt() will be called. Since SDC 1792 * calls cpu_surrender() very often, we only preempt if there is anyone 1793 * competing with us. 1794 */ 1795 void 1796 cpu_surrender(kthread_t *tp) 1797 { 1798 cpu_t *cpup; 1799 int max_pri; 1800 int max_run_pri; 1801 klwp_t *lwp; 1802 1803 ASSERT(THREAD_LOCK_HELD(tp)); 1804 1805 if (tp->t_state != TS_ONPROC) 1806 return; 1807 cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1808 max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1809 max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1810 if (max_pri < max_run_pri) 1811 max_pri = max_run_pri; 1812 1813 if (tp->t_cid == sysdccid) { 1814 uint_t t_pri = DISP_PRIO(tp); 1815 if (t_pri > max_pri) 1816 return; /* we are not competing w/ anyone */ 1817 cpup->cpu_runrun = cpup->cpu_kprunrun = 1; 1818 } else { 1819 cpup->cpu_runrun = 1; 1820 if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1821 cpup->cpu_kprunrun = 1; 1822 } 1823 } 1824 1825 /* 1826 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1827 */ 1828 membar_enter(); 1829 1830 DTRACE_SCHED1(surrender, kthread_t *, tp); 1831 1832 /* 1833 * Make the target thread take an excursion through trap() 1834 * to do preempt() (unless we're already in trap or post_syscall, 1835 * calling cpu_surrender via CL_TRAPRET).
1836 */ 1837 if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1838 lwp->lwp_state != LWP_USER) { 1839 aston(tp); 1840 if (cpup != CPU) 1841 poke_cpu(cpup->cpu_id); 1842 } 1843 TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1844 "cpu_surrender:tid %p cpu %p", tp, cpup); 1845 } 1846 1847 /* 1848 * Commit to and ratify a scheduling decision 1849 */ 1850 /*ARGSUSED*/ 1851 static kthread_t * 1852 disp_ratify(kthread_t *tp, disp_t *kpq) 1853 { 1854 pri_t tpri, maxpri; 1855 pri_t maxkpri; 1856 cpu_t *cpup; 1857 1858 ASSERT(tp != NULL); 1859 /* 1860 * Commit to, then ratify scheduling decision 1861 */ 1862 cpup = CPU; 1863 if (cpup->cpu_runrun != 0) 1864 cpup->cpu_runrun = 0; 1865 if (cpup->cpu_kprunrun != 0) 1866 cpup->cpu_kprunrun = 0; 1867 if (cpup->cpu_chosen_level != -1) 1868 cpup->cpu_chosen_level = -1; 1869 membar_enter(); 1870 tpri = DISP_PRIO(tp); 1871 maxpri = cpup->cpu_disp->disp_maxrunpri; 1872 maxkpri = kpq->disp_maxrunpri; 1873 if (maxpri < maxkpri) 1874 maxpri = maxkpri; 1875 if (tpri < maxpri) { 1876 /* 1877 * should have done better 1878 * put this one back and indicate to try again 1879 */ 1880 cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1881 cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1882 thread_lock_high(tp); 1883 THREAD_TRANSITION(tp); 1884 setfrontdq(tp); 1885 thread_unlock_nopreempt(tp); 1886 1887 tp = NULL; 1888 } 1889 return (tp); 1890 } 1891 1892 /* 1893 * See if there is any work on the dispatcher queue for other CPUs. 1894 * If there is, dequeue the best thread and return.
1895 */ 1896 static kthread_t * 1897 disp_getwork(cpu_t *cp) 1898 { 1899 cpu_t *ocp; /* other CPU */ 1900 cpu_t *ocp_start; 1901 cpu_t *tcp; /* target local CPU */ 1902 kthread_t *tp; 1903 kthread_t *retval = NULL; 1904 pri_t maxpri; 1905 disp_t *kpq; /* kp queue for this partition */ 1906 lpl_t *lpl, *lpl_leaf; 1907 int leafidx, startidx; 1908 hrtime_t stealtime; 1909 lgrp_id_t local_id; 1910 1911 maxpri = -1; 1912 tcp = NULL; 1913 1914 kpq = &cp->cpu_part->cp_kp_queue; 1915 while (kpq->disp_maxrunpri >= 0) { 1916 /* 1917 * Try to take a thread from the kp_queue. 1918 */ 1919 tp = (disp_getbest(kpq)); 1920 if (tp) 1921 return (disp_ratify(tp, kpq)); 1922 } 1923 1924 kpreempt_disable(); /* protect the cpu_active list */ 1925 1926 /* 1927 * Try to find something to do on another CPU's run queue. 1928 * Loop through all other CPUs looking for the one with the highest 1929 * priority unbound thread. 1930 * 1931 * On NUMA machines, the partition's CPUs are consulted in order of 1932 * distance from the current CPU. This way, the first available 1933 * work found is also the closest, and will suffer the least 1934 * from being migrated. 1935 */ 1936 lpl = lpl_leaf = cp->cpu_lpl; 1937 local_id = lpl_leaf->lpl_lgrpid; 1938 leafidx = startidx = 0; 1939 1940 /* 1941 * This loop traverses the lpl hierarchy. Higher level lpls represent 1942 * broader levels of locality 1943 */ 1944 do { 1945 /* This loop iterates over the lpl's leaves */ 1946 do { 1947 if (lpl_leaf != cp->cpu_lpl) 1948 ocp = lpl_leaf->lpl_cpus; 1949 else 1950 ocp = cp->cpu_next_lpl; 1951 1952 /* This loop iterates over the CPUs in the leaf */ 1953 ocp_start = ocp; 1954 do { 1955 pri_t pri; 1956 1957 ASSERT(CPU_ACTIVE(ocp)); 1958 1959 /* 1960 * End our stroll around this lpl if: 1961 * 1962 * - Something became runnable on the local 1963 * queue...which also ends our stroll around 1964 * the partition. 1965 * 1966 * - We happen across another idle CPU.
1967 * Since it is patrolling the next portion 1968 * of the lpl's list (assuming it's not 1969 * halted, or busy servicing an interrupt), 1970 * move to the next higher level of locality. 1971 */ 1972 if (cp->cpu_disp->disp_nrunnable != 0) { 1973 kpreempt_enable(); 1974 return (NULL); 1975 } 1976 if (ocp->cpu_dispatch_pri == -1) { 1977 if (ocp->cpu_disp_flags & 1978 CPU_DISP_HALTED || 1979 ocp->cpu_intr_actv != 0) 1980 continue; 1981 else 1982 goto next_level; 1983 } 1984 1985 /* 1986 * If there's only one thread and the CPU 1987 * is in the middle of a context switch, 1988 * or it's currently running the idle thread, 1989 * don't steal it. 1990 */ 1991 if ((ocp->cpu_disp_flags & 1992 CPU_DISP_DONTSTEAL) && 1993 ocp->cpu_disp->disp_nrunnable == 1) 1994 continue; 1995 1996 pri = ocp->cpu_disp->disp_max_unbound_pri; 1997 if (pri > maxpri) { 1998 /* 1999 * Don't steal threads that we attempted 2000 * to steal recently until they're ready 2001 * to be stolen again. 2002 */ 2003 stealtime = ocp->cpu_disp->disp_steal; 2004 if (stealtime == 0 || 2005 stealtime - gethrtime() <= 0) { 2006 maxpri = pri; 2007 tcp = ocp; 2008 } else { 2009 /* 2010 * Don't update tcp, just set 2011 * the retval to T_DONTSTEAL, so 2012 * that if no acceptable CPUs 2013 * are found the return value 2014 * will be T_DONTSTEAL rather 2015 * then NULL. 2016 */ 2017 retval = T_DONTSTEAL; 2018 } 2019 } 2020 } while ((ocp = ocp->cpu_next_lpl) != ocp_start); 2021 2022 /* 2023 * Iterate to the next leaf lpl in the resource set 2024 * at this level of locality. If we hit the end of 2025 * the set, wrap back around to the beginning. 2026 * 2027 * Note: This iteration is NULL terminated for a reason 2028 * see lpl_topo_bootstrap() in lgrp.c for details.
2029 */ 2030 if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) { 2031 leafidx = 0; 2032 lpl_leaf = lpl->lpl_rset[leafidx]; 2033 } 2034 } while (leafidx != startidx); 2035 2036 next_level: 2037 /* 2038 * Expand the search to include farther away CPUs (next 2039 * locality level). The closer CPUs that have already been 2040 * checked will be checked again. In doing so, idle CPUs 2041 * will tend to be more aggressive about stealing from CPUs 2042 * that are closer (since the closer CPUs will be considered 2043 * more often). 2044 * Begin at this level with the CPUs local leaf lpl. 2045 */ 2046 if ((lpl = lpl->lpl_parent) != NULL) { 2047 leafidx = startidx = lpl->lpl_id2rset[local_id]; 2048 lpl_leaf = lpl->lpl_rset[leafidx]; 2049 } 2050 } while (!tcp && lpl); 2051 2052 kpreempt_enable(); 2053 2054 /* 2055 * If another queue looks good, and there is still nothing on 2056 * the local queue, try to transfer one or more threads 2057 * from it to our queue. 2058 */ 2059 if (tcp && cp->cpu_disp->disp_nrunnable == 0) { 2060 tp = disp_getbest(tcp->cpu_disp); 2061 if (tp == NULL || tp == T_DONTSTEAL) 2062 return (tp); 2063 return (disp_ratify(tp, kpq)); 2064 } 2065 return (retval); 2066 } 2067 2068 2069 /* 2070 * disp_fix_unbound_pri() 2071 * Determines the maximum priority of unbound threads on the queue. 2072 * The priority is kept for the queue, but is only increased, never 2073 * reduced unless some CPU is looking for something on that queue. 2074 * 2075 * The priority argument is the known upper limit. 2076 * 2077 * Perhaps this should be kept accurately, but that probably means 2078 * separate bitmaps for bound and unbound threads. Since only idled 2079 * CPUs will have to do this recalculation, it seems better this way.
2080 */ 2081 static void 2082 disp_fix_unbound_pri(disp_t *dp, pri_t pri) 2083 { 2084 kthread_t *tp; 2085 dispq_t *dq; 2086 ulong_t *dqactmap = dp->disp_qactmap; 2087 ulong_t mapword; 2088 int wx; 2089 2090 ASSERT(DISP_LOCK_HELD(&dp->disp_lock)); 2091 2092 ASSERT(pri >= 0); /* checked by caller */ 2093 2094 /* 2095 * Start the search at the next lowest priority below the supplied 2096 * priority. This depends on the bitmap implementation. 2097 */ 2098 do { 2099 wx = pri >> BT_ULSHIFT; /* index of word in map */ 2100 2101 /* 2102 * Form mask for all lower priorities in the word. 2103 */ 2104 mapword = dqactmap[wx] & (BT_BIW(pri) - 1); 2105 2106 /* 2107 * Get next lower active priority. 2108 */ 2109 if (mapword != 0) { 2110 pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1; 2111 } else if (wx > 0) { 2112 pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */ 2113 if (pri < 0) 2114 break; 2115 } else { 2116 pri = -1; 2117 break; 2118 } 2119 2120 /* 2121 * Search the queue for unbound, runnable threads. 2122 */ 2123 dq = &dp->disp_q[pri]; 2124 tp = dq->dq_first; 2125 2126 while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) { 2127 tp = tp->t_link; 2128 } 2129 2130 /* 2131 * If a thread was found, set the priority and return. 2132 */ 2133 } while (tp == NULL); 2134 2135 /* 2136 * pri holds the maximum unbound thread priority or -1. 2137 */ 2138 if (dp->disp_max_unbound_pri != pri) 2139 dp->disp_max_unbound_pri = pri; 2140 } 2141 2142 /* 2143 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should 2144 * check if the CPU to which it was previously bound should have 2145 * its disp_max_unbound_pri increased. 2146 */ 2147 void 2148 disp_adjust_unbound_pri(kthread_t *tp) 2149 { 2150 disp_t *dp; 2151 pri_t tpri; 2152 2153 ASSERT(THREAD_LOCK_HELD(tp)); 2154 2155 /* 2156 * Don't do anything if the thread is not bound, or 2157 * currently not runnable or swapped out.
2158 */ 2159 if (tp->t_bound_cpu == NULL || 2160 tp->t_state != TS_RUN || 2161 tp->t_schedflag & TS_ON_SWAPQ) 2162 return; 2163 2164 tpri = DISP_PRIO(tp); 2165 dp = tp->t_bound_cpu->cpu_disp; 2166 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 2167 if (tpri > dp->disp_max_unbound_pri) 2168 dp->disp_max_unbound_pri = tpri; 2169 } 2170 2171 /* 2172 * disp_getbest() 2173 * De-queue the highest priority unbound runnable thread. 2174 * Returns with the thread unlocked and onproc but at splhigh (like disp()). 2175 * Returns NULL if nothing found. 2176 * Returns T_DONTSTEAL if the thread was not stealable, 2177 * so that the caller will try again later. 2178 * 2179 * Passed a pointer to a dispatch queue not associated with this CPU, and 2180 * its type. 2181 */ 2182 static kthread_t * 2183 disp_getbest(disp_t *dp) 2184 { 2185 kthread_t *tp; 2186 dispq_t *dq; 2187 pri_t pri; 2188 cpu_t *cp, *tcp; 2189 boolean_t allbound; 2190 2191 disp_lock_enter(&dp->disp_lock); 2192 2193 /* 2194 * If there is nothing to run, or the CPU is in the middle of a 2195 * context switch of the only thread, return NULL. 2196 */ 2197 tcp = dp->disp_cpu; 2198 cp = CPU; 2199 pri = dp->disp_max_unbound_pri; 2200 if (pri == -1 || 2201 (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 2202 tcp->cpu_disp->disp_nrunnable == 1)) { 2203 disp_lock_exit_nopreempt(&dp->disp_lock); 2204 return (NULL); 2205 } 2206 2207 dq = &dp->disp_q[pri]; 2208 2209 2210 /* 2211 * Assume that all threads are bound on this queue, and change it 2212 * later when we find out that it is not the case. 2213 */ 2214 allbound = B_TRUE; 2215 for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) { 2216 hrtime_t now, nosteal, rqtime; 2217 2218 /* 2219 * Skip over bound threads which could be here even 2220 * though disp_max_unbound_pri indicated this level.
2221 */ 2222 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 2223 continue; 2224 2225 /* 2226 * We've got some unbound threads on this queue, so turn 2227 * the allbound flag off now. 2228 */ 2229 allbound = B_FALSE; 2230 2231 /* 2232 * The thread is a candidate for stealing from its run queue. We 2233 * don't want to steal threads that became runnable just a 2234 * moment ago. This improves CPU affinity for threads that get 2235 * preempted for short periods of time and go back on the run 2236 * queue. 2237 * 2238 * We want to let it stay on its run queue if it was only placed 2239 * there recently and it was running on the same CPU before that 2240 * to preserve its cache investment. For the thread to remain on 2241 * its run queue, ALL of the following conditions must be 2242 * satisfied: 2243 * 2244 * - the disp queue should not be the kernel preemption queue 2245 * - delayed idle stealing should not be disabled 2246 * - nosteal_nsec should be non-zero 2247 * - it should run with user priority 2248 * - it should be on the run queue of the CPU where it was 2249 * running before being placed on the run queue 2250 * - it should be the only thread on the run queue (to prevent 2251 * extra scheduling latency for other threads) 2252 * - it should sit on the run queue for less than per-chip 2253 * nosteal interval or global nosteal interval 2254 * - in case of CPUs with shared cache it should sit in a run 2255 * queue of a CPU from a different chip 2256 * 2257 * The checks are arranged so that the ones that are faster are 2258 * placed earlier. 2259 */ 2260 if (tcp == NULL || 2261 pri >= minclsyspri || 2262 tp->t_cpu != tcp) 2263 break; 2264 2265 /* 2266 * Steal immediately if, due to CMT processor architecture 2267 * migration between cp and tcp would incur no performance 2268 * penalty.
2269 */ 2270 if (pg_cmt_can_migrate(cp, tcp)) 2271 break; 2272 2273 nosteal = nosteal_nsec; 2274 if (nosteal == 0) 2275 break; 2276 2277 /* 2278 * Calculate time spent sitting on run queue 2279 */ 2280 now = gethrtime_unscaled(); 2281 rqtime = now - tp->t_waitrq; 2282 scalehrtime(&rqtime); 2283 2284 /* 2285 * Steal immediately if the time spent on this run queue is more 2286 * than allowed nosteal delay. 2287 * 2288 * Negative rqtime check is needed here to avoid infinite 2289 * stealing delays caused by unlikely but not impossible 2290 * drifts between CPU times on different CPUs. 2291 */ 2292 if (rqtime > nosteal || rqtime < 0) 2293 break; 2294 2295 DTRACE_PROBE4(nosteal, kthread_t *, tp, 2296 cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime); 2297 scalehrtime(&now); 2298 /* 2299 * Calculate when this thread becomes stealable 2300 */ 2301 now += (nosteal - rqtime); 2302 2303 /* 2304 * Calculate time when some thread becomes stealable 2305 */ 2306 if (now < dp->disp_steal) 2307 dp->disp_steal = now; 2308 } 2309 2310 /* 2311 * If there were no unbound threads on this queue, find the queue 2312 * where they are and then return later. The value of 2313 * disp_max_unbound_pri is not always accurate because it isn't 2314 * reduced until another idle CPU looks for work. 2315 */ 2316 if (allbound) 2317 disp_fix_unbound_pri(dp, pri); 2318 2319 /* 2320 * If we reached the end of the queue and found no unbound threads 2321 * then return NULL so that other CPUs will be considered. If there 2322 * are unbound threads but they cannot yet be stolen, then 2323 * return T_DONTSTEAL and try again later. 2324 */ 2325 if (tp == NULL) { 2326 disp_lock_exit_nopreempt(&dp->disp_lock); 2327 return (allbound ? NULL : T_DONTSTEAL); 2328 } 2329 2330 /* 2331 * Found a runnable, unbound thread, so remove it from queue. 2332 * dispdeq() requires that we have the thread locked, and we do, 2333 * by virtue of holding the dispatch queue lock.
dispdeq() will 2334 * put the thread in transition state, thereby dropping the dispq 2335 * lock. 2336 */ 2337 2338 #ifdef DEBUG 2339 { 2340 int thread_was_on_queue; 2341 2342 thread_was_on_queue = dispdeq(tp); /* drops disp_lock */ 2343 ASSERT(thread_was_on_queue); 2344 } 2345 2346 #else /* DEBUG */ 2347 (void) dispdeq(tp); /* drops disp_lock */ 2348 #endif /* DEBUG */ 2349 2350 /* 2351 * Reset the disp_queue steal time - we do not know what the smallest 2352 * value across the queue is. 2353 */ 2354 dp->disp_steal = 0; 2355 2356 tp->t_schedflag |= TS_DONT_SWAP; 2357 2358 /* 2359 * Setup thread to run on the current CPU. 2360 */ 2361 tp->t_disp_queue = cp->cpu_disp; 2362 2363 cp->cpu_dispthread = tp; /* protected by spl only */ 2364 cp->cpu_dispatch_pri = pri; 2365 2366 /* 2367 * There can be a memory synchronization race between disp_getbest() 2368 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying 2369 * to preempt the current thread to run the enqueued thread while 2370 * disp_getbest() and disp_ratify() are changing the current thread 2371 * to the stolen thread. This may lead to a situation where 2372 * cpu_resched() tries to preempt the wrong thread and the 2373 * stolen thread continues to run on the CPU which has been tagged 2374 * for preemption. 2375 * Later the clock thread gets enqueued but doesn't get to run on the 2376 * CPU causing the system to hang. 2377 * 2378 * To avoid this, grabbing and dropping the disp_lock (which does 2379 * a memory barrier) is needed to synchronize the execution of 2380 * cpu_resched() with disp_getbest() and disp_ratify() and 2381 * synchronize the memory read and written by cpu_resched(), 2382 * disp_getbest(), and disp_ratify() with each other. 2383 * (see CR#6482861 for more details).
2384 */ 2385 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 2386 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 2387 2388 ASSERT(pri == DISP_PRIO(tp)); 2389 2390 DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp); 2391 2392 thread_onproc(tp, cp); /* set t_state to TS_ONPROC */ 2393 2394 /* 2395 * Return with spl high so that swtch() won't need to raise it. 2396 * The disp_lock was dropped by dispdeq(). 2397 */ 2398 2399 return (tp); 2400 } 2401 2402 /* 2403 * disp_bound_common() - common routine for higher level functions 2404 * that check for bound threads under certain conditions. 2405 * If 'threadlistsafe' is set then there is no need to acquire 2406 * pidlock to stop the thread list from changing (eg, if 2407 * disp_bound_* is called with cpus paused). 2408 */ 2409 static int 2410 disp_bound_common(cpu_t *cp, int threadlistsafe, int flag) 2411 { 2412 int found = 0; 2413 kthread_t *tp; 2414 2415 ASSERT(flag); 2416 2417 if (!threadlistsafe) 2418 mutex_enter(&pidlock); 2419 tp = curthread; /* faster than allthreads */ 2420 do { 2421 if (tp->t_state != TS_FREE) { 2422 /* 2423 * If an interrupt thread is busy, but the 2424 * caller doesn't care (i.e. BOUND_INTR is off), 2425 * then just ignore it and continue through. 2426 */ 2427 if ((tp->t_flag & T_INTR_THREAD) && 2428 !(flag & BOUND_INTR)) 2429 continue; 2430 2431 /* 2432 * Skip the idle thread for the CPU 2433 * we're about to set offline. 2434 */ 2435 if (tp == cp->cpu_idle_thread) 2436 continue; 2437 2438 /* 2439 * Skip the pause thread for the CPU 2440 * we're about to set offline. 
2441 */ 2442 if (tp == cp->cpu_pause_thread) 2443 continue; 2444 2445 if ((flag & BOUND_CPU) && 2446 (tp->t_bound_cpu == cp || 2447 tp->t_bind_cpu == cp->cpu_id || 2448 tp->t_weakbound_cpu == cp)) { 2449 found = 1; 2450 break; 2451 } 2452 2453 if ((flag & BOUND_PARTITION) && 2454 (tp->t_cpupart == cp->cpu_part)) { 2455 found = 1; 2456 break; 2457 } 2458 } 2459 } while ((tp = tp->t_next) != curthread && found == 0); 2460 if (!threadlistsafe) 2461 mutex_exit(&pidlock); 2462 return (found); 2463 } 2464 2465 /* 2466 * disp_bound_threads - return nonzero if threads are bound to the processor. 2467 * Called infrequently. Keep this simple. 2468 * Includes threads that are asleep or stopped but not onproc. 2469 */ 2470 int 2471 disp_bound_threads(cpu_t *cp, int threadlistsafe) 2472 { 2473 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU)); 2474 } 2475 2476 /* 2477 * disp_bound_anythreads - return nonzero if _any_ threads are bound 2478 * to the given processor, including interrupt threads. 2479 */ 2480 int 2481 disp_bound_anythreads(cpu_t *cp, int threadlistsafe) 2482 { 2483 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR)); 2484 } 2485 2486 /* 2487 * disp_bound_partition - return nonzero if threads are bound to the same 2488 * partition as the processor. 2489 * Called infrequently. Keep this simple. 2490 * Includes threads that are asleep or stopped but not onproc. 2491 */ 2492 int 2493 disp_bound_partition(cpu_t *cp, int threadlistsafe) 2494 { 2495 return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION)); 2496 } 2497 2498 /* 2499 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound 2500 * threads to other CPUs. 
2501 */ 2502 void 2503 disp_cpu_inactive(cpu_t *cp) 2504 { 2505 kthread_t *tp; 2506 disp_t *dp = cp->cpu_disp; 2507 dispq_t *dq; 2508 pri_t pri; 2509 int wasonq; 2510 2511 disp_lock_enter(&dp->disp_lock); 2512 while ((pri = dp->disp_max_unbound_pri) != -1) { 2513 dq = &dp->disp_q[pri]; 2514 tp = dq->dq_first; 2515 2516 /* 2517 * Skip over bound threads. 2518 */ 2519 while (tp != NULL && tp->t_bound_cpu != NULL) { 2520 tp = tp->t_link; 2521 } 2522 2523 if (tp == NULL) { 2524 /* disp_max_unbound_pri must be inaccurate, so fix it */ 2525 disp_fix_unbound_pri(dp, pri); 2526 continue; 2527 } 2528 2529 wasonq = dispdeq(tp); /* drops disp_lock */ 2530 ASSERT(wasonq); 2531 ASSERT(tp->t_weakbound_cpu == NULL); 2532 2533 setbackdq(tp); 2534 /* 2535 * Called from cpu_offline: 2536 * 2537 * cp has already been removed from the list of active cpus 2538 * and tp->t_cpu has been changed so there is no risk of 2539 * tp ending up back on cp. 2540 * 2541 * Called from cpupart_move_cpu: 2542 * 2543 * The cpu has moved to a new cpupart. Any threads that 2544 * were on it's dispatch queues before the move remain 2545 * in the old partition and can't run in the new partition. 2546 */ 2547 ASSERT(tp->t_cpu != cp); 2548 thread_unlock(tp); 2549 2550 disp_lock_enter(&dp->disp_lock); 2551 } 2552 disp_lock_exit(&dp->disp_lock); 2553 } 2554 2555 /* 2556 * disp_lowpri_cpu - find CPU running the lowest priority thread. 2557 * The hint passed in is used as a starting point so we don't favor 2558 * CPU 0 or any other CPU. The caller should pass in the most recently 2559 * used CPU for the thread. 2560 * 2561 * The lgroup and priority are used to determine the best CPU to run on 2562 * in a NUMA machine. The lgroup specifies which CPUs are closest while 2563 * the thread priority will indicate whether the thread will actually run 2564 * there. To pick the best CPU, the CPUs inside and outside of the given 2565 * lgroup which are running the lowest priority threads are found. 
The 2566 * remote CPU is chosen only if the thread will not run locally on a CPU 2567 * within the lgroup, but will run on the remote CPU. If the thread 2568 * cannot immediately run on any CPU, the best local CPU will be chosen. 2569 * 2570 * The lpl specified also identifies the cpu partition from which 2571 * disp_lowpri_cpu should select a CPU. 2572 * 2573 * curcpu is used to indicate that disp_lowpri_cpu is being called on 2574 * behalf of the current thread. (curthread is looking for a new cpu) 2575 * In this case, cpu_dispatch_pri for this thread's cpu should be 2576 * ignored. 2577 * 2578 * If a cpu is the target of an offline request then try to avoid it. 2579 * 2580 * This function must be called at either high SPL, or with preemption 2581 * disabled, so that the "hint" CPU cannot be removed from the online 2582 * CPU list while we are traversing it. 2583 */ 2584 cpu_t * 2585 disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu) 2586 { 2587 cpu_t *bestcpu; 2588 cpu_t *besthomecpu; 2589 cpu_t *cp, *cpstart; 2590 2591 pri_t bestpri; 2592 pri_t cpupri; 2593 2594 klgrpset_t done; 2595 klgrpset_t cur_set; 2596 2597 lpl_t *lpl_iter, *lpl_leaf; 2598 int i; 2599 2600 /* 2601 * Scan for a CPU currently running the lowest priority thread. 2602 * Cannot get cpu_lock here because it is adaptive. 2603 * We do not require lock on CPU list. 2604 */ 2605 ASSERT(hint != NULL); 2606 ASSERT(lpl != NULL); 2607 ASSERT(lpl->lpl_ncpu > 0); 2608 2609 /* 2610 * First examine local CPUs. Note that it's possible the hint CPU 2611 * passed in in remote to the specified home lgroup. If our priority 2612 * isn't sufficient enough such that we can run immediately at home, 2613 * then examine CPUs remote to our home lgroup. 2614 * We would like to give preference to CPUs closest to "home". 2615 * If we can't find a CPU where we'll run at a given level 2616 * of locality, we expand our search to include the next level. 
2617 */ 2618 bestcpu = besthomecpu = NULL; 2619 klgrpset_clear(done); 2620 /* start with lpl we were passed */ 2621 2622 lpl_iter = lpl; 2623 2624 do { 2625 2626 bestpri = SHRT_MAX; 2627 klgrpset_clear(cur_set); 2628 2629 for (i = 0; i < lpl_iter->lpl_nrset; i++) { 2630 lpl_leaf = lpl_iter->lpl_rset[i]; 2631 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid)) 2632 continue; 2633 2634 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid); 2635 2636 if (hint->cpu_lpl == lpl_leaf) 2637 cp = cpstart = hint; 2638 else 2639 cp = cpstart = lpl_leaf->lpl_cpus; 2640 2641 do { 2642 if (cp == curcpu) 2643 cpupri = -1; 2644 else if (cp == cpu_inmotion) 2645 cpupri = SHRT_MAX; 2646 else 2647 cpupri = cp->cpu_dispatch_pri; 2648 if (cp->cpu_disp->disp_maxrunpri > cpupri) 2649 cpupri = cp->cpu_disp->disp_maxrunpri; 2650 if (cp->cpu_chosen_level > cpupri) 2651 cpupri = cp->cpu_chosen_level; 2652 if (cpupri < bestpri) { 2653 if (CPU_IDLING(cpupri)) { 2654 ASSERT((cp->cpu_flags & 2655 CPU_QUIESCED) == 0); 2656 return (cp); 2657 } 2658 bestcpu = cp; 2659 bestpri = cpupri; 2660 } 2661 } while ((cp = cp->cpu_next_lpl) != cpstart); 2662 } 2663 2664 if (bestcpu && (tpri > bestpri)) { 2665 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); 2666 return (bestcpu); 2667 } 2668 if (besthomecpu == NULL) 2669 besthomecpu = bestcpu; 2670 /* 2671 * Add the lgrps we just considered to the "done" set 2672 */ 2673 klgrpset_or(done, cur_set); 2674 2675 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL); 2676 2677 /* 2678 * The specified priority isn't high enough to run immediately 2679 * anywhere, so just return the best CPU from the home lgroup. 2680 */ 2681 ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0); 2682 return (besthomecpu); 2683 } 2684 2685 /* 2686 * This routine provides the generic idle cpu function for all processors. 
2687 * If a processor has some specific code to execute when idle (say, to stop 2688 * the pipeline and save power) then that routine should be defined in the 2689 * processors specific code (module_xx.c) and the global variable idle_cpu 2690 * set to that function. 2691 */ 2692 static void 2693 generic_idle_cpu(void) 2694 { 2695 } 2696 2697 /*ARGSUSED*/ 2698 static void 2699 generic_enq_thread(cpu_t *cpu, int bound) 2700 { 2701 }