6583 remove whole-process swapping
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2013, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/session.h>
#include <sys/strsubr.h>
#include <sys/user.h>
#include <sys/priocntl.h>
#include <sys/class.h>
#include <sys/disp.h>
#include <sys/procset.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/fx.h>
#include <sys/fxpriocntl.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/vtrace.h>
#include <sys/schedctl.h>
#include <sys/tnf_probe.h>
#include <sys/sunddi.h>
#include <sys/spl.h>
#include <sys/modctl.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/cpupart.h>
#include <sys/cpucaps.h>

static pri_t fx_init(id_t, int, classfuncs_t **);

/*
 * Scheduling-class descriptor registered with the dispatcher framework
 * ("FX" is the fixed-priority class: thread priorities are set explicitly
 * by the user and are not adjusted by the dispatcher).
 */
static struct sclass csw = {
	"FX",
	fx_init,
	0
};

static struct modlsched modlsched = {
	&mod_schedops, "Fixed priority sched class",
	&csw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};


/*
 * control flags (kparms->fx_cflags).
 */
#define	FX_DOUPRILIM	0x01	/* change user priority limit */
#define	FX_DOUPRI	0x02	/* change user priority */
#define	FX_DOTQ		0x04	/* change FX time quantum */


#define	FXMAXUPRI	60	/* maximum user priority setting */

#define	FX_MAX_UNPRIV_PRI	0	/* maximum unprivileged priority */

/*
 * The fxproc_t structures that have a registered callback vector,
 * are also kept in an array of circular doubly linked lists. A hash on
 * the thread id (from ddi_get_kt_did()) is used to determine which list
 * each of such fxproc structures should be placed. Each list has a dummy
 * "head" which is never removed, so the list is never empty.
 */

#define	FX_CB_LISTS 16		/* number of lists, must be power of 2 */
#define	FX_CB_LIST_HASH(ktid)	((uint_t)ktid & (FX_CB_LISTS - 1))

/* Insert fxproc into callback list (takes the per-bucket hash lock). */
#define	FX_CB_LIST_INSERT(fxpp)						\
{									\
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
	kmutex_t *lockp = &fx_cb_list_lock[index];			\
	fxproc_t *headp = &fx_cb_plisthead[index];			\
	mutex_enter(lockp);						\
	fxpp->fx_cb_next = headp->fx_cb_next;				\
	fxpp->fx_cb_prev = headp;					\
	headp->fx_cb_next->fx_cb_prev = fxpp;				\
	headp->fx_cb_next = fxpp;					\
	mutex_exit(lockp);						\
}

/*
 * Remove thread from callback list.
 */
#define	FX_CB_LIST_DELETE(fxpp)						\
{									\
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
	kmutex_t *lockp = &fx_cb_list_lock[index];			\
	mutex_enter(lockp);						\
	fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next;		\
	fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev;		\
	mutex_exit(lockp);						\
}

#define	FX_HAS_CB(fxpp)	(fxpp->fx_callback != NULL)

/* adjust x to be between 0 and fx_maxumdpri */

#define	FX_ADJUST_PRI(pri)						\
{									\
	if (pri < 0)							\
		pri = 0;						\
	else if (pri > fx_maxumdpri)					\
		pri = fx_maxumdpri;					\
}

/* clamp a callback-supplied quantum: too large -> INT_MAX, <= 0 -> infinite */
#define	FX_ADJUST_QUANTUM(q)						\
{									\
	if (q > INT_MAX)						\
		q = INT_MAX;						\
	else if (q <= 0)						\
		q = FX_TQINF;						\
}

#define	FX_ISVALID(pri, quantum) \
	(((pri >= 0) || (pri == FX_CB_NOCHANGE)) &&	\
	    ((quantum >= 0) || (quantum == FX_NOCHANGE) ||	\
		(quantum == FX_TQDEF) || (quantum == FX_TQINF)))


static id_t	fx_cid;		/* fixed priority class ID */
static fxdpent_t *fx_dptbl;	/* fixed priority disp parameter table */

static pri_t	fx_maxupri = FXMAXUPRI;
static pri_t	fx_maxumdpri;	/* max user mode fixed priority */

static pri_t	fx_maxglobpri;	/* maximum global priority used by fx class */
static kmutex_t	fx_dptblock;	/* protects fixed priority dispatch table */


static kmutex_t	fx_cb_list_lock[FX_CB_LISTS];	/* protects list of fxprocs */
						/* that have callbacks */
static fxproc_t	fx_cb_plisthead[FX_CB_LISTS];	/* dummy fxproc at head of */
						/* list of fxprocs with */
						/* callbacks */

static int	fx_admin(caddr_t, cred_t *);
static int	fx_getclinfo(void *);
static int	fx_parmsin(void *);
static int	fx_parmsout(void *, pc_vaparms_t *);
static int	fx_vaparmsin(void *, pc_vaparms_t *);
static int	fx_vaparmsout(void *, pc_vaparms_t *);
static int	fx_getclpri(pcpri_t *);
static int	fx_alloc(void **, int);
static void	fx_free(void *);
static int	fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static void	fx_exitclass(void *);
static int	fx_canexit(kthread_t *, cred_t *);
static int	fx_fork(kthread_t *, kthread_t *, void *);
static void	fx_forkret(kthread_t *, kthread_t *);
static void	fx_parmsget(kthread_t *, void *);
static int	fx_parmsset(kthread_t *, void *, id_t, cred_t *);
static void	fx_stop(kthread_t *, int, int);
static void	fx_exit(kthread_t *);
static void	fx_trapret(kthread_t *);
static void	fx_preempt(kthread_t *);
static void	fx_setrun(kthread_t *);
static void	fx_sleep(kthread_t *);
static void	fx_tick(kthread_t *);
static void	fx_wakeup(kthread_t *);
static int	fx_donice(kthread_t *, cred_t *, int, int *);
static int	fx_doprio(kthread_t *, cred_t *, int, int *);
static pri_t	fx_globpri(kthread_t *);
static void	fx_yield(kthread_t *);
static void	fx_nullsys();

extern fxdpent_t *fx_getdptbl(void);

static void	fx_change_priority(kthread_t *, fxproc_t *);
static fxproc_t	*fx_list_lookup(kt_did_t);
static void	fx_list_release(fxproc_t *);


/* Operations vector handed to the dispatcher via fx_init(). */
static struct classfuncs fx_classfuncs = {
	/* class functions */
	fx_admin,
	fx_getclinfo,
	fx_parmsin,
	fx_parmsout,
	fx_vaparmsin,
	fx_vaparmsout,
	fx_getclpri,
	fx_alloc,
	fx_free,

	/* thread functions */
	fx_enterclass,
	fx_exitclass,
	fx_canexit,
	fx_fork,
	fx_forkret,
	fx_parmsget,
	fx_parmsset,
	fx_stop,
	fx_exit,
	fx_nullsys,	/* active */
	fx_nullsys,	/* inactive */
	fx_trapret,
	fx_preempt,
	fx_setrun,
	fx_sleep,
	fx_tick,
	fx_wakeup,
	fx_donice,
	fx_globpri,
	fx_nullsys,	/* set_process_group */
	fx_yield,
	fx_doprio,
};


/* Install the FX scheduling-class module. */
int
_init()
{
	return (mod_install(&modlinkage));
}

/* Scheduling classes cannot be unloaded once installed. */
int
_fini()
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Fixed priority class initialization. Called by dispinit() at boot time.
 * We can ignore the clparmsz argument since we know that the smallest
 * possible parameter buffer is big enough for us.
 */
/* ARGSUSED */
static pri_t
fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int i;
	extern pri_t fx_getmaxumdpri(void);

	fx_dptbl = fx_getdptbl();
	fx_maxumdpri = fx_getmaxumdpri();
	fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri;

	fx_cid = cid;		/* Record our class ID */

	/*
	 * Initialize the hash table for fxprocs with callbacks.
	 * Each bucket is a circular list whose dummy head points at itself.
	 */
	for (i = 0; i < FX_CB_LISTS; i++) {
		fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev =
		    &fx_cb_plisthead[i];
	}

	/*
	 * We're required to return a pointer to our classfuncs
	 * structure and the highest global priority value we use.
	 */
	*clfuncspp = &fx_classfuncs;
	return (fx_maxglobpri);
}

/*
 * Get or reset the fx_dptbl values per the user's request.
 */
static int
fx_admin(caddr_t uaddr, cred_t *reqpcredp)
{
	fxadmin_t	fxadmin;
	fxdpent_t	*tmpdpp;
	int		userdpsz;
	int		i;
	size_t		fxdpsz;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32_IMPL
	else {
		/* get fxadmin struct from ILP32 caller */
		fxadmin32_t fxadmin32;
		if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t)))
			return (EFAULT);
		fxadmin.fx_dpents =
		    (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents;
		fxadmin.fx_ndpents = fxadmin32.fx_ndpents;
		fxadmin.fx_cmd = fxadmin32.fx_cmd;
	}
#endif /* _SYSCALL32_IMPL */

	/* size of the full kernel dispatch table, in bytes */
	fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t);

	switch (fxadmin.fx_cmd) {
	case FX_GETDPSIZE:
		/* Report how many entries the caller must allocate. */
		fxadmin.fx_ndpents = fx_maxumdpri + 1;

		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
				return (EFAULT);
		}
#ifdef _SYSCALL32_IMPL
		else {
			/* return fxadmin struct to ILP32 caller */
			fxadmin32_t fxadmin32;
			fxadmin32.fx_dpents =
			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
			fxadmin32.fx_cmd = fxadmin.fx_cmd;
			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
				return (EFAULT);
		}
#endif /* _SYSCALL32_IMPL */
		break;

	case FX_GETDPTBL:
		/* Copy out at most the caller's buffer or the whole table. */
		userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t),
		    fxdpsz);
		if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz))
			return (EFAULT);

		fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t);

		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
				return (EFAULT);
		}
#ifdef _SYSCALL32_IMPL
		else {
			/* return fxadmin struct to ILP32 callers */
			fxadmin32_t fxadmin32;
			fxadmin32.fx_dpents =
			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
			fxadmin32.fx_cmd = fxadmin.fx_cmd;
			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
				return (EFAULT);
		}
#endif /* _SYSCALL32_IMPL */
		break;

	case FX_SETDPTBL:
		/*
		 * We require that the requesting process has sufficient
		 * privileges. We also require that the table supplied by
		 * the user exactly match the current fx_dptbl in size.
		 */
		if (secpolicy_dispadm(reqpcredp) != 0) {
			return (EPERM);
		}
		if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) {
			return (EINVAL);
		}

		/*
		 * We read the user supplied table into a temporary buffer
		 * where it is validated before being copied over the
		 * fx_dptbl.
		 */
		tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP);
		if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) {
			kmem_free(tmpdpp, fxdpsz);
			return (EFAULT);
		}
		for (i = 0; i < fxadmin.fx_ndpents; i++) {

			/*
			 * Validate the user supplied values. All we are doing
			 * here is verifying that the values are within their
			 * allowable ranges and will not panic the system. We
			 * make no attempt to ensure that the resulting
			 * configuration makes sense or results in reasonable
			 * performance.
			 */
			if (tmpdpp[i].fx_quantum <= 0 &&
			    tmpdpp[i].fx_quantum != FX_TQINF) {
				kmem_free(tmpdpp, fxdpsz);
				return (EINVAL);
			}
		}

		/*
		 * Copy the user supplied values over the current fx_dptbl
		 * values. The fx_globpri member is read-only so we don't
		 * overwrite it.
		 */
		mutex_enter(&fx_dptblock);
		for (i = 0; i < fxadmin.fx_ndpents; i++) {
			fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum;
		}
		mutex_exit(&fx_dptblock);
		kmem_free(tmpdpp, fxdpsz);
		break;

	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * Allocate a fixed priority class specific thread structure and
 * initialize it with the parameters supplied. Also move the thread
 * to specified priority.
 */
static int
fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
	fxproc_t	*fxpp;
	pri_t		reqfxupri;
	pri_t		reqfxuprilim;

	fxpp = (fxproc_t *)bufp;
	ASSERT(fxpp != NULL);

	/*
	 * Initialize the fxproc structure.
	 */
	fxpp->fx_flags = 0;
	fxpp->fx_callback = NULL;
	fxpp->fx_cookie = NULL;

	if (fxkparmsp == NULL) {
		/*
		 * Use default values.
		 */
		fxpp->fx_pri = fxpp->fx_uprilim = 0;
		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		fxpp->fx_nice = NZERO;
	} else {
		/*
		 * Use supplied values.  Raising either the limit or the
		 * priority above FX_MAX_UNPRIV_PRI requires privilege.
		 */

		if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) {
			reqfxuprilim = 0;
		} else {
			if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			reqfxuprilim = fxkparmsp->fx_uprilim;
			FX_ADJUST_PRI(reqfxuprilim);
		}

		if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) {
			reqfxupri = reqfxuprilim;
		} else {
			if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			/*
			 * Set the user priority to the requested value
			 * or the upri limit, whichever is lower.
			 */
			reqfxupri = fxkparmsp->fx_upri;
			FX_ADJUST_PRI(reqfxupri);

			if (reqfxupri > reqfxuprilim)
				reqfxupri = reqfxuprilim;
		}


		fxpp->fx_uprilim = reqfxuprilim;
		fxpp->fx_pri = reqfxupri;

		/* derive a nice value proportional to the priority */
		fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri;

		if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) ||
		    (fxkparmsp->fx_tqntm == FX_TQDEF)) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else {
			/* a non-default quantum requires privilege */
			if (secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);

			if (fxkparmsp->fx_tqntm == FX_TQINF)
				fxpp->fx_pquantum = FX_TQINF;
			else {
				fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
			}
		}

	}

	fxpp->fx_timeleft = fxpp->fx_pquantum;
	cpucaps_sc_init(&fxpp->fx_caps);
	fxpp->fx_tp = t;

	thread_lock(t);			/* get dispatcher lock on thread */
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = (void *)fxpp;
	t->t_schedflag &= ~TS_RUNQMATCH;
	fx_change_priority(t, fxpp);
	thread_unlock(t);

	return (0);
}

/*
 * The thread is exiting.
 */
static void
fx_exit(kthread_t *t)
{
	fxproc_t *fxpp;

	thread_lock(t);
	fxpp = (fxproc_t *)(t->t_cldata);

	/*
	 * A thread could be exiting in between clock ticks, so we need to
	 * calculate how much CPU time it used since it was charged last time.
	 *
	 * CPU caps are not enforced on exiting processes - it is usually
	 * desirable to exit as soon as possible to free resources.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY);

	if (FX_HAS_CB(fxpp)) {
		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
		fxpp->fx_callback = NULL;
		fxpp->fx_cookie = NULL;
		/*
		 * Drop the thread lock before taking the callback-list hash
		 * lock in FX_CB_LIST_DELETE to preserve lock ordering.
		 */
		thread_unlock(t);
		FX_CB_LIST_DELETE(fxpp);
		return;
	}

	thread_unlock(t);
}

/*
 * Exiting the class. Free fxproc structure of thread.
 */
static void
fx_exitclass(void *procp)
{
	fxproc_t *fxpp = (fxproc_t *)procp;

	thread_lock(fxpp->fx_tp);
	if (FX_HAS_CB(fxpp)) {

		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);

		fxpp->fx_callback = NULL;
		fxpp->fx_cookie = NULL;
		/* drop thread lock before taking the callback hash lock */
		thread_unlock(fxpp->fx_tp);
		FX_CB_LIST_DELETE(fxpp);
	} else
		thread_unlock(fxpp->fx_tp);

	kmem_free(fxpp, sizeof (fxproc_t));
}

/* ARGSUSED */
static int
fx_canexit(kthread_t *t, cred_t *cred)
{
	/*
	 * A thread can always leave the FX class
	 */
	return (0);
}

/*
 * Initialize fixed-priority class specific proc structure for a child.
 * callbacks are not inherited upon fork.
 */
static int
fx_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
	fxproc_t	*pfxpp;		/* ptr to parent's fxproc structure */
	fxproc_t	*cfxpp;		/* ptr to child's fxproc structure */

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	cfxpp = (fxproc_t *)bufp;
	ASSERT(cfxpp != NULL);
	thread_lock(t);
	pfxpp = (fxproc_t *)t->t_cldata;
	/*
	 * Initialize child's fxproc structure.  The child inherits the
	 * parent's priority, limit, nice and quantum but not its callback
	 * vector or its FXBACKQ state.
	 */
	cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum;
	cfxpp->fx_pri = pfxpp->fx_pri;
	cfxpp->fx_uprilim = pfxpp->fx_uprilim;
	cfxpp->fx_nice = pfxpp->fx_nice;
	cfxpp->fx_callback = NULL;
	cfxpp->fx_cookie = NULL;
	cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ);
	cpucaps_sc_init(&cfxpp->fx_caps);

	cfxpp->fx_tp = ct;
	ct->t_cldata = (void *)cfxpp;
	thread_unlock(t);

	/*
	 * Link new structure into fxproc list.
	 */
	return (0);
}


/*
 * Child is placed at back of dispatcher queue and parent gives
 * up processor so that the child runs first after the fork.
 * This allows the child immediately execing to break the multiple
 * use of copy on write pages with no disk home. The parent will
 * get to steal them back rather than uselessly copying them.
 */
static void
fx_forkret(kthread_t *t, kthread_t *ct)
{
	proc_t	*pp = ttoproc(t);
	proc_t	*cp = ttoproc(ct);
	fxproc_t *fxpp;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Grab the child's p_lock before dropping pidlock to ensure
	 * the process does not disappear before we set it running.
	 */
	mutex_enter(&cp->p_lock);
	continuelwps(cp);
	mutex_exit(&cp->p_lock);

	mutex_enter(&pp->p_lock);
	mutex_exit(&pidlock);
	continuelwps(pp);

	thread_lock(t);
	fxpp = (fxproc_t *)(t->t_cldata);
	t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
	ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
	THREAD_TRANSITION(t);
	fx_setrun(t);
	thread_unlock(t);
	/*
	 * Safe to drop p_lock now since it is safe to change
	 * the scheduling class after this point.
	 */
	mutex_exit(&pp->p_lock);

	swtch();
}


/*
 * Get information about the fixed-priority class into the buffer
 * pointed to by fxinfop. The maximum configured user priority
 * is the only information we supply.
 */
static int
fx_getclinfo(void *infop)
{
	fxinfo_t *fxinfop = (fxinfo_t *)infop;
	fxinfop->fx_maxupri = fx_maxupri;
	return (0);
}



/*
 * Return the user mode scheduling priority range.
 */
static int
fx_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = fx_maxupri;
	pcprip->pc_clpmin = 0;
	return (0);
}


/* Shared no-op used for class operations FX does not implement. */
static void
fx_nullsys()
{}


/*
 * Get the fixed-priority parameters of the thread pointed to by
 * fxprocp into the buffer pointed to by fxparmsp.
 */
static void
fx_parmsget(kthread_t *t, void *parmsp)
{
	fxproc_t *fxpp = (fxproc_t *)t->t_cldata;
	fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp;

	fxkparmsp->fx_upri = fxpp->fx_pri;
	fxkparmsp->fx_uprilim = fxpp->fx_uprilim;
	fxkparmsp->fx_tqntm = fxpp->fx_pquantum;
}



/*
 * Check the validity of the fixed-priority parameters in the buffer
 * pointed to by fxparmsp and convert them in place to the kernel
 * (fxkparms_t) representation.
 */
static int
fx_parmsin(void *parmsp)
{
	fxparms_t *fxparmsp = (fxparms_t *)parmsp;
	uint_t		cflags;
	longlong_t	ticks;
	/*
	 * Check validity of parameters.
	 */

	if ((fxparmsp->fx_uprilim > fx_maxupri ||
	    fxparmsp->fx_uprilim < 0) &&
	    fxparmsp->fx_uprilim != FX_NOCHANGE)
		return (EINVAL);

	if ((fxparmsp->fx_upri > fx_maxupri ||
	    fxparmsp->fx_upri < 0) &&
	    fxparmsp->fx_upri != FX_NOCHANGE)
		return (EINVAL);

	/* a zero quantum is invalid; nanoseconds must stay below 1 second */
	if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) ||
	    fxparmsp->fx_tqnsecs >= NANOSEC)
		return (EINVAL);

	cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0);

	if (fxparmsp->fx_uprilim != FX_NOCHANGE) {
		cflags |= FX_DOUPRILIM;
	}

	if (fxparmsp->fx_tqnsecs != FX_NOCHANGE)
		cflags |= FX_DOTQ;

	/*
	 * convert the buffer to kernel format.
	 */

	if (fxparmsp->fx_tqnsecs >= 0) {
		if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) +
		    NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX)
			return (ERANGE);

		((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks;
	} else {
		/* negative values must be one of the special tokens */
		if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) &&
		    (fxparmsp->fx_tqnsecs != FX_TQINF) &&
		    (fxparmsp->fx_tqnsecs != FX_TQDEF))
			return (EINVAL);
		((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs;
	}

	((fxkparms_t *)fxparmsp)->fx_cflags = cflags;

	return (0);
}


/*
 * Check the validity of the fixed-priority parameters in the pc_vaparms_t
 * structure vaparmsp and put them in the buffer pointed to by fxprmsp.
 * pc_vaparms_t contains (key, value) pairs of parameter.
 */
static int
fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
{
	uint_t		secs = 0;
	uint_t		cnt;
	int		nsecs = 0;
	int		priflag, secflag, nsecflag, limflag;
	longlong_t	ticks;
	fxkparms_t	*fxprmsp = (fxkparms_t *)prmsp;
	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];


	/*
	 * First check the validity of parameters and convert them
	 * from the user supplied format to the internal format.
	 */
	priflag = secflag = nsecflag = limflag = 0;

	fxprmsp->fx_cflags = 0;

	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
		return (EINVAL);

	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {

		/* each key may appear at most once (the *flag counters) */
		switch (vpp->pc_key) {
		case FX_KY_UPRILIM:
			if (limflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOUPRILIM;
			fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm;
			if (fxprmsp->fx_uprilim > fx_maxupri ||
			    fxprmsp->fx_uprilim < 0)
				return (EINVAL);
			break;

		case FX_KY_UPRI:
			if (priflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOUPRI;
			fxprmsp->fx_upri = (pri_t)vpp->pc_parm;
			if (fxprmsp->fx_upri > fx_maxupri ||
			    fxprmsp->fx_upri < 0)
				return (EINVAL);
			break;

		case FX_KY_TQSECS:
			if (secflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOTQ;
			secs = (uint_t)vpp->pc_parm;
			break;

		case FX_KY_TQNSECS:
			if (nsecflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOTQ;
			nsecs = (int)vpp->pc_parm;
			break;

		default:
			return (EINVAL);
		}
	}

	if (vaparmsp->pc_vaparmscnt == 0) {
		/*
		 * Use default parameters.
		 */
		fxprmsp->fx_upri = 0;
		fxprmsp->fx_uprilim = 0;
		fxprmsp->fx_tqntm = FX_TQDEF;
		fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ;
	} else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) {
		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
			return (EINVAL);

		if (nsecs >= 0) {
			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
				return (ERANGE);

			fxprmsp->fx_tqntm = (int)ticks;
		} else {
			if (nsecs != FX_TQINF && nsecs != FX_TQDEF)
				return (EINVAL);
			fxprmsp->fx_tqntm = nsecs;
		}
	}

	return (0);
}


/*
 * Nothing to do here but return success.
 */
/* ARGSUSED */
static int
fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
{
	register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp;

	if (vaparmsp != NULL)
		return (0);

	if (fxkprmsp->fx_tqntm < 0) {
		/*
		 * Quantum field set to special value (e.g. FX_TQINF)
		 */
		((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm;
		((fxparms_t *)fxkprmsp)->fx_tqsecs = 0;

	} else {
		/* Convert quantum from ticks to seconds-nanoseconds */

		timestruc_t ts;
		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
		((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec;
		((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec;
	}

	return (0);
}


/*
 * Copy all selected fixed-priority class parameters to the user.
 * The parameters are specified by a key.
 */
static int
fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
{
	fxkparms_t	*fxkprmsp = (fxkparms_t *)prmsp;
	timestruc_t	ts;
	uint_t		cnt;
	uint_t		secs;
	int		nsecs;
	int		priflag, secflag, nsecflag, limflag;
	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];

	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));

	priflag = secflag = nsecflag = limflag = 0;

	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
		return (EINVAL);

	if (fxkprmsp->fx_tqntm < 0) {
		/*
		 * Quantum field set to special value (e.g. FX_TQINF).
		 */
		secs = 0;
		nsecs = fxkprmsp->fx_tqntm;
	} else {
		/*
		 * Convert quantum from ticks to seconds-nanoseconds.
		 */
		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
		secs = ts.tv_sec;
		nsecs = ts.tv_nsec;
	}


	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {

		/* each key may appear at most once (the *flag counters) */
		switch (vpp->pc_key) {
		case FX_KY_UPRILIM:
			if (limflag++)
				return (EINVAL);
			if (copyout(&fxkprmsp->fx_uprilim,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
				return (EFAULT);
			break;

		case FX_KY_UPRI:
			if (priflag++)
				return (EINVAL);
			if (copyout(&fxkprmsp->fx_upri,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
				return (EFAULT);
			break;

		case FX_KY_TQSECS:
			if (secflag++)
				return (EINVAL);
			if (copyout(&secs,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t)))
				return (EFAULT);
			break;

		case FX_KY_TQNSECS:
			if (nsecflag++)
				return (EINVAL);
			if (copyout(&nsecs,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (int)))
				return (EFAULT);
			break;

		default:
			return (EINVAL);
		}
	}

	return (0);
}

/*
 * Set the scheduling parameters of the thread pointed to by fxprocp
 * to those specified in the buffer pointed to by fxparmsp.
 */
/* ARGSUSED */
static int
fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
{
	char		nice;
	pri_t		reqfxuprilim;
	pri_t		reqfxupri;
	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
	fxproc_t	*fxpp;


	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));

	thread_lock(tx);
	fxpp = (fxproc_t *)tx->t_cldata;

	if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0)
		reqfxuprilim = fxpp->fx_uprilim;
	else
		reqfxuprilim = fxkparmsp->fx_uprilim;

	/*
	 * Basic permissions enforced by generic kernel code
	 * for all classes require that a thread attempting
	 * to change the scheduling parameters of a target
	 * thread be privileged or have a real or effective
	 * UID matching that of the target thread. We are not
	 * called unless these basic permission checks have
	 * already passed. The fixed priority class requires in
	 * addition that the calling thread be privileged if it
	 * is attempting to raise the pri above its current
	 * value This may have been checked previously but if our
	 * caller passed us a non-NULL credential pointer we assume
	 * it hasn't and we check it here.
	 */

	if ((reqpcredp != NULL) &&
	    (reqfxuprilim > fxpp->fx_uprilim ||
	    ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) &&
	    secpolicy_raisepriority(reqpcredp) != 0) {
		thread_unlock(tx);
		return (EPERM);
	}

	FX_ADJUST_PRI(reqfxuprilim);

	if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0)
		reqfxupri = fxpp->fx_pri;
	else
		reqfxupri = fxkparmsp->fx_upri;


	/*
	 * Make sure the user priority doesn't exceed the upri limit.
	 */
	if (reqfxupri > reqfxuprilim)
		reqfxupri = reqfxuprilim;

	/*
	 * Set fx_nice to the nice value corresponding to the user
	 * priority we are setting. Note that setting the nice field
	 * of the parameter struct won't affect upri or nice.
	 */

	nice = NZERO - (reqfxupri * NZERO) / fx_maxupri;

	if (nice > NZERO)
		nice = NZERO;

	fxpp->fx_uprilim = reqfxuprilim;
	fxpp->fx_pri = reqfxupri;

	if (fxkparmsp->fx_tqntm == FX_TQINF)
		fxpp->fx_pquantum = FX_TQINF;
	else if (fxkparmsp->fx_tqntm == FX_TQDEF)
		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
	else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)
		fxpp->fx_pquantum = fxkparmsp->fx_tqntm;

	fxpp->fx_nice = nice;

	fx_change_priority(tx, fxpp);
	thread_unlock(tx);
	return (0);
}


/*
 * Return the global scheduling priority that would be assigned
 * to a thread entering the fixed-priority class with the fx_upri.
 */
static pri_t
fx_globpri(kthread_t *t)
{
	fxproc_t *fxpp;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	fxpp = (fxproc_t *)t->t_cldata;
	return (fx_dptbl[fxpp->fx_pri].fx_globpri);

}

/*
 * Arrange for thread to be placed in appropriate location
 * on dispatcher queue.
 *
 * This is called with the current thread in TS_ONPROC and locked.
 */
static void
fx_preempt(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(curthread));

	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	/*
	 * Check to see if we're doing "preemption control" here. If
	 * we are, and if the user has requested that this thread not
	 * be preempted, and if preemptions haven't been put off for
	 * too long, let the preemption happen here but try to make
	 * sure the thread is rescheduled as soon as possible. We do
	 * this by putting it on the front of the highest priority run
	 * queue in the FX class. If the preemption has been put off
	 * for too long, clear the "nopreempt" bit and let the thread
	 * be preempted.
	 */
	if (t->t_schedctl && schedctl_get_nopreempt(t)) {
		if (fxpp->fx_pquantum == FX_TQINF ||
		    fxpp->fx_timeleft > -SC_MAX_TICKS) {
			DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
			schedctl_set_yield(t, 1);
			setfrontdq(t);
			return;
		} else {
			schedctl_set_nopreempt(t, 0);
			DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
			TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt",
			    /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
			    tnf_lwpid, lwpid, t->t_tid);
			/*
			 * Fall through and be preempted below.
			 */
		}
	}

	/*
	 * Give a registered callback the chance to adjust the thread's
	 * quantum and priority before it is requeued.
	 */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		fxpp->fx_pri = newpri;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
	}

	/*
	 * This thread may be placed on wait queue by CPU Caps. In this case we
	 * do not need to do anything until it is removed from the wait queue.
	 */
	if (CPUCAPS_ENFORCE(t)) {
		return;
	}

	if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) {
		fxpp->fx_timeleft = fxpp->fx_pquantum;
		fxpp->fx_flags &= ~FXBACKQ;
		setbackdq(t);
	} else {
		setfrontdq(t);
	}
}

/*
 * Make the thread runnable; preserve round-robin by queueing at the
 * back unless the thread was just dispatched this tick.
 */
static void
fx_setrun(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */
	fxpp->fx_flags &= ~FXBACKQ;

	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}


/*
 * Prepare thread for sleep. We reset the thread priority so it will
 * run at the kernel priority level when it wakes up.
 */
static void
fx_sleep(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Account for time spent on CPU before going to sleep.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	if (FX_HAS_CB(fxpp)) {
		FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
}

/* ARGSUSED */
static void
fx_stop(kthread_t *t, int why, int what)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));

	/* notify a registered callback that the thread is stopping */
	if (FX_HAS_CB(fxpp)) {
		FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
}

/*
 * Check for time slice expiration. If time slice has expired
 * set runrun to cause preemption.
 */
static void
fx_tick(kthread_t *t)
{
	boolean_t call_cpu_surrender = B_FALSE;
	fxproc_t *fxpp;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	thread_lock(t);

	fxpp = (fxproc_t *)(t->t_cldata);

	/* give a registered callback a chance to reprice on each tick */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		if (newpri != fxpp->fx_pri) {
			fxpp->fx_pri = newpri;
			fx_change_priority(t, fxpp);
		}
	}

	/*
	 * Keep track of thread's project CPU usage. Note that projects
	 * get charged even when threads are running in the kernel.
	 */
	call_cpu_surrender =  CPUCAPS_CHARGE(t, &fxpp->fx_caps,
	    CPUCAPS_CHARGE_ENFORCE);

	if ((fxpp->fx_pquantum != FX_TQINF) &&
	    (--fxpp->fx_timeleft <= 0)) {
		pri_t	new_pri;

		/*
		 * If we're doing preemption control and trying to
		 * avoid preempting this thread, just note that
		 * the thread should yield soon and let it keep
		 * running (unless it's been a while).
		 */
		if (t->t_schedctl && schedctl_get_nopreempt(t)) {
			if (fxpp->fx_timeleft > -SC_MAX_TICKS) {
				DTRACE_SCHED1(schedctl__nopreempt,
				    kthread_t *, t);
				schedctl_set_yield(t, 1);
				thread_unlock_nopreempt(t);
				return;
			}
			TNF_PROBE_2(schedctl_failsafe,
			    "schedctl FX fx_tick", /* CSTYLED */,
			    tnf_pid, pid, ttoproc(t)->p_pid,
			    tnf_lwpid, lwpid, t->t_tid);
		}
		new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
		ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
		/*
		 * When the priority of a thread is changed,
		 * it may be necessary to adjust its position
		 * on a sleep queue or dispatch queue. Even
		 * when the priority is not changed, we need
		 * to preserve round robin on dispatch queue.
		 * The function thread_change_pri accomplishes
		 * this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		} else {
			call_cpu_surrender = B_TRUE;
		}
	} else if (t->t_state == TS_ONPROC &&
	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
		/* a higher-priority thread is waiting; give up the CPU */
		call_cpu_surrender = B_TRUE;
	}

	if (call_cpu_surrender) {
		fxpp->fx_flags |= FXBACKQ;
		cpu_surrender(t);
	}
	thread_unlock_nopreempt(t);	/* clock thread can't be preempted */
}


/*
 * Return from a trap to user level; FX leaves the priority untouched,
 * so only sanity assertions remain here.
 */
static void
fx_trapret(kthread_t *t)
{
	cpu_t	*cp = CPU;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t == curthread);
	ASSERT(cp->cpu_dispthread == t);
	ASSERT(t->t_state == TS_ONPROC);
}


/*
 * Processes waking up go to the back of their queue.
 */
static void
fx_wakeup(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Let a registered callback set adjust quantum/priority before the
	 * thread is requeued.
	 */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t newpri = fxpp->fx_pri;
		FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}

		FX_ADJUST_PRI(newpri);
		if (newpri != fxpp->fx_pri) {
			fxpp->fx_pri = newpri;
			THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
		}
	}

	fxpp->fx_flags &= ~FXBACKQ;

	/*
	 * Front of queue if dispatched off a CPU this same tick; same
	 * placement rule as fx_setrun().
	 */
	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}


/*
 * When a thread yields, put it on the back of the run queue.
 */
static void
fx_yield(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Collect CPU usage spent before yielding CPU.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	/* Callbacks see a yield as a preemption. */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t newpri = fxpp->fx_pri;
		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		fxpp->fx_pri = newpri;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
	}

	/*
	 * Clear the preemption control "yield" bit since the user is
	 * doing a yield.
	 */
	if (t->t_schedctl)
		schedctl_set_yield(t, 0);

	if (fxpp->fx_timeleft <= 0) {
		/*
		 * Time slice was artificially extended to avoid
		 * preemption, so pretend we're preempting it now.
		 */
		DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft);
		fxpp->fx_timeleft = fxpp->fx_pquantum;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
		ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
	}

	fxpp->fx_flags &= ~FXBACKQ;
	setbackdq(t);
}

/*
 * Increment the nice value of the specified thread by incr and
 * return the new value in *retvalp.
 */
static int
fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	int newnice;
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);
	fxkparms_t fxkparms;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	/* If there's no change to priority, just return current setting */
	if (incr == 0) {
		if (retvalp) {
			*retvalp = fxpp->fx_nice - NZERO;
		}
		return (0);
	}

	/* Lowering nice (raising priority) requires privilege. */
	if ((incr < 0 || incr > 2 * NZERO) &&
	    secpolicy_raisepriority(cr) != 0)
		return (EPERM);

	/*
	 * Specifying a nice increment greater than the upper limit of
	 * 2 * NZERO - 1 will result in the thread's nice value being
	 * set to the upper limit.  We check for this before computing
	 * the new value because otherwise we could get overflow
	 * if a privileged user specified some ridiculous increment.
	 */
	if (incr > 2 * NZERO - 1)
		incr = 2 * NZERO - 1;

	newnice = fxpp->fx_nice + incr;
	if (newnice > NZERO)
		newnice = NZERO;
	else if (newnice < 0)
		newnice = 0;

	/* Map the nice value onto the [0, fx_maxupri] user priority range. */
	fxkparms.fx_uprilim = fxkparms.fx_upri =
	    -((newnice - NZERO) * fx_maxupri) / NZERO;

	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;

	fxkparms.fx_tqntm = FX_TQDEF;

	/*
	 * Reset the uprilim and upri values of the thread. Adjust
	 * time quantum accordingly.
	 */

	(void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL);

	/*
	 * Although fx_parmsset already reset fx_nice it may
	 * not have been set to precisely the value calculated above
	 * because fx_parmsset determines the nice value from the
	 * user priority and we may have truncated during the integer
	 * conversion from nice value to user priority and back.
	 * We reset fx_nice to the value we calculated above.
	 */
	fxpp->fx_nice = (char)newnice;

	if (retvalp)
		*retvalp = newnice - NZERO;

	return (0);
}

/*
 * Increment the priority of the specified thread by incr and
 * return the new value in *retvalp.
 *
 * NOTE(review): unlike fx_donice(), retvalp is dereferenced without a
 * NULL check here — presumably all callers pass a valid pointer; confirm
 * against the priocntl dispatch path.
 */
static int
fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	int newpri;
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);
	fxkparms_t fxkparms;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	/* If there's no change to priority, just return current setting */
	if (incr == 0) {
		*retvalp = fxpp->fx_pri;
		return (0);
	}

	newpri = fxpp->fx_pri + incr;
	if (newpri > fx_maxupri || newpri < 0)
		return (EINVAL);

	*retvalp = newpri;
	fxkparms.fx_uprilim = fxkparms.fx_upri = newpri;
	fxkparms.fx_tqntm = FX_NOCHANGE;
	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;

	/*
	 * Reset the uprilim and upri values of the thread.
	 * fx_parmsset performs the privilege check against cr.
	 */
	return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr));
}

/*
 * Propagate a new fx_pri to the thread's global dispatch priority and
 * fix up its queue position.  Called with the thread locked.
 */
static void
fx_change_priority(kthread_t *t, fxproc_t *fxpp)
{
	pri_t new_pri;

	ASSERT(THREAD_LOCK_HELD(t));
	new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
	ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
	t->t_cpri = fxpp->fx_pri;
	if (t == curthread || t->t_state == TS_ONPROC) {
		/* curthread is always onproc */
		cpu_t *cp = t->t_disp_queue->disp_cpu;
		THREAD_CHANGE_PRI(t, new_pri);
		if (t == cp->cpu_dispthread)
			cp->cpu_dispatch_pri = DISP_PRIO(t);
		if (DISP_MUST_SURRENDER(t)) {
			/* No longer the best runnable thread; requeue. */
			fxpp->fx_flags |= FXBACKQ;
			cpu_surrender(t);
		} else {
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
	} else {
		/*
		 * When the priority of a thread is changed,
		 * it may be necessary to adjust its position
		 * on a sleep queue or dispatch queue.
		 * The function thread_change_pri accomplishes
		 * this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			/*
			 * The thread was on a run queue. Reset
			 * its CPU timeleft from the quantum
			 * associated with the new priority.
			 */
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		} else {
			fxpp->fx_flags |= FXBACKQ;
		}
	}
}

/*
 * Allocate an fxproc structure for a thread entering the class.
 * Returns 0 on success, ENOMEM if kmem_alloc fails (KM_NOSLEEP).
 */
static int
fx_alloc(void **p, int flag)
{
	void *bufp;

	bufp = kmem_alloc(sizeof (fxproc_t), flag);
	if (bufp == NULL) {
		return (ENOMEM);
	} else {
		*p = bufp;
		return (0);
	}
}

/*
 * Free the class-private data allocated by fx_alloc().
 */
static void
fx_free(void *bufp)
{
	if (bufp)
		kmem_free(bufp, sizeof (fxproc_t));
}

/*
 * Release the callback list mutex after successful lookup
 */
void
fx_list_release(fxproc_t *fxpp)
{
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);
	kmutex_t *lockp = &fx_cb_list_lock[index];
	mutex_exit(lockp);
}

/*
 * Look up the fxproc with a registered callback set for the given
 * kernel thread id.  On success the hash-bucket lock is returned HELD;
 * the caller must drop it with fx_list_release().  Returns NULL (lock
 * dropped) if no match is found.
 */
fxproc_t *
fx_list_lookup(kt_did_t ktid)
{
	int index = FX_CB_LIST_HASH(ktid);
	kmutex_t *lockp = &fx_cb_list_lock[index];
	fxproc_t *fxpp;

	mutex_enter(lockp);

	for (fxpp = fx_cb_plisthead[index].fx_cb_next;
	    fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) {
		if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid &&
		    fxpp->fx_callback != NULL) {
			/*
			 * The caller is responsible for calling
			 * fx_list_release to drop the lock upon
			 * successful lookup
			 */
			return (fxpp);
		}
	}
	mutex_exit(lockp);
	return ((fxproc_t *)NULL);
}


/*
 * register a callback set of routines for current thread
 * thread should already be in FX class
 */
int
fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie,
    pri_t pri, clock_t quantum)
{

	fxproc_t *fxpp;

	if (fx_callback == NULL)
		return (EINVAL);

	/* Registering callbacks requires dispatcher-admin privilege. */
	if (secpolicy_dispadm(CRED()) != 0)
		return (EPERM);

	if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV)
		return (EINVAL);

	if (!FX_ISVALID(pri, quantum))
		return (EINVAL);

	thread_lock(curthread);		/* get dispatcher lock on thread */

	if (curthread->t_cid != fx_cid) {
		thread_unlock(curthread);
		return (EINVAL);
	}

	fxpp = (fxproc_t *)(curthread->t_cldata);
	ASSERT(fxpp != NULL);
	/* Only one callback set may be registered per thread. */
	if (FX_HAS_CB(fxpp)) {
		thread_unlock(curthread);
		return (EINVAL);
	}

	fxpp->fx_callback = fx_callback;
	fxpp->fx_cookie = cookie;

	/*
	 * Apply the requested priority and/or quantum.  FX_TQDEF takes the
	 * default quantum for the (new) priority; FX_TQINF means an
	 * infinite quantum; FX_NOCHANGE leaves the current value alone.
	 */
	if (pri != FX_CB_NOCHANGE) {
		fxpp->fx_pri = pri;
		FX_ADJUST_PRI(fxpp->fx_pri);
		if (quantum == FX_TQDEF) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else if (quantum != FX_NOCHANGE) {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	} else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
		if (quantum == FX_TQINF)
			fxpp->fx_pquantum = FX_TQINF;
		else {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	}

	fxpp->fx_ktid = ddi_get_kt_did();

	fx_change_priority(curthread, fxpp);

	thread_unlock(curthread);

	/*
	 * Link new structure into fxproc list.
	 */
	FX_CB_LIST_INSERT(fxpp);
	return (0);
}

/* unregister a callback set of routines for current thread */
int
fx_unregister_callbacks()
{
	fxproc_t *fxpp;

	if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) {
		/*
		 * did not have a registered callback;
		 */
		return (EINVAL);
	}

	/* fx_list_lookup returned with the bucket lock held. */
	thread_lock(fxpp->fx_tp);
	fxpp->fx_callback = NULL;
	fxpp->fx_cookie = NULL;
	thread_unlock(fxpp->fx_tp);
	fx_list_release(fxpp);

	FX_CB_LIST_DELETE(fxpp);
	return (0);
}

/*
 * modify priority and/or quantum value of a thread with callback
 */
int
fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri)
{
	fxproc_t *fxpp;

	if (!FX_ISVALID(pri, quantum))
		return (EINVAL);

	if ((fxpp = fx_list_lookup(ktid)) == NULL) {
		/*
		 * either thread had exited or did not have a registered
		 * callback;
		 */
		return (ESRCH);
	}

	thread_lock(fxpp->fx_tp);

	/* Same pri/quantum interpretation as fx_register_callbacks(). */
	if (pri != FX_CB_NOCHANGE) {
		fxpp->fx_pri = pri;
		FX_ADJUST_PRI(fxpp->fx_pri);
		if (quantum == FX_TQDEF) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else if (quantum != FX_NOCHANGE) {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	} else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
		if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	}

	fx_change_priority(fxpp->fx_tp, fxpp);

	thread_unlock(fxpp->fx_tp);
	fx_list_release(fxpp);
	return (0);
}


/*
 * return an iblock cookie for mutex initialization to be used in callbacks
 */
void *
fx_get_mutex_cookie()
{
	return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL));
}

/*
 * return maximum relative priority
 */
pri_t
fx_get_maxpri()
{
	return (fx_maxumdpri);
}