1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2013, Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/cred.h>
  34 #include <sys/proc.h>
  35 #include <sys/session.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/signal.h>
  38 #include <sys/user.h>
  39 #include <sys/priocntl.h>
  40 #include <sys/class.h>
  41 #include <sys/disp.h>
  42 #include <sys/procset.h>
  43 #include <sys/debug.h>
  44 #include <sys/ts.h>
  45 #include <sys/tspriocntl.h>
  46 #include <sys/iapriocntl.h>
  47 #include <sys/kmem.h>
  48 #include <sys/errno.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/systm.h>            /* for lbolt */
  51 #include <sys/vtrace.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/schedctl.h>
  54 #include <sys/tnf_probe.h>
  55 #include <sys/atomic.h>
  56 #include <sys/policy.h>
  57 #include <sys/sdt.h>
  58 #include <sys/cpupart.h>
  59 #include <vm/rm.h>
  60 #include <vm/seg_kmem.h>
  61 #include <sys/modctl.h>
  62 #include <sys/cpucaps.h>
  63 
/* Class initialization routine; defined later in this file. */
static pri_t ts_init(id_t, int, classfuncs_t **);

/*
 * Scheduling class descriptor: the class name ("TS") and the routine
 * that initializes the class.
 */
static struct sclass csw = {
        "TS",
        ts_init,
        0
};

/* Module linkage element that wraps the scheduling class descriptor. */
static struct modlsched modlsched = {
        &mod_schedops, "time sharing sched class", &csw
};

/* Top-level linkage handed to mod_install() and mod_info() below. */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlsched, NULL
};
  79 
  80 int
  81 _init()
  82 {
  83         return (mod_install(&modlinkage));
  84 }
  85 
  86 int
  87 _fini()
  88 {
  89         return (EBUSY);         /* don't remove TS for now */
  90 }
  91 
  92 int
  93 _info(struct modinfo *modinfop)
  94 {
  95         return (mod_info(&modlinkage, modinfop));
  96 }
  97 
  98 /*
  99  * Class specific code for the time-sharing class
 100  */
 101 
 102 
/*
 * Class-wide variables shared by the time-sharing (TS) and interactive
 * (IA) classes.  ts_maxumdpri, ts_dptbl and ts_kmdpris have no static
 * initializer here; ts_init() fills them in from the ts_get*() accessors.
 */
#define TSMAXUPRI 60

pri_t   ts_maxupri = TSMAXUPRI; /* max time-sharing user priority */
pri_t   ts_maxumdpri;           /* maximum user mode ts priority */

pri_t   ia_maxupri = IAMAXUPRI; /* max interactive user priority */
pri_t   ia_boost = IA_BOOST;    /* boost value for interactive */

tsdpent_t  *ts_dptbl;   /* time-sharing disp parameter table */
pri_t   *ts_kmdpris;    /* array of global pris used by ts procs when */
                        /*  sleeping or running in kernel after sleep */

/* Class id of the IA class; recorded by ia_init(), tested in ts_enterclass() */
static id_t ia_cid;

/* NOTE(review): not referenced in the visible part of this file */
int ts_sleep_promote = 1;

/* Half of ts_maxumdpri; ts_enterclass() uses it as the initial ts_cpupri */
#define tsmedumdpri     (ts_maxumdpri >> 1)
 123 
 124 #define TS_NEWUMDPRI(tspp) \
 125 { \
 126         pri_t pri; \
 127         pri = (tspp)->ts_cpupri + (tspp)->ts_upri + (tspp)->ts_boost; \
 128         if (pri > ts_maxumdpri) \
 129                 (tspp)->ts_umdpri = ts_maxumdpri; \
 130         else if (pri < 0) \
 131                 (tspp)->ts_umdpri = 0; \
 132         else \
 133                 (tspp)->ts_umdpri = pri; \
 134         ASSERT((tspp)->ts_umdpri >= 0 && (tspp)->ts_umdpri <= ts_maxumdpri); \
 135 }
 136 
/*
 * The tsproc_t structures are kept in an array of circular doubly linked
 * lists.  A hash on the thread pointer is used to determine which list
 * each thread should be placed.  Each list has a dummy "head" which is
 * never removed, so the list is never empty.  ts_update traverses these
 * lists to update the priorities of threads that have been waiting on
 * the run queue.
 */

/*
 * Both macros below mask with (TS_LISTS - 1), which is why the count
 * has to be a power of 2.
 */
#define TS_LISTS 16             /* number of lists, must be power of 2 */

/*
 * hash function, argument is a thread pointer: drop the low 9 bits of
 * the address and keep the next log2(TS_LISTS) bits as the list index
 */
#define TS_LIST_HASH(tp)        (((uintptr_t)(tp) >> 9) & (TS_LISTS - 1))

/* iterate to the next list, wrapping from the last list back to list 0 */
#define TS_LIST_NEXT(i)         (((i) + 1) & (TS_LISTS - 1))
 153 
 154 /*
 155  * Insert thread into the appropriate tsproc list.
 156  */
 157 #define TS_LIST_INSERT(tspp)                            \
 158 {                                                       \
 159         int index = TS_LIST_HASH(tspp->ts_tp);               \
 160         kmutex_t *lockp = &ts_list_lock[index];             \
 161         tsproc_t *headp = &ts_plisthead[index];             \
 162         mutex_enter(lockp);                             \
 163         tspp->ts_next = headp->ts_next;                   \
 164         tspp->ts_prev = headp;                               \
 165         headp->ts_next->ts_prev = tspp;                   \
 166         headp->ts_next = tspp;                               \
 167         mutex_exit(lockp);                              \
 168 }
 169 
 170 /*
 171  * Remove thread from tsproc list.
 172  */
 173 #define TS_LIST_DELETE(tspp)                            \
 174 {                                                       \
 175         int index = TS_LIST_HASH(tspp->ts_tp);               \
 176         kmutex_t *lockp = &ts_list_lock[index];             \
 177         mutex_enter(lockp);                             \
 178         tspp->ts_prev->ts_next = tspp->ts_next;                \
 179         tspp->ts_next->ts_prev = tspp->ts_prev;                \
 180         mutex_exit(lockp);                              \
 181 }
 182 
 183 
 184 static int      ts_admin(caddr_t, cred_t *);
 185 static int      ts_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
 186 static int      ts_fork(kthread_t *, kthread_t *, void *);
 187 static int      ts_getclinfo(void *);
 188 static int      ts_getclpri(pcpri_t *);
 189 static int      ts_parmsin(void *);
 190 static int      ts_parmsout(void *, pc_vaparms_t *);
 191 static int      ts_vaparmsin(void *, pc_vaparms_t *);
 192 static int      ts_vaparmsout(void *, pc_vaparms_t *);
 193 static int      ts_parmsset(kthread_t *, void *, id_t, cred_t *);
 194 static void     ts_exit(kthread_t *);
 195 static int      ts_donice(kthread_t *, cred_t *, int, int *);
 196 static int      ts_doprio(kthread_t *, cred_t *, int, int *);
 197 static void     ts_exitclass(void *);
 198 static int      ts_canexit(kthread_t *, cred_t *);
 199 static void     ts_forkret(kthread_t *, kthread_t *);
 200 static void     ts_nullsys();
 201 static void     ts_parmsget(kthread_t *, void *);
 202 static void     ts_preempt(kthread_t *);
 203 static void     ts_setrun(kthread_t *);
 204 static void     ts_sleep(kthread_t *);
 205 static void     ts_tick(kthread_t *);
 206 static void     ts_trapret(kthread_t *);
 207 static void     ts_update(void *);
 208 static int      ts_update_list(int);
 209 static void     ts_wakeup(kthread_t *);
 210 static pri_t    ts_globpri(kthread_t *);
 211 static void     ts_yield(kthread_t *);
 212 extern tsdpent_t *ts_getdptbl(void);
 213 extern pri_t    *ts_getkmdpris(void);
 214 extern pri_t    td_getmaxumdpri(void);
 215 static int      ts_alloc(void **, int);
 216 static void     ts_free(void *);
 217 
 218 pri_t           ia_init(id_t, int, classfuncs_t **);
 219 static int      ia_getclinfo(void *);
 220 static int      ia_getclpri(pcpri_t *);
 221 static int      ia_parmsin(void *);
 222 static int      ia_vaparmsin(void *, pc_vaparms_t *);
 223 static int      ia_vaparmsout(void *, pc_vaparms_t *);
 224 static int      ia_parmsset(kthread_t *, void *, id_t, cred_t *);
 225 static void     ia_parmsget(kthread_t *, void *);
 226 static void     ia_set_process_group(pid_t, pid_t, pid_t);
 227 
 228 static void     ts_change_priority(kthread_t *, tsproc_t *);
 229 
 230 extern pri_t    ts_maxkmdpri;   /* maximum kernel mode ts priority */
 231 static pri_t    ts_maxglobpri;  /* maximum global priority used by ts class */
 232 static kmutex_t ts_dptblock;    /* protects time sharing dispatch table */
 233 static kmutex_t ts_list_lock[TS_LISTS]; /* protects tsproc lists */
 234 static tsproc_t ts_plisthead[TS_LISTS]; /* dummy tsproc at head of lists */
 235 
 236 static gid_t    IA_gid = 0;
 237 
/*
 * Operations vector for the time-sharing class; ts_init() hands its
 * address back to the caller.  The initializers are positional, so their
 * order must match the member order of struct classfuncs.  Slots TS has
 * no work for are filled with ts_nullsys.
 */
static struct classfuncs ts_classfuncs = {
        /* class functions */
        ts_admin,
        ts_getclinfo,
        ts_parmsin,
        ts_parmsout,
        ts_vaparmsin,
        ts_vaparmsout,
        ts_getclpri,
        ts_alloc,
        ts_free,

        /* thread functions */
        ts_enterclass,
        ts_exitclass,
        ts_canexit,
        ts_fork,
        ts_forkret,
        ts_parmsget,
        ts_parmsset,
        ts_nullsys,     /* stop */
        ts_exit,
        ts_nullsys,     /* active */
        ts_nullsys,     /* inactive */
        ts_trapret,
        ts_preempt,
        ts_setrun,
        ts_sleep,
        ts_tick,
        ts_wakeup,
        ts_donice,
        ts_globpri,
        ts_nullsys,     /* set_process_group */
        ts_yield,
        ts_doprio,
};
 274 
/*
 * ia_classfuncs is used for interactive class threads; IA threads are stored
 * on the same class list as TS threads, and most of the class functions are
 * identical, but a few have different enough functionality to require their
 * own functions.
 *
 * The entries that differ from ts_classfuncs are the ia_* ones: getclinfo,
 * parmsin, vaparmsin, vaparmsout, getclpri, parmsget, parmsset and
 * set_process_group.  The initializers are positional, so the order must
 * stay in step with ts_classfuncs.
 */
static struct classfuncs ia_classfuncs = {
        /* class functions */
        ts_admin,
        ia_getclinfo,
        ia_parmsin,
        ts_parmsout,
        ia_vaparmsin,
        ia_vaparmsout,
        ia_getclpri,
        ts_alloc,
        ts_free,

        /* thread functions */
        ts_enterclass,
        ts_exitclass,
        ts_canexit,
        ts_fork,
        ts_forkret,
        ia_parmsget,
        ia_parmsset,
        ts_nullsys,     /* stop */
        ts_exit,
        ts_nullsys,     /* active */
        ts_nullsys,     /* inactive */
        ts_trapret,
        ts_preempt,
        ts_setrun,
        ts_sleep,
        ts_tick,
        ts_wakeup,
        ts_donice,
        ts_globpri,
        ia_set_process_group,
        ts_yield,
        ts_doprio,
};
 317 
 318 
 319 /*
 320  * Time sharing class initialization.  Called by dispinit() at boot time.
 321  * We can ignore the clparmsz argument since we know that the smallest
 322  * possible parameter buffer is big enough for us.
 323  */
 324 /* ARGSUSED */
 325 static pri_t
 326 ts_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 327 {
 328         int i;
 329         extern pri_t ts_getmaxumdpri(void);
 330 
 331         ts_dptbl = ts_getdptbl();
 332         ts_kmdpris = ts_getkmdpris();
 333         ts_maxumdpri = ts_getmaxumdpri();
 334         ts_maxglobpri = MAX(ts_kmdpris[0], ts_dptbl[ts_maxumdpri].ts_globpri);
 335 
 336         /*
 337          * Initialize the tsproc lists.
 338          */
 339         for (i = 0; i < TS_LISTS; i++) {
 340                 ts_plisthead[i].ts_next = ts_plisthead[i].ts_prev =
 341                     &ts_plisthead[i];
 342         }
 343 
 344         /*
 345          * We're required to return a pointer to our classfuncs
 346          * structure and the highest global priority value we use.
 347          */
 348         *clfuncspp = &ts_classfuncs;
 349         return (ts_maxglobpri);
 350 }
 351 
 352 
 353 /*
 354  * Interactive class scheduler initialization
 355  */
 356 /* ARGSUSED */
 357 pri_t
 358 ia_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 359 {
 360         /*
 361          * We're required to return a pointer to our classfuncs
 362          * structure and the highest global priority value we use.
 363          */
 364         ia_cid = cid;
 365         *clfuncspp = &ia_classfuncs;
 366         return (ts_maxglobpri);
 367 }
 368 
 369 
 370 /*
 371  * Get or reset the ts_dptbl values per the user's request.
 372  */
 373 static int
 374 ts_admin(caddr_t uaddr, cred_t *reqpcredp)
 375 {
 376         tsadmin_t       tsadmin;
 377         tsdpent_t       *tmpdpp;
 378         int             userdpsz;
 379         int             i;
 380         size_t          tsdpsz;
 381 
 382         if (get_udatamodel() == DATAMODEL_NATIVE) {
 383                 if (copyin(uaddr, &tsadmin, sizeof (tsadmin_t)))
 384                         return (EFAULT);
 385         }
 386 #ifdef _SYSCALL32_IMPL
 387         else {
 388                 /* get tsadmin struct from ILP32 caller */
 389                 tsadmin32_t tsadmin32;
 390                 if (copyin(uaddr, &tsadmin32, sizeof (tsadmin32_t)))
 391                         return (EFAULT);
 392                 tsadmin.ts_dpents =
 393                     (struct tsdpent *)(uintptr_t)tsadmin32.ts_dpents;
 394                 tsadmin.ts_ndpents = tsadmin32.ts_ndpents;
 395                 tsadmin.ts_cmd = tsadmin32.ts_cmd;
 396         }
 397 #endif /* _SYSCALL32_IMPL */
 398 
 399         tsdpsz = (ts_maxumdpri + 1) * sizeof (tsdpent_t);
 400 
 401         switch (tsadmin.ts_cmd) {
 402         case TS_GETDPSIZE:
 403                 tsadmin.ts_ndpents = ts_maxumdpri + 1;
 404 
 405                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 406                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 407                                 return (EFAULT);
 408                 }
 409 #ifdef _SYSCALL32_IMPL
 410                 else {
 411                         /* return tsadmin struct to ILP32 caller */
 412                         tsadmin32_t tsadmin32;
 413                         tsadmin32.ts_dpents =
 414                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 415                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 416                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 417                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 418                                 return (EFAULT);
 419                 }
 420 #endif /* _SYSCALL32_IMPL */
 421                 break;
 422 
 423         case TS_GETDPTBL:
 424                 userdpsz = MIN(tsadmin.ts_ndpents * sizeof (tsdpent_t),
 425                     tsdpsz);
 426                 if (copyout(ts_dptbl, tsadmin.ts_dpents, userdpsz))
 427                         return (EFAULT);
 428 
 429                 tsadmin.ts_ndpents = userdpsz / sizeof (tsdpent_t);
 430 
 431                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 432                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 433                                 return (EFAULT);
 434                 }
 435 #ifdef _SYSCALL32_IMPL
 436                 else {
 437                         /* return tsadmin struct to ILP32 callers */
 438                         tsadmin32_t tsadmin32;
 439                         tsadmin32.ts_dpents =
 440                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 441                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 442                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 443                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 444                                 return (EFAULT);
 445                 }
 446 #endif /* _SYSCALL32_IMPL */
 447                 break;
 448 
 449         case TS_SETDPTBL:
 450                 /*
 451                  * We require that the requesting process has sufficient
 452                  * priveleges.  We also require that the table supplied by
 453                  * the user exactly match the current ts_dptbl in size.
 454                  */
 455                 if (secpolicy_dispadm(reqpcredp) != 0)
 456                         return (EPERM);
 457 
 458                 if (tsadmin.ts_ndpents * sizeof (tsdpent_t) != tsdpsz) {
 459                         return (EINVAL);
 460                 }
 461 
 462                 /*
 463                  * We read the user supplied table into a temporary buffer
 464                  * where it is validated before being copied over the
 465                  * ts_dptbl.
 466                  */
 467                 tmpdpp = kmem_alloc(tsdpsz, KM_SLEEP);
 468                 if (copyin((caddr_t)tsadmin.ts_dpents, (caddr_t)tmpdpp,
 469                     tsdpsz)) {
 470                         kmem_free(tmpdpp, tsdpsz);
 471                         return (EFAULT);
 472                 }
 473                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 474 
 475                         /*
 476                          * Validate the user supplied values.  All we are doing
 477                          * here is verifying that the values are within their
 478                          * allowable ranges and will not panic the system.  We
 479                          * make no attempt to ensure that the resulting
 480                          * configuration makes sense or results in reasonable
 481                          * performance.
 482                          */
 483                         if (tmpdpp[i].ts_quantum <= 0) {
 484                                 kmem_free(tmpdpp, tsdpsz);
 485                                 return (EINVAL);
 486                         }
 487                         if (tmpdpp[i].ts_tqexp > ts_maxumdpri ||
 488                             tmpdpp[i].ts_tqexp < 0) {
 489                                 kmem_free(tmpdpp, tsdpsz);
 490                                 return (EINVAL);
 491                         }
 492                         if (tmpdpp[i].ts_slpret > ts_maxumdpri ||
 493                             tmpdpp[i].ts_slpret < 0) {
 494                                 kmem_free(tmpdpp, tsdpsz);
 495                                 return (EINVAL);
 496                         }
 497                         if (tmpdpp[i].ts_maxwait < 0) {
 498                                 kmem_free(tmpdpp, tsdpsz);
 499                                 return (EINVAL);
 500                         }
 501                         if (tmpdpp[i].ts_lwait > ts_maxumdpri ||
 502                             tmpdpp[i].ts_lwait < 0) {
 503                                 kmem_free(tmpdpp, tsdpsz);
 504                                 return (EINVAL);
 505                         }
 506                 }
 507 
 508                 /*
 509                  * Copy the user supplied values over the current ts_dptbl
 510                  * values.  The ts_globpri member is read-only so we don't
 511                  * overwrite it.
 512                  */
 513                 mutex_enter(&ts_dptblock);
 514                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 515                         ts_dptbl[i].ts_quantum = tmpdpp[i].ts_quantum;
 516                         ts_dptbl[i].ts_tqexp = tmpdpp[i].ts_tqexp;
 517                         ts_dptbl[i].ts_slpret = tmpdpp[i].ts_slpret;
 518                         ts_dptbl[i].ts_maxwait = tmpdpp[i].ts_maxwait;
 519                         ts_dptbl[i].ts_lwait = tmpdpp[i].ts_lwait;
 520                 }
 521                 mutex_exit(&ts_dptblock);
 522                 kmem_free(tmpdpp, tsdpsz);
 523                 break;
 524 
 525         default:
 526                 return (EINVAL);
 527         }
 528         return (0);
 529 }
 530 
 531 
/*
 * Initialize the time-sharing class specific thread structure supplied
 * in bufp with the parameters given, attach it to thread t, and move the
 * thread to the resulting time-sharing priority.
 *
 * t          thread entering the class
 * cid        class id being entered; compared against ia_cid to decide
 *            whether the thread gets the IA flags and boost
 * parmsp     optional tsparms_t; NULL selects default values
 * reqpcredp  credential of the requester, used for the privilege checks
 * bufp       tsproc_t buffer provided by the caller; must not be NULL
 *
 * Returns 0 on success or EPERM if a privilege check fails.  On EPERM the
 * buffer is not attached to the thread and is not freed here.
 */
static int
ts_enterclass(kthread_t *t, id_t cid, void *parmsp,
        cred_t *reqpcredp, void *bufp)
{
        tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
        tsproc_t        *tspp;
        pri_t           reqtsuprilim;
        pri_t           reqtsupri;
        static uint32_t tspexists = 0;  /* set on first occurrence of */
                                        /*   a time-sharing process */

        tspp = (tsproc_t *)bufp;
        ASSERT(tspp != NULL);

        /*
         * Initialize the tsproc structure.
         */
        tspp->ts_cpupri = tsmedumdpri;
        if (cid == ia_cid) {
                /*
                 * Check to make sure caller is either privileged or the
                 * window system.  When the window system is converted
                 * to using privileges, the second check can go away.
                 * A NULL credential skips the check entirely.
                 */
                if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
                    secpolicy_setpriority(reqpcredp) != 0)
                        return (EPERM);
                /*
                 * Belongs to IA "class", so set appropriate flags.
                 * Mark as 'on' so it will not be a swap victim
                 * while forking.
                 */
                tspp->ts_flags = TSIA | TSIASET;
                tspp->ts_boost = ia_boost;
        } else {
                tspp->ts_flags = 0;
                tspp->ts_boost = 0;
        }

        if (tsparmsp == NULL) {
                /*
                 * Use default values.
                 */
                tspp->ts_uprilim = tspp->ts_upri = 0;
                tspp->ts_nice = NZERO;
        } else {
                /*
                 * Use supplied values.  TS_NOCHANGE for the limit means
                 * a limit of 0; a limit above 0 requires privilege.
                 */
                if (tsparmsp->ts_uprilim == TS_NOCHANGE)
                        reqtsuprilim = 0;
                else {
                        if (tsparmsp->ts_uprilim > 0 &&
                            secpolicy_setpriority(reqpcredp) != 0)
                                return (EPERM);
                        reqtsuprilim = tsparmsp->ts_uprilim;
                }

                /* TS_NOCHANGE for the priority means "use the limit" */
                if (tsparmsp->ts_upri == TS_NOCHANGE) {
                        reqtsupri = reqtsuprilim;
                } else {
                        if (tsparmsp->ts_upri > 0 &&
                            secpolicy_setpriority(reqpcredp) != 0)
                                return (EPERM);
                        /*
                         * Set the user priority to the requested value
                         * or the upri limit, whichever is lower.
                         */
                        reqtsupri = tsparmsp->ts_upri;
                        if (reqtsupri > reqtsuprilim)
                                reqtsupri = reqtsuprilim;
                }


                tspp->ts_uprilim = reqtsuprilim;
                tspp->ts_upri = reqtsupri;
                /* Derive the nice value by scaling upri into NZERO units */
                tspp->ts_nice = NZERO - (NZERO * reqtsupri) / ts_maxupri;
        }
        /* ts_cpupri, ts_upri and ts_boost are all set; derive ts_umdpri */
        TS_NEWUMDPRI(tspp);

        tspp->ts_dispwait = 0;
        tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
        tspp->ts_tp = t;
        cpucaps_sc_init(&tspp->ts_caps);

        /*
         * Reset priority. Process goes to a "user mode" priority
         * here regardless of whether or not it has slept since
         * entering the kernel.
         */
        thread_lock(t);                 /* get dispatcher lock on thread */
        t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
        t->t_cid = cid;
        t->t_cldata = (void *)tspp;
        t->t_schedflag &= ~TS_RUNQMATCH;
        ts_change_priority(t, tspp);
        thread_unlock(t);

        /*
         * Link new structure into tsproc list.  This is done after the
         * thread lock has been dropped; the list has its own lock.
         */
        TS_LIST_INSERT(tspp);

        /*
         * If this is the first time-sharing thread to occur since
         * boot we set up the initial call to ts_update() here.
         * Use an atomic compare-and-swap since that's easier and
         * faster than a mutex (but check with an ordinary load first
         * since most of the time this will already be done).
         */
        if (tspexists == 0 && cas32(&tspexists, 0, 1) == 0)
                (void) timeout(ts_update, NULL, hz);

        return (0);
}
 652 
 653 
 654 /*
 655  * Free tsproc structure of thread.
 656  */
 657 static void
 658 ts_exitclass(void *procp)
 659 {
 660         tsproc_t *tspp = (tsproc_t *)procp;
 661 
 662         /* Remove tsproc_t structure from list */
 663         TS_LIST_DELETE(tspp);
 664         kmem_free(tspp, sizeof (tsproc_t));
 665 }
 666 
 667 /* ARGSUSED */
 668 static int
 669 ts_canexit(kthread_t *t, cred_t *cred)
 670 {
 671         /*
 672          * A thread can always leave a TS/IA class
 673          */
 674         return (0);
 675 }
 676 
 677 static int
 678 ts_fork(kthread_t *t, kthread_t *ct, void *bufp)
 679 {
 680         tsproc_t        *ptspp;         /* ptr to parent's tsproc structure */
 681         tsproc_t        *ctspp;         /* ptr to child's tsproc structure */
 682 
 683         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 684 
 685         ctspp = (tsproc_t *)bufp;
 686         ASSERT(ctspp != NULL);
 687         ptspp = (tsproc_t *)t->t_cldata;
 688         /*
 689          * Initialize child's tsproc structure.
 690          */
 691         thread_lock(t);
 692         ctspp->ts_timeleft = ts_dptbl[ptspp->ts_cpupri].ts_quantum;
 693         ctspp->ts_cpupri = ptspp->ts_cpupri;
 694         ctspp->ts_boost = ptspp->ts_boost;
 695         ctspp->ts_uprilim = ptspp->ts_uprilim;
 696         ctspp->ts_upri = ptspp->ts_upri;
 697         TS_NEWUMDPRI(ctspp);
 698         ctspp->ts_nice = ptspp->ts_nice;
 699         ctspp->ts_dispwait = 0;
 700         ctspp->ts_flags = ptspp->ts_flags & ~(TSKPRI | TSBACKQ | TSRESTORE);
 701         ctspp->ts_tp = ct;
 702         cpucaps_sc_init(&ctspp->ts_caps);
 703         thread_unlock(t);
 704 
 705         /*
 706          * Link new structure into tsproc list.
 707          */
 708         ct->t_cldata = (void *)ctspp;
 709         TS_LIST_INSERT(ctspp);
 710         return (0);
 711 }
 712 
 713 
/*
 * Child is placed at back of dispatcher queue and parent gives
 * up processor so that the child runs first after the fork.
 * This allows the child immediately execing to break the multiple
 * use of copy on write pages with no disk home. The parent will
 * get to steal them back rather than uselessly copying them.
 *
 * t is the parent and must be the current thread; ct is the child.
 * Called with pidlock held; pidlock is dropped before returning.
 */
static void
ts_forkret(kthread_t *t, kthread_t *ct)
{
        proc_t  *pp = ttoproc(t);
        proc_t  *cp = ttoproc(ct);
        tsproc_t *tspp;

        ASSERT(t == curthread);
        ASSERT(MUTEX_HELD(&pidlock));

        /*
         * Grab the child's p_lock before dropping pidlock to ensure
         * the process does not disappear before we set it running.
         */
        mutex_enter(&cp->p_lock);
        continuelwps(cp);
        mutex_exit(&cp->p_lock);

        /* Take the parent's p_lock before letting go of pidlock. */
        mutex_enter(&pp->p_lock);
        mutex_exit(&pidlock);
        continuelwps(pp);

        /*
         * Treat the parent as if its quantum had expired: move ts_cpupri
         * to the table's ts_tqexp value, recompute the user mode priority,
         * refill the quantum, and requeue the parent via ts_setrun().
         */
        thread_lock(t);
        tspp = (tsproc_t *)(t->t_cldata);
        tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
        TS_NEWUMDPRI(tspp);
        tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
        tspp->ts_dispwait = 0;
        t->t_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
        ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
        tspp->ts_flags &= ~TSKPRI;
        THREAD_TRANSITION(t);
        ts_setrun(t);
        thread_unlock(t);
        /*
         * Safe to drop p_lock now since it is safe to change
         * the scheduling class after this point.
         */
        mutex_exit(&pp->p_lock);

        /* Give up the processor. */
        swtch();
}
 763 
 764 
 765 /*
 766  * Get information about the time-sharing class into the buffer
 767  * pointed to by tsinfop. The maximum configured user priority
 768  * is the only information we supply.  ts_getclinfo() is called
 769  * for TS threads, and ia_getclinfo() is called for IA threads.
 770  */
 771 static int
 772 ts_getclinfo(void *infop)
 773 {
 774         tsinfo_t *tsinfop = (tsinfo_t *)infop;
 775         tsinfop->ts_maxupri = ts_maxupri;
 776         return (0);
 777 }
 778 
 779 static int
 780 ia_getclinfo(void *infop)
 781 {
 782         iainfo_t *iainfop = (iainfo_t *)infop;
 783         iainfop->ia_maxupri = ia_maxupri;
 784         return (0);
 785 }
 786 
 787 
 788 /*
 789  * Return the user mode scheduling priority range.
 790  */
 791 static int
 792 ts_getclpri(pcpri_t *pcprip)
 793 {
 794         pcprip->pc_clpmax = ts_maxupri;
 795         pcprip->pc_clpmin = -ts_maxupri;
 796         return (0);
 797 }
 798 
 799 
 800 static int
 801 ia_getclpri(pcpri_t *pcprip)
 802 {
 803         pcprip->pc_clpmax = ia_maxupri;
 804         pcprip->pc_clpmin = -ia_maxupri;
 805         return (0);
 806 }
 807 
 808 
/*
 * Do-nothing placeholder for the classfuncs slots this class has no work
 * for (stop, active, inactive and, for TS, set_process_group).  The empty
 * parameter list is intentional: those slots do not share one signature,
 * so this function must remain unprototyped.
 */
static void
ts_nullsys()
{}
 812 
 813 
 814 /*
 815  * Get the time-sharing parameters of the thread pointed to by
 816  * tsprocp into the buffer pointed to by tsparmsp.  ts_parmsget()
 817  * is called for TS threads, and ia_parmsget() is called for IA
 818  * threads.
 819  */
 820 static void
 821 ts_parmsget(kthread_t *t, void *parmsp)
 822 {
 823         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 824         tsparms_t *tsparmsp = (tsparms_t *)parmsp;
 825 
 826         tsparmsp->ts_uprilim = tspp->ts_uprilim;
 827         tsparmsp->ts_upri = tspp->ts_upri;
 828 }
 829 
 830 static void
 831 ia_parmsget(kthread_t *t, void *parmsp)
 832 {
 833         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 834         iaparms_t *iaparmsp = (iaparms_t *)parmsp;
 835 
 836         iaparmsp->ia_uprilim = tspp->ts_uprilim;
 837         iaparmsp->ia_upri = tspp->ts_upri;
 838         if (tspp->ts_flags & TSIASET)
 839                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
 840         else
 841                 iaparmsp->ia_mode = IA_INTERACTIVE_OFF;
 842 }
 843 
 844 
 845 /*
 846  * Check the validity of the time-sharing parameters in the buffer
 847  * pointed to by tsparmsp.
 848  * ts_parmsin() is called for TS threads, and ia_parmsin() is called
 849  * for IA threads.
 850  */
 851 static int
 852 ts_parmsin(void *parmsp)
 853 {
 854         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 855         /*
 856          * Check validity of parameters.
 857          */
 858         if ((tsparmsp->ts_uprilim > ts_maxupri ||
 859             tsparmsp->ts_uprilim < -ts_maxupri) &&
 860             tsparmsp->ts_uprilim != TS_NOCHANGE)
 861                 return (EINVAL);
 862 
 863         if ((tsparmsp->ts_upri > ts_maxupri ||
 864             tsparmsp->ts_upri < -ts_maxupri) &&
 865             tsparmsp->ts_upri != TS_NOCHANGE)
 866                 return (EINVAL);
 867 
 868         return (0);
 869 }
 870 
 871 static int
 872 ia_parmsin(void *parmsp)
 873 {
 874         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 875 
 876         if ((iaparmsp->ia_uprilim > ia_maxupri ||
 877             iaparmsp->ia_uprilim < -ia_maxupri) &&
 878             iaparmsp->ia_uprilim != IA_NOCHANGE) {
 879                 return (EINVAL);
 880         }
 881 
 882         if ((iaparmsp->ia_upri > ia_maxupri ||
 883             iaparmsp->ia_upri < -ia_maxupri) &&
 884             iaparmsp->ia_upri != IA_NOCHANGE) {
 885                 return (EINVAL);
 886         }
 887 
 888         return (0);
 889 }
 890 
 891 
 892 /*
 893  * Check the validity of the time-sharing parameters in the pc_vaparms_t
 894  * structure vaparmsp and put them in the buffer pointed to by tsparmsp.
 895  * pc_vaparms_t contains (key, value) pairs of parameter.
 896  * ts_vaparmsin() is called for TS threads, and ia_vaparmsin() is called
 897  * for IA threads. ts_vaparmsin() is the variable parameter version of
 898  * ts_parmsin() and ia_vaparmsin() is the variable parameter version of
 899  * ia_parmsin().
 900  */
 901 static int
 902 ts_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 903 {
 904         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 905         int             priflag = 0;
 906         int             limflag = 0;
 907         uint_t          cnt;
 908         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 909 
 910 
 911         /*
 912          * TS_NOCHANGE (-32768) is outside of the range of values for
 913          * ts_uprilim and ts_upri. If the structure tsparms_t is changed,
 914          * TS_NOCHANGE should be replaced by a flag word (in the same manner
 915          * as in rt.c).
 916          */
 917         tsparmsp->ts_uprilim = TS_NOCHANGE;
 918         tsparmsp->ts_upri = TS_NOCHANGE;
 919 
 920         /*
 921          * Get the varargs parameter and check validity of parameters.
 922          */
 923         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 924                 return (EINVAL);
 925 
 926         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 927 
 928                 switch (vpp->pc_key) {
 929                 case TS_KY_UPRILIM:
 930                         if (limflag++)
 931                                 return (EINVAL);
 932                         tsparmsp->ts_uprilim = (pri_t)vpp->pc_parm;
 933                         if (tsparmsp->ts_uprilim > ts_maxupri ||
 934                             tsparmsp->ts_uprilim < -ts_maxupri)
 935                                 return (EINVAL);
 936                         break;
 937 
 938                 case TS_KY_UPRI:
 939                         if (priflag++)
 940                                 return (EINVAL);
 941                         tsparmsp->ts_upri = (pri_t)vpp->pc_parm;
 942                         if (tsparmsp->ts_upri > ts_maxupri ||
 943                             tsparmsp->ts_upri < -ts_maxupri)
 944                                 return (EINVAL);
 945                         break;
 946 
 947                 default:
 948                         return (EINVAL);
 949                 }
 950         }
 951 
 952         if (vaparmsp->pc_vaparmscnt == 0) {
 953                 /*
 954                  * Use default parameters.
 955                  */
 956                 tsparmsp->ts_upri = tsparmsp->ts_uprilim = 0;
 957         }
 958 
 959         return (0);
 960 }
 961 
 962 static int
 963 ia_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 964 {
 965         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 966         int             priflag = 0;
 967         int             limflag = 0;
 968         int             mflag = 0;
 969         uint_t          cnt;
 970         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 971 
 972         /*
 973          * IA_NOCHANGE (-32768) is outside of the range of values for
 974          * ia_uprilim, ia_upri and ia_mode. If the structure iaparms_t is
 975          * changed, IA_NOCHANGE should be replaced by a flag word (in the
 976          * same manner as in rt.c).
 977          */
 978         iaparmsp->ia_uprilim = IA_NOCHANGE;
 979         iaparmsp->ia_upri = IA_NOCHANGE;
 980         iaparmsp->ia_mode = IA_NOCHANGE;
 981 
 982         /*
 983          * Get the varargs parameter and check validity of parameters.
 984          */
 985         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 986                 return (EINVAL);
 987 
 988         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 989 
 990                 switch (vpp->pc_key) {
 991                 case IA_KY_UPRILIM:
 992                         if (limflag++)
 993                                 return (EINVAL);
 994                         iaparmsp->ia_uprilim = (pri_t)vpp->pc_parm;
 995                         if (iaparmsp->ia_uprilim > ia_maxupri ||
 996                             iaparmsp->ia_uprilim < -ia_maxupri)
 997                                 return (EINVAL);
 998                         break;
 999 
1000                 case IA_KY_UPRI:
1001                         if (priflag++)
1002                                 return (EINVAL);
1003                         iaparmsp->ia_upri = (pri_t)vpp->pc_parm;
1004                         if (iaparmsp->ia_upri > ia_maxupri ||
1005                             iaparmsp->ia_upri < -ia_maxupri)
1006                                 return (EINVAL);
1007                         break;
1008 
1009                 case IA_KY_MODE:
1010                         if (mflag++)
1011                                 return (EINVAL);
1012                         iaparmsp->ia_mode = (int)vpp->pc_parm;
1013                         if (iaparmsp->ia_mode != IA_SET_INTERACTIVE &&
1014                             iaparmsp->ia_mode != IA_INTERACTIVE_OFF)
1015                                 return (EINVAL);
1016                         break;
1017 
1018                 default:
1019                         return (EINVAL);
1020                 }
1021         }
1022 
1023         if (vaparmsp->pc_vaparmscnt == 0) {
1024                 /*
1025                  * Use default parameters.
1026                  */
1027                 iaparmsp->ia_upri = iaparmsp->ia_uprilim = 0;
1028                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
1029         }
1030 
1031         return (0);
1032 }
1033 
/*
 * Nothing to do here but return success: neither parmsp nor vaparmsp
 * is examined or modified, and the result is always 0.
 */
/* ARGSUSED */
static int
ts_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
{
        return (0);
}
1043 
1044 
1045 /*
1046  * Copy all selected time-sharing class parameters to the user.
1047  * The parameters are specified by a key.
1048  */
1049 static int
1050 ts_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1051 {
1052         tsparms_t       *tsprmsp = (tsparms_t *)prmsp;
1053         int             priflag = 0;
1054         int             limflag = 0;
1055         uint_t          cnt;
1056         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1057 
1058         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1059 
1060         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1061                 return (EINVAL);
1062 
1063         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1064 
1065                 switch (vpp->pc_key) {
1066                 case TS_KY_UPRILIM:
1067                         if (limflag++)
1068                                 return (EINVAL);
1069                         if (copyout(&tsprmsp->ts_uprilim,
1070                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1071                                 return (EFAULT);
1072                         break;
1073 
1074                 case TS_KY_UPRI:
1075                         if (priflag++)
1076                                 return (EINVAL);
1077                         if (copyout(&tsprmsp->ts_upri,
1078                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1079                                 return (EFAULT);
1080                         break;
1081 
1082                 default:
1083                         return (EINVAL);
1084                 }
1085         }
1086 
1087         return (0);
1088 }
1089 
1090 
1091 /*
1092  * Copy all selected interactive class parameters to the user.
1093  * The parameters are specified by a key.
1094  */
1095 static int
1096 ia_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1097 {
1098         iaparms_t       *iaprmsp = (iaparms_t *)prmsp;
1099         int             priflag = 0;
1100         int             limflag = 0;
1101         int             mflag = 0;
1102         uint_t          cnt;
1103         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1104 
1105         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1106 
1107         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1108                 return (EINVAL);
1109 
1110         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1111 
1112                 switch (vpp->pc_key) {
1113                 case IA_KY_UPRILIM:
1114                         if (limflag++)
1115                                 return (EINVAL);
1116                         if (copyout(&iaprmsp->ia_uprilim,
1117                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1118                                 return (EFAULT);
1119                         break;
1120 
1121                 case IA_KY_UPRI:
1122                         if (priflag++)
1123                                 return (EINVAL);
1124                         if (copyout(&iaprmsp->ia_upri,
1125                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1126                                 return (EFAULT);
1127                         break;
1128 
1129                 case IA_KY_MODE:
1130                         if (mflag++)
1131                                 return (EINVAL);
1132                         if (copyout(&iaprmsp->ia_mode,
1133                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
1134                                 return (EFAULT);
1135                         break;
1136 
1137                 default:
1138                         return (EINVAL);
1139                 }
1140         }
1141         return (0);
1142 }
1143 
1144 
1145 /*
1146  * Set the scheduling parameters of the thread pointed to by tsprocp
1147  * to those specified in the buffer pointed to by tsparmsp.
1148  * ts_parmsset() is called for TS threads, and ia_parmsset() is
1149  * called for IA threads.
1150  */
1151 /* ARGSUSED */
1152 static int
1153 ts_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1154 {
1155         char            nice;
1156         pri_t           reqtsuprilim;
1157         pri_t           reqtsupri;
1158         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
1159         tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
1160 
1161         ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
1162 
1163         if (tsparmsp->ts_uprilim == TS_NOCHANGE)
1164                 reqtsuprilim = tspp->ts_uprilim;
1165         else
1166                 reqtsuprilim = tsparmsp->ts_uprilim;
1167 
1168         if (tsparmsp->ts_upri == TS_NOCHANGE)
1169                 reqtsupri = tspp->ts_upri;
1170         else
1171                 reqtsupri = tsparmsp->ts_upri;
1172 
1173         /*
1174          * Make sure the user priority doesn't exceed the upri limit.
1175          */
1176         if (reqtsupri > reqtsuprilim)
1177                 reqtsupri = reqtsuprilim;
1178 
1179         /*
1180          * Basic permissions enforced by generic kernel code
1181          * for all classes require that a thread attempting
1182          * to change the scheduling parameters of a target
1183          * thread be privileged or have a real or effective
1184          * UID matching that of the target thread. We are not
1185          * called unless these basic permission checks have
1186          * already passed. The time-sharing class requires in
1187          * addition that the calling thread be privileged if it
1188          * is attempting to raise the upri limit above its current
1189          * value This may have been checked previously but if our
1190          * caller passed us a non-NULL credential pointer we assume
1191          * it hasn't and we check it here.
1192          */
1193         if (reqpcredp != NULL &&
1194             reqtsuprilim > tspp->ts_uprilim &&
1195             secpolicy_raisepriority(reqpcredp) != 0)
1196                 return (EPERM);
1197 
1198         /*
1199          * Set ts_nice to the nice value corresponding to the user
1200          * priority we are setting.  Note that setting the nice field
1201          * of the parameter struct won't affect upri or nice.
1202          */
1203         nice = NZERO - (reqtsupri * NZERO) / ts_maxupri;
1204         if (nice >= 2 * NZERO)
1205                 nice = 2 * NZERO - 1;
1206 
1207         thread_lock(tx);
1208 
1209         tspp->ts_uprilim = reqtsuprilim;
1210         tspp->ts_upri = reqtsupri;
1211         TS_NEWUMDPRI(tspp);
1212         tspp->ts_nice = nice;
1213 
1214         if ((tspp->ts_flags & TSKPRI) != 0) {
1215                 thread_unlock(tx);
1216                 return (0);
1217         }
1218 
1219         tspp->ts_dispwait = 0;
1220         ts_change_priority(tx, tspp);
1221         thread_unlock(tx);
1222         return (0);
1223 }
1224 
1225 
/*
 * Interactive-class version of ts_parmsset(): apply the parameters in
 * the iaparms_t buffer at parmsp to thread tx.
 *
 * When ia_mode is IA_NOCHANGE only the priority fields matter and the
 * request is handed straight to ts_parmsset().  Otherwise the
 * interactive mode of the thread (and, for a session leader with a
 * controlling stream, of the stream's process group) is updated first
 * and the priority fields are then applied through ts_parmsset().
 *
 * The caller must hold pidlock and the p_lock of tx's process (both are
 * asserted below).  Returns 0 without changing anything when the caller
 * lacks permission to change modes or when the process is in SIDL
 * state; otherwise returns whatever ts_parmsset() returns.
 */
static int
ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
{
        tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
        iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
        proc_t          *p;
        pid_t           pid, pgid, sid;
        pid_t           on, off;
        struct stdata   *stp;
        int             sess_held;      /* nonzero while s_lock is held */

        /*
         * Handle user priority changes
         */
        if (iaparmsp->ia_mode == IA_NOCHANGE)
                return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));

        /*
         * Check permissions for changing modes.  With a non-NULL
         * credential the caller must either be a member of group IA_gid
         * or pass secpolicy_raisepriority().
         */

        if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
            secpolicy_raisepriority(reqpcredp) != 0) {
                /*
                 * Silently fail in case this is just a priocntl
                 * call with upri and uprilim set to IA_NOCHANGE.
                 */
                return (0);
        }

        ASSERT(MUTEX_HELD(&pidlock));
        if ((p = ttoproc(tx)) == NULL) {
                return (0);
        }
        ASSERT(MUTEX_HELD(&p->p_lock));
        if (p->p_stat == SIDL) {
                return (0);
        }
        pid = p->p_pid;
        sid = p->p_sessp->s_sid;
        pgid = p->p_pgrp;
        if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
                /*
                 * session leaders must be turned on now so all processes
                 * in the group controlling the tty will be turned on or off.
                 * if the ia_mode is off for the session leader,
                 * ia_set_process_group will return without setting the
                 * processes in the group controlling the tty on.
                 */
                thread_lock(tx);
                tspp->ts_flags |= TSIASET;
                thread_unlock(tx);
        }
        mutex_enter(&p->p_sessp->s_lock);
        sess_held = 1;
        /*
         * Only when this process's pid equals its session id, and the
         * session has a vnode with an associated stream, is the stream's
         * process group switched on or off.
         */
        if ((pid == sid) && (p->p_sessp->s_vp != NULL) &&
            ((stp = p->p_sessp->s_vp->v_stream) != NULL)) {
                if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) {
                        pgid = stp->sd_pgidp->pid_id;
                        /*
                         * The session lock is released before calling
                         * ia_set_process_group().
                         */
                        sess_held = 0;
                        mutex_exit(&p->p_sessp->s_lock);
                        if (iaparmsp->ia_mode ==
                            IA_SET_INTERACTIVE) {
                                off = 0;
                                on = pgid;
                        } else {
                                off = pgid;
                                on = 0;
                        }
                        TRACE_3(TR_FAC_IA, TR_ACTIVE_CHAIN,
                            "active chain:pid %d gid %d %p",
                            pid, pgid, p);
                        ia_set_process_group(sid, off, on);
                }
        }
        /* Release s_lock if the path above did not already do so. */
        if (sess_held)
                mutex_exit(&p->p_sessp->s_lock);

        thread_lock(tx);

        /*
         * Record the new mode on the thread itself: interactive threads
         * get a boost of ia_boost, non-interactive ones -ia_boost.
         */
        if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
                tspp->ts_flags |= TSIASET;
                tspp->ts_boost = ia_boost;
        } else {
                tspp->ts_flags &= ~TSIASET;
                tspp->ts_boost = -ia_boost;
        }
        thread_unlock(tx);

        /* Finally apply the upri/uprilim part of the request. */
        return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));
}
1317 
1318 static void
1319 ts_exit(kthread_t *t)
1320 {
1321         tsproc_t *tspp;
1322 
1323         if (CPUCAPS_ON()) {
1324                 /*
1325                  * A thread could be exiting in between clock ticks,
1326                  * so we need to calculate how much CPU time it used
1327                  * since it was charged last time.
1328                  *
1329                  * CPU caps are not enforced on exiting processes - it is
1330                  * usually desirable to exit as soon as possible to free
1331                  * resources.
1332                  */
1333                 thread_lock(t);
1334                 tspp = (tsproc_t *)t->t_cldata;
1335                 (void) cpucaps_charge(t, &tspp->ts_caps, CPUCAPS_CHARGE_ONLY);
1336                 thread_unlock(t);
1337         }
1338 }
1339 
1340 /*
1341  * Return the global scheduling priority that would be assigned
1342  * to a thread entering the time-sharing class with the ts_upri.
1343  */
1344 static pri_t
1345 ts_globpri(kthread_t *t)
1346 {
1347         tsproc_t *tspp;
1348         pri_t   tspri;
1349 
1350         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
1351         tspp = (tsproc_t *)t->t_cldata;
1352         tspri = tsmedumdpri + tspp->ts_upri;
1353         if (tspri > ts_maxumdpri)
1354                 tspri = ts_maxumdpri;
1355         else if (tspri < 0)
1356                 tspri = 0;
1357         return (ts_dptbl[tspri].ts_globpri);
1358 }
1359 
/*
 * Arrange for thread to be placed in appropriate location
 * on dispatcher queue.
 *
 * This is called with the current thread in TS_ONPROC and locked.
 * t must be curthread (asserted below).  Depending on the thread's
 * class flags, CPU-caps state and preemption-control state, the thread
 * is put on the front or the back of a dispatch queue, or is left
 * alone when CPU caps enforcement applies to it.
 */
static void
ts_preempt(kthread_t *t)
{
        tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
        klwp_t          *lwp = curthread->t_lwp;
        pri_t           oldpri = t->t_pri;      /* reported by the trace */

        ASSERT(t == curthread);
        ASSERT(THREAD_LOCK_HELD(curthread));

        /*
         * If preempted in the kernel, make sure the thread has
         * a kernel priority if needed.
         */
        if (!(tspp->ts_flags & TSKPRI) && lwp != NULL && t->t_kpri_req) {
                tspp->ts_flags |= TSKPRI;
                THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                t->t_trapret = 1;            /* so ts_trapret will run */
                aston(t);
        }

        /*
         * This thread may be placed on wait queue by CPU Caps. In this case we
         * do not need to do anything until it is removed from the wait queue.
         * Do not enforce CPU caps on threads running at a kernel priority
         */
        if (CPUCAPS_ON()) {
                (void) cpucaps_charge(t, &tspp->ts_caps,
                    CPUCAPS_CHARGE_ENFORCE);
                if (!(tspp->ts_flags & TSKPRI) && CPUCAPS_ENFORCE(t))
                        return;
        }

        /*
         * Check to see if we're doing "preemption control" here.  If
         * we are, and if the user has requested that this thread not
         * be preempted, and if preemptions haven't been put off for
         * too long, let the preemption happen here but try to make
         * sure the thread is rescheduled as soon as possible.  We do
         * this by putting it on the front of the highest priority run
         * queue in the TS class.  If the preemption has been put off
         * for too long, clear the "nopreempt" bit and let the thread
         * be preempted.
         */
        if (t->t_schedctl && schedctl_get_nopreempt(t)) {
                if (tspp->ts_timeleft > -SC_MAX_TICKS) {
                        DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
                        if (!(tspp->ts_flags & TSKPRI)) {
                                /*
                                 * If not already remembered, remember current
                                 * priority for restoration in ts_yield().
                                 */
                                if (!(tspp->ts_flags & TSRESTORE)) {
                                        tspp->ts_scpri = t->t_pri;
                                        tspp->ts_flags |= TSRESTORE;
                                }
                                THREAD_CHANGE_PRI(t, ts_maxumdpri);
                        }
                        schedctl_set_yield(t, 1);
                        setfrontdq(t);
                        goto done;
                } else {
                        /*
                         * Preemption has been deferred for too long: put
                         * back any remembered priority and clear the
                         * nopreempt request.
                         */
                        if (tspp->ts_flags & TSRESTORE) {
                                THREAD_CHANGE_PRI(t, tspp->ts_scpri);
                                tspp->ts_flags &= ~TSRESTORE;
                        }
                        schedctl_set_nopreempt(t, 0);
                        DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
                        TNF_PROBE_2(schedctl_preempt, "schedctl TS ts_preempt",
                            /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
                            tnf_lwpid, lwpid, t->t_tid);
                        /*
                         * Fall through and be preempted below.
                         */
                }
        }

        /*
         * Queue placement:
         *  - TSBACKQ without TSKPRI: start a fresh quantum, reset
         *    ts_dispwait and go to the back of the queue;
         *  - TSBACKQ with TSKPRI: go to the back of the queue, leaving
         *    the quantum untouched;
         *  - otherwise: go to the front of the queue.
         * TSBACKQ is cleared whenever it was set.
         */
        if ((tspp->ts_flags & (TSBACKQ|TSKPRI)) == TSBACKQ) {
                tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
                tspp->ts_dispwait = 0;
                tspp->ts_flags &= ~TSBACKQ;
                setbackdq(t);
        } else if ((tspp->ts_flags & (TSBACKQ|TSKPRI)) == (TSBACKQ|TSKPRI)) {
                tspp->ts_flags &= ~TSBACKQ;
                setbackdq(t);
        } else {
                setfrontdq(t);
        }

done:
        TRACE_2(TR_FAC_DISP, TR_PREEMPT,
            "preempt:tid %p old pri %d", t, oldpri);
}
1460 
1461 static void
1462 ts_setrun(kthread_t *t)
1463 {
1464         tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
1465 
1466         ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */
1467 
1468         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1469                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1470                 TS_NEWUMDPRI(tspp);
1471                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1472                 tspp->ts_dispwait = 0;
1473                 if ((tspp->ts_flags & TSKPRI) == 0) {
1474                         THREAD_CHANGE_PRI(t,
1475                             ts_dptbl[tspp->ts_umdpri].ts_globpri);
1476                         ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1477                 }
1478         }
1479 
1480         tspp->ts_flags &= ~TSBACKQ;
1481 
1482         if (tspp->ts_flags & TSIA) {
1483                 if (tspp->ts_flags & TSIASET)
1484                         setfrontdq(t);
1485                 else
1486                         setbackdq(t);
1487         } else {
1488                 if (t->t_disp_time != ddi_get_lbolt())
1489                         setbackdq(t);
1490                 else
1491                         setfrontdq(t);
1492         }
1493 }
1494 
1495 
/*
 * Prepare thread for sleep. We reset the thread priority so it will
 * run at the kernel priority level when it wakes up.
 *
 * t must be curthread and its thread lock must be held (both are
 * asserted below).  Exactly one of three adjustments is made, chosen
 * in this order:
 *  - a kernel priority was requested (t_kpri_req): switch to kernel
 *    priority ts_kmdpris[0] and set TSKPRI;
 *  - the thread has waited longer than ts_maxwait: recompute the user
 *    mode priority, grant a new quantum and clear TSKPRI;
 *  - the thread is at a kernel priority (TSKPRI): return to the global
 *    priority of its current user mode priority and clear TSKPRI.
 * If none applies, the priority is left unchanged.
 */
static void
ts_sleep(kthread_t *t)
{
        tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
        int             flags;
        pri_t           old_pri = t->t_pri;     /* reported by the trace */

        ASSERT(t == curthread);
        ASSERT(THREAD_LOCK_HELD(t));

        /*
         * Account for time spent on CPU before going to sleep.
         */
        (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);

        /* Snapshot of the class flags; updates below are based on it. */
        flags = tspp->ts_flags;
        if (t->t_kpri_req) {
                tspp->ts_flags = flags | TSKPRI;
                THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                t->t_trapret = 1;            /* so ts_trapret will run */
                aston(t);
        } else if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
                /*
                 * If thread has blocked in the kernel (as opposed to
                 * being merely preempted), recompute the user mode priority.
                 */
                tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
                TS_NEWUMDPRI(tspp);
                tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
                tspp->ts_dispwait = 0;

                THREAD_CHANGE_PRI(curthread,
                    ts_dptbl[tspp->ts_umdpri].ts_globpri);
                ASSERT(curthread->t_pri >= 0 &&
                    curthread->t_pri <= ts_maxglobpri);
                tspp->ts_flags = flags & ~TSKPRI;

                /* Give up the CPU if the new priority requires it. */
                if (DISP_MUST_SURRENDER(curthread))
                        cpu_surrender(curthread);
        } else if (flags & TSKPRI) {
                /*
                 * Leave the kernel priority: go back to the global
                 * priority of the existing user mode priority, which
                 * is not recomputed here.
                 */
                THREAD_CHANGE_PRI(curthread,
                    ts_dptbl[tspp->ts_umdpri].ts_globpri);
                ASSERT(curthread->t_pri >= 0 &&
                    curthread->t_pri <= ts_maxglobpri);
                tspp->ts_flags = flags & ~TSKPRI;

                if (DISP_MUST_SURRENDER(curthread))
                        cpu_surrender(curthread);
        }
        TRACE_2(TR_FAC_DISP, TR_SLEEP,
            "sleep:tid %p old pri %d", t, old_pri);
}
1553 
/*
 * Check for time slice expiration.  If time slice has expired
 * move thread to priority specified in tsdptbl for time slice expiration
 * and set runrun to cause preemption.
 *
 * The caller must hold the p_lock of t's process (asserted below).
 * The thread lock is taken here and released with
 * thread_unlock_nopreempt() on every return path.
 */
static void
ts_tick(kthread_t *t)
{
        tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
        klwp_t *lwp;
        boolean_t call_cpu_surrender = B_FALSE;
        pri_t   oldpri = t->t_pri;      /* reported by the trace */

        ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

        thread_lock(t);

        /*
         * Keep track of thread's project CPU usage.  Note that projects
         * get charged even when threads are running in the kernel.
         * A surrender is requested only when cpucaps_charge() returns
         * nonzero and the thread is not at a kernel priority.
         */
        if (CPUCAPS_ON()) {
                call_cpu_surrender = cpucaps_charge(t, &tspp->ts_caps,
                    CPUCAPS_CHARGE_ENFORCE) && !(tspp->ts_flags & TSKPRI);
        }

        /* The quantum is only counted down at user mode priorities. */
        if ((tspp->ts_flags & TSKPRI) == 0) {
                if (--tspp->ts_timeleft <= 0) {
                        pri_t   new_pri;

                        /*
                         * If we're doing preemption control and trying to
                         * avoid preempting this thread, just note that
                         * the thread should yield soon and let it keep
                         * running (unless it's been a while).
                         */
                        if (t->t_schedctl && schedctl_get_nopreempt(t)) {
                                if (tspp->ts_timeleft > -SC_MAX_TICKS) {
                                        DTRACE_SCHED1(schedctl__nopreempt,
                                            kthread_t *, t);
                                        schedctl_set_yield(t, 1);
                                        thread_unlock_nopreempt(t);
                                        return;
                                }

                                TNF_PROBE_2(schedctl_failsafe,
                                    "schedctl TS ts_tick", /* CSTYLED */,
                                    tnf_pid, pid, ttoproc(t)->p_pid,
                                    tnf_lwpid, lwpid, t->t_tid);
                        }
                        /*
                         * Quantum expired: move to the ts_tqexp CPU
                         * priority and recompute the user mode priority.
                         */
                        tspp->ts_flags &= ~TSRESTORE;
                        tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
                        TS_NEWUMDPRI(tspp);
                        tspp->ts_dispwait = 0;
                        new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
                        ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
                        /*
                         * When the priority of a thread is changed,
                         * it may be necessary to adjust its position
                         * on a sleep queue or dispatch queue.
                         * The function thread_change_pri accomplishes
                         * this.  A nonzero return grants a new quantum
                         * here; a zero return requests a surrender.
                         */
                        if (thread_change_pri(t, new_pri, 0)) {
                                tspp->ts_timeleft =
                                    ts_dptbl[tspp->ts_cpupri].ts_quantum;
                        } else {
                                call_cpu_surrender = B_TRUE;
                        }
                        TRACE_2(TR_FAC_DISP, TR_TICK,
                            "tick:tid %p old pri %d", t, oldpri);
                } else if (t->t_state == TS_ONPROC &&
                    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
                        /*
                         * Quantum not yet expired, but the thread's
                         * dispatch queue has a higher maximum runnable
                         * priority than the running thread.
                         */
                        call_cpu_surrender = B_TRUE;
                }
        }

        if (call_cpu_surrender) {
                /* TSBACKQ is examined by ts_preempt() for queue placement. */
                tspp->ts_flags |= TSBACKQ;
                cpu_surrender(t);
        }

        thread_unlock_nopreempt(t);     /* clock thread can't be preempted */
}
1638 
1639 
/*
 * If thread is currently at a kernel mode priority (has slept)
 * we assign it the appropriate user mode priority and time quantum
 * here.  If we are lowering the thread's priority below that of
 * other runnable threads we will normally set runrun via cpu_surrender() to
 * cause preemption.
 *
 * t must be curthread, locked, in TS_ONPROC state and the thread being
 * dispatched on the current CPU (all asserted below).  The pending
 * kernel priority request (t_kpri_req) is always cleared.
 */
static void
ts_trapret(kthread_t *t)
{
        tsproc_t        *tspp = (tsproc_t *)t->t_cldata;
        cpu_t           *cp = CPU;
        pri_t           old_pri = curthread->t_pri;     /* for the trace */

        ASSERT(THREAD_LOCK_HELD(t));
        ASSERT(t == curthread);
        ASSERT(cp->cpu_dispthread == t);
        ASSERT(t->t_state == TS_ONPROC);

        t->t_kpri_req = 0;
        if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
                /*
                 * The thread has waited longer than ts_maxwait: move to
                 * the ts_slpret CPU priority, recompute the user mode
                 * priority and grant a new quantum.
                 */
                tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
                TS_NEWUMDPRI(tspp);
                tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
                tspp->ts_dispwait = 0;

                /*
                 * Switch to the global priority of the recomputed user
                 * mode priority and keep the CPU's dispatch priority in
                 * step with it.
                 */
                THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
                cp->cpu_dispatch_pri = DISP_PRIO(t);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                tspp->ts_flags &= ~TSKPRI;

                if (DISP_MUST_SURRENDER(t))
                        cpu_surrender(t);
        } else if (tspp->ts_flags & TSKPRI) {
                /*
                 * The thread is at a kernel priority but has not waited
                 * past ts_maxwait: return to the global priority of its
                 * existing user mode priority, which is not recomputed
                 * here, and leave the quantum untouched.
                 */
                THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
                cp->cpu_dispatch_pri = DISP_PRIO(t);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                tspp->ts_flags &= ~TSKPRI;

                if (DISP_MUST_SURRENDER(t))
                        cpu_surrender(t);
        }

        TRACE_2(TR_FAC_DISP, TR_TRAPRET,
            "trapret:tid %p old pri %d", t, old_pri);
}
1694 
1695 
1696 /*
1697  * Update the ts_dispwait values of all time sharing threads that
1698  * are currently runnable at a user mode priority and bump the priority
1699  * if ts_dispwait exceeds ts_maxwait.  Called once per second via
1700  * timeout which we reset here.
1701  *
1702  * There are several lists of time sharing threads broken up by a hash on
1703  * the thread pointer.  Each list has its own lock.  This avoids blocking
1704  * all ts_enterclass, ts_fork, and ts_exitclass operations while ts_update
1705  * runs.  ts_update traverses each list in turn.
1706  *
1707  * If multiple threads have their priorities updated to the same value,
1708  * the system implicitly favors the one that is updated first (since it
1709  * winds up first on the run queue).  To avoid this unfairness, the
1710  * traversal of threads starts at the list indicated by a marker.  When
1711  * threads in more than one list have their priorities updated, the marker
1712  * is moved.  This changes the order the threads will be placed on the run
1713  * queue the next time ts_update is called and preserves fairness over the
1714  * long run.  The marker doesn't need to be protected by a lock since it's
1715  * only accessed by ts_update, which is inherently single-threaded (only
1716  * one instance can be running at a time).
1717  */
1718 static void
1719 ts_update(void *arg)
1720 {
1721         int             i;
1722         int             new_marker = -1;
1723         static int      ts_update_marker;
1724 
1725         /*
1726          * Start with the ts_update_marker list, then do the rest.
1727          */
1728         i = ts_update_marker;
1729         do {
1730                 /*
1731                  * If this is the first list after the current marker to
1732                  * have threads with priorities updated, advance the marker
1733                  * to this list for the next time ts_update runs.
1734                  */
1735                 if (ts_update_list(i) && new_marker == -1 &&
1736                     i != ts_update_marker) {
1737                         new_marker = i;
1738                 }
1739         } while ((i = TS_LIST_NEXT(i)) != ts_update_marker);
1740 
1741         /* advance marker for next ts_update call */
1742         if (new_marker != -1)
1743                 ts_update_marker = new_marker;
1744 
1745         (void) timeout(ts_update, arg, hz);
1746 }
1747 
1748 /*
1749  * Updates priority for a list of threads.  Returns 1 if the priority of
1750  * one of the threads was actually updated, 0 if none were for various
1751  * reasons (thread is no longer in the TS or IA class, isn't runnable,
1752  * hasn't waited long enough, has the preemption control no-preempt bit
1753  * set, etc.)
1754  */
1755 static int
1756 ts_update_list(int i)
1757 {
1758         tsproc_t *tspp;
1759         kthread_t *tx;
1760         int updated = 0;
1761 
1762         mutex_enter(&ts_list_lock[i]);
1763         for (tspp = ts_plisthead[i].ts_next; tspp != &ts_plisthead[i];
1764             tspp = tspp->ts_next) {
1765                 tx = tspp->ts_tp;
1766                 /*
1767                  * Lock the thread and verify state.
1768                  */
1769                 thread_lock(tx);
1770                 /*
1771                  * Skip the thread if it is no longer in the TS (or IA) class.
1772                  */
1773                 if (tx->t_clfuncs != &ts_classfuncs.thread &&
1774                     tx->t_clfuncs != &ia_classfuncs.thread)
1775                         goto next;
1776                 tspp->ts_dispwait++;
1777                 if ((tspp->ts_flags & TSKPRI) != 0)
1778                         goto next;
1779                 if (tspp->ts_dispwait <= ts_dptbl[tspp->ts_umdpri].ts_maxwait)
1780                         goto next;
1781                 if (tx->t_schedctl && schedctl_get_nopreempt(tx))
1782                         goto next;
1783                 if (tx->t_state != TS_RUN && tx->t_state != TS_WAIT &&
1784                     (tx->t_state != TS_SLEEP || !ts_sleep_promote)) {
1785                         /* make next syscall/trap do CL_TRAPRET */
1786                         tx->t_trapret = 1;
1787                         aston(tx);
1788                         goto next;
1789                 }
1790                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_lwait;
1791                 TS_NEWUMDPRI(tspp);
1792                 tspp->ts_dispwait = 0;
1793                 updated = 1;
1794 
1795                 /*
1796                  * Only dequeue it if needs to move; otherwise it should
1797                  * just round-robin here.
1798                  */
1799                 if (tx->t_pri != ts_dptbl[tspp->ts_umdpri].ts_globpri) {
1800                         pri_t oldpri = tx->t_pri;
1801                         ts_change_priority(tx, tspp);
1802                         TRACE_2(TR_FAC_DISP, TR_UPDATE,
1803                             "update:tid %p old pri %d", tx, oldpri);
1804                 }
1805 next:
1806                 thread_unlock(tx);
1807         }
1808         mutex_exit(&ts_list_lock[i]);
1809 
1810         return (updated);
1811 }
1812 
1813 /*
1814  * Processes waking up go to the back of their queue.  We don't
1815  * need to assign a time quantum here because thread is still
1816  * at a kernel mode priority and the time slicing is not done
1817  * for threads running in the kernel after sleeping.  The proper
1818  * time quantum will be assigned by ts_trapret before the thread
1819  * returns to user mode.
1820  */
1821 static void
1822 ts_wakeup(kthread_t *t)
1823 {
1824         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1825 
1826         ASSERT(THREAD_LOCK_HELD(t));
1827 
1828         if (tspp->ts_flags & TSKPRI) {
1829                 tspp->ts_flags &= ~TSBACKQ;
1830                 if (tspp->ts_flags & TSIASET)
1831                         setfrontdq(t);
1832                 else
1833                         setbackdq(t);
1834         } else if (t->t_kpri_req) {
1835                 /*
1836                  * Give thread a priority boost if we were asked.
1837                  */
1838                 tspp->ts_flags |= TSKPRI;
1839                 THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
1840                 setbackdq(t);
1841                 t->t_trapret = 1;    /* so that ts_trapret will run */
1842                 aston(t);
1843         } else {
1844                 if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1845                         tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1846                         TS_NEWUMDPRI(tspp);
1847                         tspp->ts_timeleft =
1848                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
1849                         tspp->ts_dispwait = 0;
1850                         THREAD_CHANGE_PRI(t,
1851                             ts_dptbl[tspp->ts_umdpri].ts_globpri);
1852                         ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1853                 }
1854 
1855                 tspp->ts_flags &= ~TSBACKQ;
1856 
1857                 if (tspp->ts_flags & TSIA) {
1858                         if (tspp->ts_flags & TSIASET)
1859                                 setfrontdq(t);
1860                         else
1861                                 setbackdq(t);
1862                 } else {
1863                         if (t->t_disp_time != ddi_get_lbolt())
1864                                 setbackdq(t);
1865                         else
1866                                 setfrontdq(t);
1867                 }
1868         }
1869 }
1870 
1871 
1872 /*
1873  * When a thread yields, put it on the back of the run queue.
1874  */
1875 static void
1876 ts_yield(kthread_t *t)
1877 {
1878         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1879 
1880         ASSERT(t == curthread);
1881         ASSERT(THREAD_LOCK_HELD(t));
1882 
1883         /*
1884          * Collect CPU usage spent before yielding
1885          */
1886         (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);
1887 
1888         /*
1889          * Clear the preemption control "yield" bit since the user is
1890          * doing a yield.
1891          */
1892         if (t->t_schedctl)
1893                 schedctl_set_yield(t, 0);
1894         /*
1895          * If ts_preempt() artifically increased the thread's priority
1896          * to avoid preemption, restore the original priority now.
1897          */
1898         if (tspp->ts_flags & TSRESTORE) {
1899                 THREAD_CHANGE_PRI(t, tspp->ts_scpri);
1900                 tspp->ts_flags &= ~TSRESTORE;
1901         }
1902         if (tspp->ts_timeleft <= 0) {
1903                 /*
1904                  * Time slice was artificially extended to avoid
1905                  * preemption, so pretend we're preempting it now.
1906                  */
1907                 DTRACE_SCHED1(schedctl__yield, int, -tspp->ts_timeleft);
1908                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
1909                 TS_NEWUMDPRI(tspp);
1910                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1911                 tspp->ts_dispwait = 0;
1912                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1913                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1914         }
1915         tspp->ts_flags &= ~TSBACKQ;
1916         setbackdq(t);
1917 }
1918 
1919 
1920 /*
1921  * Increment the nice value of the specified thread by incr and
1922  * return the new value in *retvalp.
1923  */
1924 static int
1925 ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1926 {
1927         int             newnice;
1928         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1929         tsparms_t       tsparms;
1930 
1931         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1932 
1933         /* If there's no change to priority, just return current setting */
1934         if (incr == 0) {
1935                 if (retvalp) {
1936                         *retvalp = tspp->ts_nice - NZERO;
1937                 }
1938                 return (0);
1939         }
1940 
1941         if ((incr < 0 || incr > 2 * NZERO) &&
1942             secpolicy_raisepriority(cr) != 0)
1943                 return (EPERM);
1944 
1945         /*
1946          * Specifying a nice increment greater than the upper limit of
1947          * 2 * NZERO - 1 will result in the thread's nice value being
1948          * set to the upper limit.  We check for this before computing
1949          * the new value because otherwise we could get overflow
1950          * if a privileged process specified some ridiculous increment.
1951          */
1952         if (incr > 2 * NZERO - 1)
1953                 incr = 2 * NZERO - 1;
1954 
1955         newnice = tspp->ts_nice + incr;
1956         if (newnice >= 2 * NZERO)
1957                 newnice = 2 * NZERO - 1;
1958         else if (newnice < 0)
1959                 newnice = 0;
1960 
1961         tsparms.ts_uprilim = tsparms.ts_upri =
1962             -((newnice - NZERO) * ts_maxupri) / NZERO;
1963         /*
1964          * Reset the uprilim and upri values of the thread.
1965          * Call ts_parmsset even if thread is interactive since we're
1966          * not changing mode.
1967          */
1968         (void) ts_parmsset(t, (void *)&tsparms, (id_t)0, (cred_t *)NULL);
1969 
1970         /*
1971          * Although ts_parmsset already reset ts_nice it may
1972          * not have been set to precisely the value calculated above
1973          * because ts_parmsset determines the nice value from the
1974          * user priority and we may have truncated during the integer
1975          * conversion from nice value to user priority and back.
1976          * We reset ts_nice to the value we calculated above.
1977          */
1978         tspp->ts_nice = (char)newnice;
1979 
1980         if (retvalp)
1981                 *retvalp = newnice - NZERO;
1982         return (0);
1983 }
1984 
1985 /*
1986  * Increment the priority of the specified thread by incr and
1987  * return the new value in *retvalp.
1988  */
1989 static int
1990 ts_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1991 {
1992         int             newpri;
1993         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1994         tsparms_t       tsparms;
1995 
1996         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1997 
1998         /* If there's no change to the priority, just return current setting */
1999         if (incr == 0) {
2000                 *retvalp = tspp->ts_upri;
2001                 return (0);
2002         }
2003 
2004         newpri = tspp->ts_upri + incr;
2005         if (newpri > ts_maxupri || newpri < -ts_maxupri)
2006                 return (EINVAL);
2007 
2008         *retvalp = newpri;
2009         tsparms.ts_uprilim = tsparms.ts_upri = newpri;
2010         /*
2011          * Reset the uprilim and upri values of the thread.
2012          * Call ts_parmsset even if thread is interactive since we're
2013          * not changing mode.
2014          */
2015         return (ts_parmsset(t, &tsparms, 0, cr));
2016 }
2017 
/*
 * ia_set_process_group marks foreground processes as interactive
 * and background processes as non-interactive iff the session
 * leader is interactive.  This routine is called from two places:
 *      strioctl:SPGRP when a new process group gets
 *              control of the tty.
 *      ia_parmsset-when the process in question is a session leader.
 * ia_set_process_group assumes that pidlock is held by the caller,
 * either strioctl or priocntlsys.  If the caller is priocntlsys
 * (via ia_parmsset) then the p_lock of the session leader is held
 * and the code needs to be careful about acquiring other p_locks.
 *
 * sid is the session id whose leader is examined, fg_pgid is the
 * process group to turn interactive and bg_pgid is the process group
 * to turn non-interactive.  A pgid of 0 means that group is skipped.
 */
static void
ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid)
{
        proc_t          *leader, *fg, *bg;
        tsproc_t        *tspp;
        kthread_t       *tx;
        int             plocked = 0;    /* set if leader's p_lock is owned */

        ASSERT(MUTEX_HELD(&pidlock));

        /*
         * see if the session leader is interactive AND
         * if it is currently "on" AND controlling a tty
         * iff it is then make the processes in the foreground
         * group interactive and the processes in the background
         * group non-interactive.
         */
        if ((leader = (proc_t *)prfind(sid)) == NULL) {
                return;
        }
        if (leader->p_stat == SIDL) {
                return;
        }
        if ((tx = proctot(leader)) == NULL) {
                return;
        }
        /*
         * XXX do all the threads in the leader
         * (only the thread returned by proctot() is checked here)
         */
        if (tx->t_cid != ia_cid) {
                return;
        }
        tspp = tx->t_cldata;
        /*
         * session leaders that are not interactive need not have
         * any processing done for them.  They are typically shells
         * that do not have focus and are changing the process group
         * attached to the tty, e.g. a process that is exiting
         */
        mutex_enter(&leader->p_sessp->s_lock);
        if (!(tspp->ts_flags & TSIASET) ||
            (leader->p_sessp->s_vp == NULL) ||
            (leader->p_sessp->s_vp->v_stream == NULL)) {
                mutex_exit(&leader->p_sessp->s_lock);
                return;
        }
        mutex_exit(&leader->p_sessp->s_lock);

        /*
         * If we're already holding the leader's p_lock, we should use
         * mutex_tryenter instead of mutex_enter to avoid deadlocks from
         * lock ordering violations.
         */
        if (mutex_owned(&leader->p_lock))
                plocked = 1;

        if (fg_pgid == 0)
                goto skip;
        /*
         * now look for all processes in the foreground group and
         * make them interactive
         */
        for (fg = (proc_t *)pgfind(fg_pgid); fg != NULL; fg = fg->p_pglink) {
                /*
                 * if the process is SIDL it's being forked, ignore it
                 */
                if (fg->p_stat == SIDL) {
                        continue;
                }
                /*
                 * session leaders must be turned on/off explicitly
                 * not implicitly as happens to other members of
                 * the process group.
                 */
                if (fg->p_pid  == fg->p_sessp->s_sid) {
                        continue;
                }

                TRACE_1(TR_FAC_IA, TR_GROUP_ON,
                    "group on:proc %p", fg);

                /*
                 * When the leader's p_lock is already owned, a process
                 * whose p_lock cannot be taken immediately is skipped
                 * rather than waited for.
                 */
                if (plocked) {
                        if (mutex_tryenter(&fg->p_lock) == 0)
                                continue;
                } else {
                        mutex_enter(&fg->p_lock);
                }

                if ((tx = proctot(fg)) == NULL) {
                        mutex_exit(&fg->p_lock);
                        continue;
                }
                /*
                 * Walk the process's threads via t_forw until we are
                 * back at p_tlist.  Each "continue" below jumps to the
                 * loop condition, which advances to the next thread.
                 */
                do {
                        thread_lock(tx);
                        /*
                         * if this thread is not interactive continue
                         */
                        if (tx->t_cid != ia_cid) {
                                thread_unlock(tx);
                                continue;
                        }
                        tspp = tx->t_cldata;
                        tspp->ts_flags |= TSIASET;
                        tspp->ts_boost = ia_boost;
                        TS_NEWUMDPRI(tspp);
                        /*
                         * A thread at a kernel mode priority keeps its
                         * current priority; only the flag, boost and
                         * user mode priority above are updated.
                         */
                        if ((tspp->ts_flags & TSKPRI) != 0) {
                                thread_unlock(tx);
                                continue;
                        }
                        tspp->ts_dispwait = 0;
                        ts_change_priority(tx, tspp);
                        thread_unlock(tx);
                } while ((tx = tx->t_forw) != fg->p_tlist);
                mutex_exit(&fg->p_lock);
        }
skip:
        if (bg_pgid == 0)
                return;
        /*
         * now look for all processes in the background group and
         * make them non-interactive
         */
        for (bg = (proc_t *)pgfind(bg_pgid); bg != NULL; bg = bg->p_pglink) {
                /*
                 * as above, ignore a process that is still SIDL
                 */
                if (bg->p_stat == SIDL) {
                        continue;
                }
                /*
                 * session leaders must be turned off explicitly
                 * not implicitly as happens to other members of
                 * the process group.
                 */
                if (bg->p_pid == bg->p_sessp->s_sid) {
                        continue;
                }

                TRACE_1(TR_FAC_IA, TR_GROUP_OFF,
                    "group off:proc %p", bg);

                /*
                 * Same locking rule as in the foreground loop: only
                 * try for the p_lock when the leader's is owned.
                 */
                if (plocked) {
                        if (mutex_tryenter(&bg->p_lock) == 0)
                                continue;
                } else {
                        mutex_enter(&bg->p_lock);
                }

                if ((tx = proctot(bg)) == NULL) {
                        mutex_exit(&bg->p_lock);
                        continue;
                }
                /*
                 * Walk the process's threads; "continue" jumps to the
                 * loop condition, which advances to the next thread.
                 */
                do {
                        thread_lock(tx);
                        /*
                         * if this thread is not interactive continue
                         */
                        if (tx->t_cid != ia_cid) {
                                thread_unlock(tx);
                                continue;
                        }
                        tspp = tx->t_cldata;
                        tspp->ts_flags &= ~TSIASET;
                        tspp->ts_boost = -ia_boost;
                        TS_NEWUMDPRI(tspp);
                        /*
                         * A thread at a kernel mode priority keeps its
                         * current priority for now.
                         */
                        if ((tspp->ts_flags & TSKPRI) != 0) {
                                thread_unlock(tx);
                                continue;
                        }

                        tspp->ts_dispwait = 0;
                        ts_change_priority(tx, tspp);
                        thread_unlock(tx);
                } while ((tx = tx->t_forw) != bg->p_tlist);
                mutex_exit(&bg->p_lock);
        }
}
2200 
2201 
2202 static void
2203 ts_change_priority(kthread_t *t, tsproc_t *tspp)
2204 {
2205         pri_t   new_pri;
2206 
2207         ASSERT(THREAD_LOCK_HELD(t));
2208         new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
2209         ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
2210         tspp->ts_flags &= ~TSRESTORE;
2211         t->t_cpri = tspp->ts_upri;
2212         if (t == curthread || t->t_state == TS_ONPROC) {
2213                 /* curthread is always onproc */
2214                 cpu_t   *cp = t->t_disp_queue->disp_cpu;
2215                 THREAD_CHANGE_PRI(t, new_pri);
2216                 if (t == cp->cpu_dispthread)
2217                         cp->cpu_dispatch_pri = DISP_PRIO(t);
2218                 if (DISP_MUST_SURRENDER(t)) {
2219                         tspp->ts_flags |= TSBACKQ;
2220                         cpu_surrender(t);
2221                 } else {
2222                         tspp->ts_timeleft =
2223                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2224                 }
2225         } else {
2226                 int     frontq;
2227 
2228                 frontq = (tspp->ts_flags & TSIASET) != 0;
2229                 /*
2230                  * When the priority of a thread is changed,
2231                  * it may be necessary to adjust its position
2232                  * on a sleep queue or dispatch queue.
2233                  * The function thread_change_pri accomplishes
2234                  * this.
2235                  */
2236                 if (thread_change_pri(t, new_pri, frontq)) {
2237                         /*
2238                          * The thread was on a run queue. Reset
2239                          * its CPU timeleft from the quantum
2240                          * associated with the new priority.
2241                          */
2242                         tspp->ts_timeleft =
2243                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2244                 } else {
2245                         tspp->ts_flags |= TSBACKQ;
2246                 }
2247         }
2248 }
2249 
2250 static int
2251 ts_alloc(void **p, int flag)
2252 {
2253         void *bufp;
2254         bufp = kmem_alloc(sizeof (tsproc_t), flag);
2255         if (bufp == NULL) {
2256                 return (ENOMEM);
2257         } else {
2258                 *p = bufp;
2259                 return (0);
2260         }
2261 }
2262 
2263 static void
2264 ts_free(void *bufp)
2265 {
2266         if (bufp)
2267                 kmem_free(bufp, sizeof (tsproc_t));
2268 }