1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2013, Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/cred.h>
  34 #include <sys/proc.h>
  35 #include <sys/session.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/signal.h>
  38 #include <sys/user.h>
  39 #include <sys/priocntl.h>
  40 #include <sys/class.h>
  41 #include <sys/disp.h>
  42 #include <sys/procset.h>
  43 #include <sys/debug.h>
  44 #include <sys/ts.h>
  45 #include <sys/tspriocntl.h>
  46 #include <sys/iapriocntl.h>
  47 #include <sys/kmem.h>
  48 #include <sys/errno.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/systm.h>            /* for lbolt */
  51 #include <sys/vtrace.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/schedctl.h>
  54 #include <sys/tnf_probe.h>
  55 #include <sys/atomic.h>
  56 #include <sys/policy.h>
  57 #include <sys/sdt.h>
  58 #include <sys/cpupart.h>
  59 #include <vm/rm.h>
  60 #include <vm/seg_kmem.h>
  61 #include <sys/modctl.h>
  62 #include <sys/cpucaps.h>
  63 
  64 static pri_t ts_init(id_t, int, classfuncs_t **);
  65 
/*
 * Scheduling-class descriptor for this module: the class name "TS" and
 * ts_init(), the class initialization routine defined later in this file.
 * NOTE(review): the meaning of the trailing 0 depends on the layout of
 * struct sclass, which is not visible here.
 */
static struct sclass csw = {
        "TS",
        ts_init,
        0
};

/*
 * Scheduling-class module description: ties the descriptive string
 * "time sharing sched class" and the csw descriptor to mod_schedops.
 */
static struct modlsched modlsched = {
        &mod_schedops, "time sharing sched class", &csw
};

/*
 * Module linkage passed to mod_install() by _init() and to mod_info() by
 * _info().
 */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlsched, NULL
};
  79 
  80 int
  81 _init()
  82 {
  83         return (mod_install(&modlinkage));
  84 }
  85 
  86 int
  87 _fini()
  88 {
  89         return (EBUSY);         /* don't remove TS for now */
  90 }
  91 
  92 int
  93 _info(struct modinfo *modinfop)
  94 {
  95         return (mod_info(&modlinkage, modinfop));
  96 }
  97 
  98 /*
  99  * Class specific code for the time-sharing class
 100  */
 101 
 102 
/*
 * Tunables and class-wide state for the TS and IA classes.
 *
 * These are definitions, not extern declarations.  ts_dptbl, ts_kmdpris and
 * ts_maxumdpri start out unset and are filled in by ts_init() from
 * ts_getdptbl(), ts_getkmdpris() and ts_getmaxumdpri() respectively.
 */
#define TSMAXUPRI 60

pri_t   ts_maxupri = TSMAXUPRI; /* max time-sharing user priority */
pri_t   ts_maxumdpri;           /* maximum user mode ts priority */

pri_t   ia_maxupri = IAMAXUPRI; /* max interactive user priority */
pri_t   ia_boost = IA_BOOST;    /* boost value for interactive */

tsdpent_t  *ts_dptbl;   /* time-sharing disp parameter table */
pri_t   *ts_kmdpris;    /* array of global pris used by ts procs when */
                        /*  sleeping or running in kernel after sleep */

/* Class id of the IA class; recorded by ia_init(), tested in ts_enterclass() */
static id_t ia_cid;

/* NOTE(review): consumer is outside this part of the file; semantics unverified */
int ts_sleep_promote = 1;

/* Half of ts_maxumdpri; ts_enterclass() uses it as the initial ts_cpupri */
#define tsmedumdpri     (ts_maxumdpri >> 1)
 123 
 124 #define TS_NEWUMDPRI(tspp) \
 125 { \
 126         pri_t pri; \
 127         pri = (tspp)->ts_cpupri + (tspp)->ts_upri + (tspp)->ts_boost; \
 128         if (pri > ts_maxumdpri) \
 129                 (tspp)->ts_umdpri = ts_maxumdpri; \
 130         else if (pri < 0) \
 131                 (tspp)->ts_umdpri = 0; \
 132         else \
 133                 (tspp)->ts_umdpri = pri; \
 134         ASSERT((tspp)->ts_umdpri >= 0 && (tspp)->ts_umdpri <= ts_maxumdpri); \
 135 }
 136 
/*
 * The tsproc_t structures are kept in an array of circular doubly linked
 * lists.  A hash on the thread pointer determines on which list each
 * thread is placed.  Each list has a dummy "head" which is never removed,
 * so the list is never empty.  ts_update traverses these lists to update
 * the priorities of threads that have been waiting on the run queue.
 */

#define TS_LISTS 16             /* number of lists, must be power of 2 */

/*
 * hash function, argument is a thread pointer: drop the low 9 bits of the
 * address and keep as many of the remaining bits as index TS_LISTS lists
 */
#define TS_LIST_HASH(tp)        (((uintptr_t)(tp) >> 9) & (TS_LISTS - 1))

/* iterate to the next list, wrapping from the last list back to list 0 */
#define TS_LIST_NEXT(i)         (((i) + 1) & (TS_LISTS - 1))
 153 
 154 /*
 155  * Insert thread into the appropriate tsproc list.
 156  */
 157 #define TS_LIST_INSERT(tspp)                            \
 158 {                                                       \
 159         int index = TS_LIST_HASH(tspp->ts_tp);               \
 160         kmutex_t *lockp = &ts_list_lock[index];             \
 161         tsproc_t *headp = &ts_plisthead[index];             \
 162         mutex_enter(lockp);                             \
 163         tspp->ts_next = headp->ts_next;                   \
 164         tspp->ts_prev = headp;                               \
 165         headp->ts_next->ts_prev = tspp;                   \
 166         headp->ts_next = tspp;                               \
 167         mutex_exit(lockp);                              \
 168 }
 169 
 170 /*
 171  * Remove thread from tsproc list.
 172  */
 173 #define TS_LIST_DELETE(tspp)                            \
 174 {                                                       \
 175         int index = TS_LIST_HASH(tspp->ts_tp);               \
 176         kmutex_t *lockp = &ts_list_lock[index];             \
 177         mutex_enter(lockp);                             \
 178         tspp->ts_prev->ts_next = tspp->ts_next;                \
 179         tspp->ts_next->ts_prev = tspp->ts_prev;                \
 180         mutex_exit(lockp);                              \
 181 }
 182 
 183 
 184 static int      ts_admin(caddr_t, cred_t *);
 185 static int      ts_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
 186 static int      ts_fork(kthread_t *, kthread_t *, void *);
 187 static int      ts_getclinfo(void *);
 188 static int      ts_getclpri(pcpri_t *);
 189 static int      ts_parmsin(void *);
 190 static int      ts_parmsout(void *, pc_vaparms_t *);
 191 static int      ts_vaparmsin(void *, pc_vaparms_t *);
 192 static int      ts_vaparmsout(void *, pc_vaparms_t *);
 193 static int      ts_parmsset(kthread_t *, void *, id_t, cred_t *);
 194 static void     ts_exit(kthread_t *);
 195 static int      ts_donice(kthread_t *, cred_t *, int, int *);
 196 static int      ts_doprio(kthread_t *, cred_t *, int, int *);
 197 static void     ts_exitclass(void *);
 198 static int      ts_canexit(kthread_t *, cred_t *);
 199 static void     ts_forkret(kthread_t *, kthread_t *);
 200 static void     ts_nullsys();
 201 static void     ts_parmsget(kthread_t *, void *);
 202 static void     ts_preempt(kthread_t *);
 203 static void     ts_setrun(kthread_t *);
 204 static void     ts_sleep(kthread_t *);
 205 static pri_t    ts_swapin(kthread_t *, int);
 206 static pri_t    ts_swapout(kthread_t *, int);
 207 static void     ts_tick(kthread_t *);
 208 static void     ts_trapret(kthread_t *);
 209 static void     ts_update(void *);
 210 static int      ts_update_list(int);
 211 static void     ts_wakeup(kthread_t *);
 212 static pri_t    ts_globpri(kthread_t *);
 213 static void     ts_yield(kthread_t *);
 214 extern tsdpent_t *ts_getdptbl(void);
 215 extern pri_t    *ts_getkmdpris(void);
 216 extern pri_t    td_getmaxumdpri(void);
 217 static int      ts_alloc(void **, int);
 218 static void     ts_free(void *);
 219 
 220 pri_t           ia_init(id_t, int, classfuncs_t **);
 221 static int      ia_getclinfo(void *);
 222 static int      ia_getclpri(pcpri_t *);
 223 static int      ia_parmsin(void *);
 224 static int      ia_vaparmsin(void *, pc_vaparms_t *);
 225 static int      ia_vaparmsout(void *, pc_vaparms_t *);
 226 static int      ia_parmsset(kthread_t *, void *, id_t, cred_t *);
 227 static void     ia_parmsget(kthread_t *, void *);
 228 static void     ia_set_process_group(pid_t, pid_t, pid_t);
 229 
 230 static void     ts_change_priority(kthread_t *, tsproc_t *);
 231 
extern pri_t    ts_maxkmdpri;   /* maximum kernel mode ts priority */
/* ts_maxglobpri is computed once in ts_init() and returned by both init routines */
static pri_t    ts_maxglobpri;  /* maximum global priority used by ts class */
/* ts_dptblock is held by ts_admin() while it overwrites ts_dptbl entries */
static kmutex_t ts_dptblock;    /* protects time sharing dispatch table */
/* one lock and one dummy head per hash list; see TS_LIST_INSERT/TS_LIST_DELETE */
static kmutex_t ts_list_lock[TS_LISTS]; /* protects tsproc lists */
static tsproc_t ts_plisthead[TS_LISTS]; /* dummy tsproc at head of lists */

/*
 * Group id tested with groupmember() in ts_enterclass(): members may place
 * threads in the IA class without passing secpolicy_setpriority().
 */
static gid_t    IA_gid = 0;
 239 
/*
 * Operations vector for the TS class; ts_init() returns its address through
 * clfuncspp.  The initializers are positional, so their order must match
 * the member order of struct classfuncs.  Slots the TS class has no work
 * for are filled with ts_nullsys.
 */
static struct classfuncs ts_classfuncs = {
        /* class functions */
        ts_admin,
        ts_getclinfo,
        ts_parmsin,
        ts_parmsout,
        ts_vaparmsin,
        ts_vaparmsout,
        ts_getclpri,
        ts_alloc,
        ts_free,

        /* thread functions */
        ts_enterclass,
        ts_exitclass,
        ts_canexit,
        ts_fork,
        ts_forkret,
        ts_parmsget,
        ts_parmsset,
        ts_nullsys,     /* stop */
        ts_exit,
        ts_nullsys,     /* active */
        ts_nullsys,     /* inactive */
        ts_swapin,
        ts_swapout,
        ts_trapret,
        ts_preempt,
        ts_setrun,
        ts_sleep,
        ts_tick,
        ts_wakeup,
        ts_donice,
        ts_globpri,
        ts_nullsys,     /* set_process_group */
        ts_yield,
        ts_doprio,
};
 278 
/*
 * ia_classfuncs is used for interactive class threads; IA threads are stored
 * on the same class list as TS threads, and most of the class functions are
 * identical, but a few have different enough functionality to require their
 * own functions.
 *
 * ia_init() returns the address of this vector through clfuncspp.  The
 * initializers are positional and parallel ts_classfuncs slot for slot;
 * only the entries named ia_* differ from the TS vector.
 */
static struct classfuncs ia_classfuncs = {
        /* class functions */
        ts_admin,
        ia_getclinfo,
        ia_parmsin,
        ts_parmsout,
        ia_vaparmsin,
        ia_vaparmsout,
        ia_getclpri,
        ts_alloc,
        ts_free,

        /* thread functions */
        ts_enterclass,
        ts_exitclass,
        ts_canexit,
        ts_fork,
        ts_forkret,
        ia_parmsget,
        ia_parmsset,
        ts_nullsys,     /* stop */
        ts_exit,
        ts_nullsys,     /* active */
        ts_nullsys,     /* inactive */
        ts_swapin,
        ts_swapout,
        ts_trapret,
        ts_preempt,
        ts_setrun,
        ts_sleep,
        ts_tick,
        ts_wakeup,
        ts_donice,
        ts_globpri,
        ia_set_process_group,
        ts_yield,
        ts_doprio,
};
 323 
 324 
 325 /*
 326  * Time sharing class initialization.  Called by dispinit() at boot time.
 327  * We can ignore the clparmsz argument since we know that the smallest
 328  * possible parameter buffer is big enough for us.
 329  */
 330 /* ARGSUSED */
 331 static pri_t
 332 ts_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 333 {
 334         int i;
 335         extern pri_t ts_getmaxumdpri(void);
 336 
 337         ts_dptbl = ts_getdptbl();
 338         ts_kmdpris = ts_getkmdpris();
 339         ts_maxumdpri = ts_getmaxumdpri();
 340         ts_maxglobpri = MAX(ts_kmdpris[0], ts_dptbl[ts_maxumdpri].ts_globpri);
 341 
 342         /*
 343          * Initialize the tsproc lists.
 344          */
 345         for (i = 0; i < TS_LISTS; i++) {
 346                 ts_plisthead[i].ts_next = ts_plisthead[i].ts_prev =
 347                     &ts_plisthead[i];
 348         }
 349 
 350         /*
 351          * We're required to return a pointer to our classfuncs
 352          * structure and the highest global priority value we use.
 353          */
 354         *clfuncspp = &ts_classfuncs;
 355         return (ts_maxglobpri);
 356 }
 357 
 358 
 359 /*
 360  * Interactive class scheduler initialization
 361  */
 362 /* ARGSUSED */
 363 pri_t
 364 ia_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 365 {
 366         /*
 367          * We're required to return a pointer to our classfuncs
 368          * structure and the highest global priority value we use.
 369          */
 370         ia_cid = cid;
 371         *clfuncspp = &ia_classfuncs;
 372         return (ts_maxglobpri);
 373 }
 374 
 375 
 376 /*
 377  * Get or reset the ts_dptbl values per the user's request.
 378  */
 379 static int
 380 ts_admin(caddr_t uaddr, cred_t *reqpcredp)
 381 {
 382         tsadmin_t       tsadmin;
 383         tsdpent_t       *tmpdpp;
 384         int             userdpsz;
 385         int             i;
 386         size_t          tsdpsz;
 387 
 388         if (get_udatamodel() == DATAMODEL_NATIVE) {
 389                 if (copyin(uaddr, &tsadmin, sizeof (tsadmin_t)))
 390                         return (EFAULT);
 391         }
 392 #ifdef _SYSCALL32_IMPL
 393         else {
 394                 /* get tsadmin struct from ILP32 caller */
 395                 tsadmin32_t tsadmin32;
 396                 if (copyin(uaddr, &tsadmin32, sizeof (tsadmin32_t)))
 397                         return (EFAULT);
 398                 tsadmin.ts_dpents =
 399                     (struct tsdpent *)(uintptr_t)tsadmin32.ts_dpents;
 400                 tsadmin.ts_ndpents = tsadmin32.ts_ndpents;
 401                 tsadmin.ts_cmd = tsadmin32.ts_cmd;
 402         }
 403 #endif /* _SYSCALL32_IMPL */
 404 
 405         tsdpsz = (ts_maxumdpri + 1) * sizeof (tsdpent_t);
 406 
 407         switch (tsadmin.ts_cmd) {
 408         case TS_GETDPSIZE:
 409                 tsadmin.ts_ndpents = ts_maxumdpri + 1;
 410 
 411                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 412                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 413                                 return (EFAULT);
 414                 }
 415 #ifdef _SYSCALL32_IMPL
 416                 else {
 417                         /* return tsadmin struct to ILP32 caller */
 418                         tsadmin32_t tsadmin32;
 419                         tsadmin32.ts_dpents =
 420                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 421                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 422                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 423                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 424                                 return (EFAULT);
 425                 }
 426 #endif /* _SYSCALL32_IMPL */
 427                 break;
 428 
 429         case TS_GETDPTBL:
 430                 userdpsz = MIN(tsadmin.ts_ndpents * sizeof (tsdpent_t),
 431                     tsdpsz);
 432                 if (copyout(ts_dptbl, tsadmin.ts_dpents, userdpsz))
 433                         return (EFAULT);
 434 
 435                 tsadmin.ts_ndpents = userdpsz / sizeof (tsdpent_t);
 436 
 437                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 438                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 439                                 return (EFAULT);
 440                 }
 441 #ifdef _SYSCALL32_IMPL
 442                 else {
 443                         /* return tsadmin struct to ILP32 callers */
 444                         tsadmin32_t tsadmin32;
 445                         tsadmin32.ts_dpents =
 446                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 447                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 448                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 449                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 450                                 return (EFAULT);
 451                 }
 452 #endif /* _SYSCALL32_IMPL */
 453                 break;
 454 
 455         case TS_SETDPTBL:
 456                 /*
 457                  * We require that the requesting process has sufficient
 458                  * priveleges.  We also require that the table supplied by
 459                  * the user exactly match the current ts_dptbl in size.
 460                  */
 461                 if (secpolicy_dispadm(reqpcredp) != 0)
 462                         return (EPERM);
 463 
 464                 if (tsadmin.ts_ndpents * sizeof (tsdpent_t) != tsdpsz) {
 465                         return (EINVAL);
 466                 }
 467 
 468                 /*
 469                  * We read the user supplied table into a temporary buffer
 470                  * where it is validated before being copied over the
 471                  * ts_dptbl.
 472                  */
 473                 tmpdpp = kmem_alloc(tsdpsz, KM_SLEEP);
 474                 if (copyin((caddr_t)tsadmin.ts_dpents, (caddr_t)tmpdpp,
 475                     tsdpsz)) {
 476                         kmem_free(tmpdpp, tsdpsz);
 477                         return (EFAULT);
 478                 }
 479                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 480 
 481                         /*
 482                          * Validate the user supplied values.  All we are doing
 483                          * here is verifying that the values are within their
 484                          * allowable ranges and will not panic the system.  We
 485                          * make no attempt to ensure that the resulting
 486                          * configuration makes sense or results in reasonable
 487                          * performance.
 488                          */
 489                         if (tmpdpp[i].ts_quantum <= 0) {
 490                                 kmem_free(tmpdpp, tsdpsz);
 491                                 return (EINVAL);
 492                         }
 493                         if (tmpdpp[i].ts_tqexp > ts_maxumdpri ||
 494                             tmpdpp[i].ts_tqexp < 0) {
 495                                 kmem_free(tmpdpp, tsdpsz);
 496                                 return (EINVAL);
 497                         }
 498                         if (tmpdpp[i].ts_slpret > ts_maxumdpri ||
 499                             tmpdpp[i].ts_slpret < 0) {
 500                                 kmem_free(tmpdpp, tsdpsz);
 501                                 return (EINVAL);
 502                         }
 503                         if (tmpdpp[i].ts_maxwait < 0) {
 504                                 kmem_free(tmpdpp, tsdpsz);
 505                                 return (EINVAL);
 506                         }
 507                         if (tmpdpp[i].ts_lwait > ts_maxumdpri ||
 508                             tmpdpp[i].ts_lwait < 0) {
 509                                 kmem_free(tmpdpp, tsdpsz);
 510                                 return (EINVAL);
 511                         }
 512                 }
 513 
 514                 /*
 515                  * Copy the user supplied values over the current ts_dptbl
 516                  * values.  The ts_globpri member is read-only so we don't
 517                  * overwrite it.
 518                  */
 519                 mutex_enter(&ts_dptblock);
 520                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 521                         ts_dptbl[i].ts_quantum = tmpdpp[i].ts_quantum;
 522                         ts_dptbl[i].ts_tqexp = tmpdpp[i].ts_tqexp;
 523                         ts_dptbl[i].ts_slpret = tmpdpp[i].ts_slpret;
 524                         ts_dptbl[i].ts_maxwait = tmpdpp[i].ts_maxwait;
 525                         ts_dptbl[i].ts_lwait = tmpdpp[i].ts_lwait;
 526                 }
 527                 mutex_exit(&ts_dptblock);
 528                 kmem_free(tmpdpp, tsdpsz);
 529                 break;
 530 
 531         default:
 532                 return (EINVAL);
 533         }
 534         return (0);
 535 }
 536 
 537 
 538 /*
 539  * Allocate a time-sharing class specific thread structure and
 540  * initialize it with the parameters supplied. Also move the thread
 541  * to specified time-sharing priority.
 542  */
 543 static int
 544 ts_enterclass(kthread_t *t, id_t cid, void *parmsp,
 545         cred_t *reqpcredp, void *bufp)
 546 {
 547         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 548         tsproc_t        *tspp;
 549         pri_t           reqtsuprilim;
 550         pri_t           reqtsupri;
 551         static uint32_t tspexists = 0;  /* set on first occurrence of */
 552                                         /*   a time-sharing process */
 553 
 554         tspp = (tsproc_t *)bufp;
 555         ASSERT(tspp != NULL);
 556 
 557         /*
 558          * Initialize the tsproc structure.
 559          */
 560         tspp->ts_cpupri = tsmedumdpri;
 561         if (cid == ia_cid) {
 562                 /*
 563                  * Check to make sure caller is either privileged or the
 564                  * window system.  When the window system is converted
 565                  * to using privileges, the second check can go away.
 566                  */
 567                 if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
 568                     secpolicy_setpriority(reqpcredp) != 0)
 569                         return (EPERM);
 570                 /*
 571                  * Belongs to IA "class", so set appropriate flags.
 572                  * Mark as 'on' so it will not be a swap victim
 573                  * while forking.
 574                  */
 575                 tspp->ts_flags = TSIA | TSIASET;
 576                 tspp->ts_boost = ia_boost;
 577         } else {
 578                 tspp->ts_flags = 0;
 579                 tspp->ts_boost = 0;
 580         }
 581 
 582         if (tsparmsp == NULL) {
 583                 /*
 584                  * Use default values.
 585                  */
 586                 tspp->ts_uprilim = tspp->ts_upri = 0;
 587                 tspp->ts_nice = NZERO;
 588         } else {
 589                 /*
 590                  * Use supplied values.
 591                  */
 592                 if (tsparmsp->ts_uprilim == TS_NOCHANGE)
 593                         reqtsuprilim = 0;
 594                 else {
 595                         if (tsparmsp->ts_uprilim > 0 &&
 596                             secpolicy_setpriority(reqpcredp) != 0)
 597                                 return (EPERM);
 598                         reqtsuprilim = tsparmsp->ts_uprilim;
 599                 }
 600 
 601                 if (tsparmsp->ts_upri == TS_NOCHANGE) {
 602                         reqtsupri = reqtsuprilim;
 603                 } else {
 604                         if (tsparmsp->ts_upri > 0 &&
 605                             secpolicy_setpriority(reqpcredp) != 0)
 606                                 return (EPERM);
 607                         /*
 608                          * Set the user priority to the requested value
 609                          * or the upri limit, whichever is lower.
 610                          */
 611                         reqtsupri = tsparmsp->ts_upri;
 612                         if (reqtsupri > reqtsuprilim)
 613                                 reqtsupri = reqtsuprilim;
 614                 }
 615 
 616 
 617                 tspp->ts_uprilim = reqtsuprilim;
 618                 tspp->ts_upri = reqtsupri;
 619                 tspp->ts_nice = NZERO - (NZERO * reqtsupri) / ts_maxupri;
 620         }
 621         TS_NEWUMDPRI(tspp);
 622 
 623         tspp->ts_dispwait = 0;
 624         tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
 625         tspp->ts_tp = t;
 626         cpucaps_sc_init(&tspp->ts_caps);
 627 
 628         /*
 629          * Reset priority. Process goes to a "user mode" priority
 630          * here regardless of whether or not it has slept since
 631          * entering the kernel.
 632          */
 633         thread_lock(t);                 /* get dispatcher lock on thread */
 634         t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 635         t->t_cid = cid;
 636         t->t_cldata = (void *)tspp;
 637         t->t_schedflag &= ~TS_RUNQMATCH;
 638         ts_change_priority(t, tspp);
 639         thread_unlock(t);
 640 
 641         /*
 642          * Link new structure into tsproc list.
 643          */
 644         TS_LIST_INSERT(tspp);
 645 
 646         /*
 647          * If this is the first time-sharing thread to occur since
 648          * boot we set up the initial call to ts_update() here.
 649          * Use an atomic compare-and-swap since that's easier and
 650          * faster than a mutex (but check with an ordinary load first
 651          * since most of the time this will already be done).
 652          */
 653         if (tspexists == 0 && cas32(&tspexists, 0, 1) == 0)
 654                 (void) timeout(ts_update, NULL, hz);
 655 
 656         return (0);
 657 }
 658 
 659 
 660 /*
 661  * Free tsproc structure of thread.
 662  */
 663 static void
 664 ts_exitclass(void *procp)
 665 {
 666         tsproc_t *tspp = (tsproc_t *)procp;
 667 
 668         /* Remove tsproc_t structure from list */
 669         TS_LIST_DELETE(tspp);
 670         kmem_free(tspp, sizeof (tsproc_t));
 671 }
 672 
 673 /* ARGSUSED */
 674 static int
 675 ts_canexit(kthread_t *t, cred_t *cred)
 676 {
 677         /*
 678          * A thread can always leave a TS/IA class
 679          */
 680         return (0);
 681 }
 682 
 683 static int
 684 ts_fork(kthread_t *t, kthread_t *ct, void *bufp)
 685 {
 686         tsproc_t        *ptspp;         /* ptr to parent's tsproc structure */
 687         tsproc_t        *ctspp;         /* ptr to child's tsproc structure */
 688 
 689         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 690 
 691         ctspp = (tsproc_t *)bufp;
 692         ASSERT(ctspp != NULL);
 693         ptspp = (tsproc_t *)t->t_cldata;
 694         /*
 695          * Initialize child's tsproc structure.
 696          */
 697         thread_lock(t);
 698         ctspp->ts_timeleft = ts_dptbl[ptspp->ts_cpupri].ts_quantum;
 699         ctspp->ts_cpupri = ptspp->ts_cpupri;
 700         ctspp->ts_boost = ptspp->ts_boost;
 701         ctspp->ts_uprilim = ptspp->ts_uprilim;
 702         ctspp->ts_upri = ptspp->ts_upri;
 703         TS_NEWUMDPRI(ctspp);
 704         ctspp->ts_nice = ptspp->ts_nice;
 705         ctspp->ts_dispwait = 0;
 706         ctspp->ts_flags = ptspp->ts_flags & ~(TSKPRI | TSBACKQ | TSRESTORE);
 707         ctspp->ts_tp = ct;
 708         cpucaps_sc_init(&ctspp->ts_caps);
 709         thread_unlock(t);
 710 
 711         /*
 712          * Link new structure into tsproc list.
 713          */
 714         ct->t_cldata = (void *)ctspp;
 715         TS_LIST_INSERT(ctspp);
 716         return (0);
 717 }
 718 
 719 
/*
 * Child is placed at back of dispatcher queue and parent gives
 * up processor so that the child runs first after the fork.
 * This allows the child immediately execing to break the multiple
 * use of copy on write pages with no disk home. The parent will
 * get to steal them back rather than uselessly copying them.
 *
 * t is the parent and must be the current thread; ct is the child.
 * Called with pidlock held; pidlock is dropped here once the parent's
 * p_lock has been taken.
 */
static void
ts_forkret(kthread_t *t, kthread_t *ct)
{
        proc_t  *pp = ttoproc(t);
        proc_t  *cp = ttoproc(ct);
        tsproc_t *tspp;

        ASSERT(t == curthread);
        ASSERT(MUTEX_HELD(&pidlock));

        /*
         * Grab the child's p_lock before dropping pidlock to ensure
         * the process does not disappear before we set it running.
         */
        mutex_enter(&cp->p_lock);
        continuelwps(cp);
        mutex_exit(&cp->p_lock);

        /* Take the parent's p_lock before letting go of pidlock. */
        mutex_enter(&pp->p_lock);
        mutex_exit(&pidlock);
        continuelwps(pp);

        /*
         * Under the thread lock, move the parent to the ts_tqexp level of
         * its current ts_cpupri, recompute its user mode priority, give it
         * a fresh quantum for the new level, and set its global priority
         * from the dispatch table before handing it to ts_setrun().
         */
        thread_lock(t);
        tspp = (tsproc_t *)(t->t_cldata);
        tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
        TS_NEWUMDPRI(tspp);
        tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
        tspp->ts_dispwait = 0;
        t->t_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
        ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
        tspp->ts_flags &= ~TSKPRI;
        THREAD_TRANSITION(t);
        ts_setrun(t);
        thread_unlock(t);
        /*
         * Safe to drop p_lock now since it is safe to change
         * the scheduling class after this point.
         */
        mutex_exit(&pp->p_lock);

        /* Parent gives up the processor. */
        swtch();
}
 769 
 770 
 771 /*
 772  * Get information about the time-sharing class into the buffer
 773  * pointed to by tsinfop. The maximum configured user priority
 774  * is the only information we supply.  ts_getclinfo() is called
 775  * for TS threads, and ia_getclinfo() is called for IA threads.
 776  */
 777 static int
 778 ts_getclinfo(void *infop)
 779 {
 780         tsinfo_t *tsinfop = (tsinfo_t *)infop;
 781         tsinfop->ts_maxupri = ts_maxupri;
 782         return (0);
 783 }
 784 
 785 static int
 786 ia_getclinfo(void *infop)
 787 {
 788         iainfo_t *iainfop = (iainfo_t *)infop;
 789         iainfop->ia_maxupri = ia_maxupri;
 790         return (0);
 791 }
 792 
 793 
 794 /*
 795  * Return the user mode scheduling priority range.
 796  */
 797 static int
 798 ts_getclpri(pcpri_t *pcprip)
 799 {
 800         pcprip->pc_clpmax = ts_maxupri;
 801         pcprip->pc_clpmin = -ts_maxupri;
 802         return (0);
 803 }
 804 
 805 
 806 static int
 807 ia_getclpri(pcpri_t *pcprip)
 808 {
 809         pcprip->pc_clpmax = ia_maxupri;
 810         pcprip->pc_clpmin = -ia_maxupri;
 811         return (0);
 812 }
 813 
 814 
/*
 * Do-nothing class operation.  It fills the stop, active, inactive and
 * (for TS) set_process_group slots of the classfuncs tables.
 * NOTE(review): the empty, non-prototype parameter list looks deliberate,
 * so that one function can sit in slots with different signatures --
 * confirm before changing it to (void).
 */
static void
ts_nullsys()
{}
 818 
 819 
 820 /*
 821  * Get the time-sharing parameters of the thread pointed to by
 822  * tsprocp into the buffer pointed to by tsparmsp.  ts_parmsget()
 823  * is called for TS threads, and ia_parmsget() is called for IA
 824  * threads.
 825  */
 826 static void
 827 ts_parmsget(kthread_t *t, void *parmsp)
 828 {
 829         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 830         tsparms_t *tsparmsp = (tsparms_t *)parmsp;
 831 
 832         tsparmsp->ts_uprilim = tspp->ts_uprilim;
 833         tsparmsp->ts_upri = tspp->ts_upri;
 834 }
 835 
 836 static void
 837 ia_parmsget(kthread_t *t, void *parmsp)
 838 {
 839         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 840         iaparms_t *iaparmsp = (iaparms_t *)parmsp;
 841 
 842         iaparmsp->ia_uprilim = tspp->ts_uprilim;
 843         iaparmsp->ia_upri = tspp->ts_upri;
 844         if (tspp->ts_flags & TSIASET)
 845                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
 846         else
 847                 iaparmsp->ia_mode = IA_INTERACTIVE_OFF;
 848 }
 849 
 850 
 851 /*
 852  * Check the validity of the time-sharing parameters in the buffer
 853  * pointed to by tsparmsp.
 854  * ts_parmsin() is called for TS threads, and ia_parmsin() is called
 855  * for IA threads.
 856  */
 857 static int
 858 ts_parmsin(void *parmsp)
 859 {
 860         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 861         /*
 862          * Check validity of parameters.
 863          */
 864         if ((tsparmsp->ts_uprilim > ts_maxupri ||
 865             tsparmsp->ts_uprilim < -ts_maxupri) &&
 866             tsparmsp->ts_uprilim != TS_NOCHANGE)
 867                 return (EINVAL);
 868 
 869         if ((tsparmsp->ts_upri > ts_maxupri ||
 870             tsparmsp->ts_upri < -ts_maxupri) &&
 871             tsparmsp->ts_upri != TS_NOCHANGE)
 872                 return (EINVAL);
 873 
 874         return (0);
 875 }
 876 
 877 static int
 878 ia_parmsin(void *parmsp)
 879 {
 880         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 881 
 882         if ((iaparmsp->ia_uprilim > ia_maxupri ||
 883             iaparmsp->ia_uprilim < -ia_maxupri) &&
 884             iaparmsp->ia_uprilim != IA_NOCHANGE) {
 885                 return (EINVAL);
 886         }
 887 
 888         if ((iaparmsp->ia_upri > ia_maxupri ||
 889             iaparmsp->ia_upri < -ia_maxupri) &&
 890             iaparmsp->ia_upri != IA_NOCHANGE) {
 891                 return (EINVAL);
 892         }
 893 
 894         return (0);
 895 }
 896 
 897 
 898 /*
 899  * Check the validity of the time-sharing parameters in the pc_vaparms_t
 900  * structure vaparmsp and put them in the buffer pointed to by tsparmsp.
 901  * pc_vaparms_t contains (key, value) pairs of parameter.
 902  * ts_vaparmsin() is called for TS threads, and ia_vaparmsin() is called
 903  * for IA threads. ts_vaparmsin() is the variable parameter version of
 904  * ts_parmsin() and ia_vaparmsin() is the variable parameter version of
 905  * ia_parmsin().
 906  */
 907 static int
 908 ts_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 909 {
 910         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 911         int             priflag = 0;
 912         int             limflag = 0;
 913         uint_t          cnt;
 914         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 915 
 916 
 917         /*
 918          * TS_NOCHANGE (-32768) is outside of the range of values for
 919          * ts_uprilim and ts_upri. If the structure tsparms_t is changed,
 920          * TS_NOCHANGE should be replaced by a flag word (in the same manner
 921          * as in rt.c).
 922          */
 923         tsparmsp->ts_uprilim = TS_NOCHANGE;
 924         tsparmsp->ts_upri = TS_NOCHANGE;
 925 
 926         /*
 927          * Get the varargs parameter and check validity of parameters.
 928          */
 929         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 930                 return (EINVAL);
 931 
 932         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 933 
 934                 switch (vpp->pc_key) {
 935                 case TS_KY_UPRILIM:
 936                         if (limflag++)
 937                                 return (EINVAL);
 938                         tsparmsp->ts_uprilim = (pri_t)vpp->pc_parm;
 939                         if (tsparmsp->ts_uprilim > ts_maxupri ||
 940                             tsparmsp->ts_uprilim < -ts_maxupri)
 941                                 return (EINVAL);
 942                         break;
 943 
 944                 case TS_KY_UPRI:
 945                         if (priflag++)
 946                                 return (EINVAL);
 947                         tsparmsp->ts_upri = (pri_t)vpp->pc_parm;
 948                         if (tsparmsp->ts_upri > ts_maxupri ||
 949                             tsparmsp->ts_upri < -ts_maxupri)
 950                                 return (EINVAL);
 951                         break;
 952 
 953                 default:
 954                         return (EINVAL);
 955                 }
 956         }
 957 
 958         if (vaparmsp->pc_vaparmscnt == 0) {
 959                 /*
 960                  * Use default parameters.
 961                  */
 962                 tsparmsp->ts_upri = tsparmsp->ts_uprilim = 0;
 963         }
 964 
 965         return (0);
 966 }
 967 
 968 static int
 969 ia_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 970 {
 971         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 972         int             priflag = 0;
 973         int             limflag = 0;
 974         int             mflag = 0;
 975         uint_t          cnt;
 976         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 977 
 978         /*
 979          * IA_NOCHANGE (-32768) is outside of the range of values for
 980          * ia_uprilim, ia_upri and ia_mode. If the structure iaparms_t is
 981          * changed, IA_NOCHANGE should be replaced by a flag word (in the
 982          * same manner as in rt.c).
 983          */
 984         iaparmsp->ia_uprilim = IA_NOCHANGE;
 985         iaparmsp->ia_upri = IA_NOCHANGE;
 986         iaparmsp->ia_mode = IA_NOCHANGE;
 987 
 988         /*
 989          * Get the varargs parameter and check validity of parameters.
 990          */
 991         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 992                 return (EINVAL);
 993 
 994         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 995 
 996                 switch (vpp->pc_key) {
 997                 case IA_KY_UPRILIM:
 998                         if (limflag++)
 999                                 return (EINVAL);
1000                         iaparmsp->ia_uprilim = (pri_t)vpp->pc_parm;
1001                         if (iaparmsp->ia_uprilim > ia_maxupri ||
1002                             iaparmsp->ia_uprilim < -ia_maxupri)
1003                                 return (EINVAL);
1004                         break;
1005 
1006                 case IA_KY_UPRI:
1007                         if (priflag++)
1008                                 return (EINVAL);
1009                         iaparmsp->ia_upri = (pri_t)vpp->pc_parm;
1010                         if (iaparmsp->ia_upri > ia_maxupri ||
1011                             iaparmsp->ia_upri < -ia_maxupri)
1012                                 return (EINVAL);
1013                         break;
1014 
1015                 case IA_KY_MODE:
1016                         if (mflag++)
1017                                 return (EINVAL);
1018                         iaparmsp->ia_mode = (int)vpp->pc_parm;
1019                         if (iaparmsp->ia_mode != IA_SET_INTERACTIVE &&
1020                             iaparmsp->ia_mode != IA_INTERACTIVE_OFF)
1021                                 return (EINVAL);
1022                         break;
1023 
1024                 default:
1025                         return (EINVAL);
1026                 }
1027         }
1028 
1029         if (vaparmsp->pc_vaparmscnt == 0) {
1030                 /*
1031                  * Use default parameters.
1032                  */
1033                 iaparmsp->ia_upri = iaparmsp->ia_uprilim = 0;
1034                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
1035         }
1036 
1037         return (0);
1038 }
1039 
/*
 * Nothing to do here but return success: both arguments are ignored
 * and 0 is always returned.
 * NOTE(review): presumably the fixed-format "parameters out" class
 * operation; confirm against the class function table.
 */
/* ARGSUSED */
static int
ts_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
{
        return (0);
}
1049 
1050 
1051 /*
1052  * Copy all selected time-sharing class parameters to the user.
1053  * The parameters are specified by a key.
1054  */
1055 static int
1056 ts_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1057 {
1058         tsparms_t       *tsprmsp = (tsparms_t *)prmsp;
1059         int             priflag = 0;
1060         int             limflag = 0;
1061         uint_t          cnt;
1062         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1063 
1064         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1065 
1066         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1067                 return (EINVAL);
1068 
1069         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1070 
1071                 switch (vpp->pc_key) {
1072                 case TS_KY_UPRILIM:
1073                         if (limflag++)
1074                                 return (EINVAL);
1075                         if (copyout(&tsprmsp->ts_uprilim,
1076                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1077                                 return (EFAULT);
1078                         break;
1079 
1080                 case TS_KY_UPRI:
1081                         if (priflag++)
1082                                 return (EINVAL);
1083                         if (copyout(&tsprmsp->ts_upri,
1084                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1085                                 return (EFAULT);
1086                         break;
1087 
1088                 default:
1089                         return (EINVAL);
1090                 }
1091         }
1092 
1093         return (0);
1094 }
1095 
1096 
1097 /*
1098  * Copy all selected interactive class parameters to the user.
1099  * The parameters are specified by a key.
1100  */
1101 static int
1102 ia_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1103 {
1104         iaparms_t       *iaprmsp = (iaparms_t *)prmsp;
1105         int             priflag = 0;
1106         int             limflag = 0;
1107         int             mflag = 0;
1108         uint_t          cnt;
1109         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1110 
1111         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1112 
1113         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1114                 return (EINVAL);
1115 
1116         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1117 
1118                 switch (vpp->pc_key) {
1119                 case IA_KY_UPRILIM:
1120                         if (limflag++)
1121                                 return (EINVAL);
1122                         if (copyout(&iaprmsp->ia_uprilim,
1123                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1124                                 return (EFAULT);
1125                         break;
1126 
1127                 case IA_KY_UPRI:
1128                         if (priflag++)
1129                                 return (EINVAL);
1130                         if (copyout(&iaprmsp->ia_upri,
1131                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1132                                 return (EFAULT);
1133                         break;
1134 
1135                 case IA_KY_MODE:
1136                         if (mflag++)
1137                                 return (EINVAL);
1138                         if (copyout(&iaprmsp->ia_mode,
1139                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
1140                                 return (EFAULT);
1141                         break;
1142 
1143                 default:
1144                         return (EINVAL);
1145                 }
1146         }
1147         return (0);
1148 }
1149 
1150 
1151 /*
1152  * Set the scheduling parameters of the thread pointed to by tsprocp
1153  * to those specified in the buffer pointed to by tsparmsp.
1154  * ts_parmsset() is called for TS threads, and ia_parmsset() is
1155  * called for IA threads.
1156  */
1157 /* ARGSUSED */
1158 static int
1159 ts_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1160 {
1161         char            nice;
1162         pri_t           reqtsuprilim;
1163         pri_t           reqtsupri;
1164         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
1165         tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
1166 
1167         ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
1168 
1169         if (tsparmsp->ts_uprilim == TS_NOCHANGE)
1170                 reqtsuprilim = tspp->ts_uprilim;
1171         else
1172                 reqtsuprilim = tsparmsp->ts_uprilim;
1173 
1174         if (tsparmsp->ts_upri == TS_NOCHANGE)
1175                 reqtsupri = tspp->ts_upri;
1176         else
1177                 reqtsupri = tsparmsp->ts_upri;
1178 
1179         /*
1180          * Make sure the user priority doesn't exceed the upri limit.
1181          */
1182         if (reqtsupri > reqtsuprilim)
1183                 reqtsupri = reqtsuprilim;
1184 
1185         /*
1186          * Basic permissions enforced by generic kernel code
1187          * for all classes require that a thread attempting
1188          * to change the scheduling parameters of a target
1189          * thread be privileged or have a real or effective
1190          * UID matching that of the target thread. We are not
1191          * called unless these basic permission checks have
1192          * already passed. The time-sharing class requires in
1193          * addition that the calling thread be privileged if it
1194          * is attempting to raise the upri limit above its current
1195          * value This may have been checked previously but if our
1196          * caller passed us a non-NULL credential pointer we assume
1197          * it hasn't and we check it here.
1198          */
1199         if (reqpcredp != NULL &&
1200             reqtsuprilim > tspp->ts_uprilim &&
1201             secpolicy_raisepriority(reqpcredp) != 0)
1202                 return (EPERM);
1203 
1204         /*
1205          * Set ts_nice to the nice value corresponding to the user
1206          * priority we are setting.  Note that setting the nice field
1207          * of the parameter struct won't affect upri or nice.
1208          */
1209         nice = NZERO - (reqtsupri * NZERO) / ts_maxupri;
1210         if (nice >= 2 * NZERO)
1211                 nice = 2 * NZERO - 1;
1212 
1213         thread_lock(tx);
1214 
1215         tspp->ts_uprilim = reqtsuprilim;
1216         tspp->ts_upri = reqtsupri;
1217         TS_NEWUMDPRI(tspp);
1218         tspp->ts_nice = nice;
1219 
1220         if ((tspp->ts_flags & TSKPRI) != 0) {
1221                 thread_unlock(tx);
1222                 return (0);
1223         }
1224 
1225         tspp->ts_dispwait = 0;
1226         ts_change_priority(tx, tspp);
1227         thread_unlock(tx);
1228         return (0);
1229 }
1230 
1231 
/*
 * Set the scheduling parameters of IA thread tx, including its
 * interactive mode, from the iaparms_t at parmsp.
 *
 * If ia_mode is IA_NOCHANGE the request is passed straight to
 * ts_parmsset().  Otherwise the mode change is applied first (see
 * below) and ts_parmsset() is then called to handle the user priority
 * fields.
 *
 * Returns whatever ts_parmsset() returns, or 0 with nothing changed
 * when the mode-change permission check fails, when the thread has no
 * process, or when the process is in state SIDL.
 *
 * On the mode-change path the asserts below require both pidlock and
 * the target process's p_lock to be held.
 */
static int
ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
{
        tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
        iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
        proc_t          *p;
        pid_t           pid, pgid, sid;
        pid_t           on, off;
        struct stdata   *stp;
        int             sess_held;      /* nonzero while we hold s_lock */

        /*
         * Handle user priority changes
         *
         * NOTE(review): ts_parmsset() reads parmsp as a tsparms_t, so
         * this presumably relies on iaparms_t starting with the same
         * two priority fields -- confirm against the headers.
         */
        if (iaparmsp->ia_mode == IA_NOCHANGE)
                return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));

        /*
         * Check permissions for changing modes.
         * A caller that supplies a credential must either be a member
         * of group IA_gid or pass secpolicy_raisepriority().
         */

        if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
            secpolicy_raisepriority(reqpcredp) != 0) {
                /*
                 * Silently fail in case this is just a priocntl
                 * call with upri and uprilim set to IA_NOCHANGE.
                 * Note that any upri/uprilim values in the request
                 * are not applied either, since ts_parmsset() is
                 * never reached on this path.
                 */
                return (0);
        }

        ASSERT(MUTEX_HELD(&pidlock));
        if ((p = ttoproc(tx)) == NULL) {
                return (0);
        }
        ASSERT(MUTEX_HELD(&p->p_lock));
        if (p->p_stat == SIDL) {
                return (0);
        }
        pid = p->p_pid;
        sid = p->p_sessp->s_sid;
        pgid = p->p_pgrp;
        if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
                /*
                 * session leaders must be turned on now so all processes
                 * in the group controlling the tty will be turned on or off.
                 * if the ia_mode is off for the session leader,
                 * ia_set_process_group will return without setting the
                 * processes in the group controlling the tty on.
                 */
                thread_lock(tx);
                tspp->ts_flags |= TSIASET;
                thread_unlock(tx);
        }
        /*
         * If this process is the session leader (pid == sid) and the
         * session's vnode has a stream with both sd_pgidp and sd_sidp
         * set, take the process group id from the stream and hand it to
         * ia_set_process_group() as the group to turn on or off.
         * s_lock is dropped before that call; sess_held records whether
         * it still needs to be released afterwards.
         */
        mutex_enter(&p->p_sessp->s_lock);
        sess_held = 1;
        if ((pid == sid) && (p->p_sessp->s_vp != NULL) &&
            ((stp = p->p_sessp->s_vp->v_stream) != NULL)) {
                if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) {
                        pgid = stp->sd_pgidp->pid_id;
                        sess_held = 0;
                        mutex_exit(&p->p_sessp->s_lock);
                        /* Exactly one of off/on carries the group id. */
                        if (iaparmsp->ia_mode ==
                            IA_SET_INTERACTIVE) {
                                off = 0;
                                on = pgid;
                        } else {
                                off = pgid;
                                on = 0;
                        }
                        TRACE_3(TR_FAC_IA, TR_ACTIVE_CHAIN,
                            "active chain:pid %d gid %d %p",
                            pid, pgid, p);
                        ia_set_process_group(sid, off, on);
                }
        }
        if (sess_held)
                mutex_exit(&p->p_sessp->s_lock);

        /*
         * Record the requested mode on the target thread itself: set or
         * clear TSIASET and give ts_boost the matching sign.
         */
        thread_lock(tx);

        if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
                tspp->ts_flags |= TSIASET;
                tspp->ts_boost = ia_boost;
        } else {
                tspp->ts_flags &= ~TSIASET;
                tspp->ts_boost = -ia_boost;
        }
        thread_unlock(tx);

        /* Finally apply the user priority fields of the request. */
        return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));
}
1323 
1324 static void
1325 ts_exit(kthread_t *t)
1326 {
1327         tsproc_t *tspp;
1328 
1329         if (CPUCAPS_ON()) {
1330                 /*
1331                  * A thread could be exiting in between clock ticks,
1332                  * so we need to calculate how much CPU time it used
1333                  * since it was charged last time.
1334                  *
1335                  * CPU caps are not enforced on exiting processes - it is
1336                  * usually desirable to exit as soon as possible to free
1337                  * resources.
1338                  */
1339                 thread_lock(t);
1340                 tspp = (tsproc_t *)t->t_cldata;
1341                 (void) cpucaps_charge(t, &tspp->ts_caps, CPUCAPS_CHARGE_ONLY);
1342                 thread_unlock(t);
1343         }
1344 }
1345 
1346 /*
1347  * Return the global scheduling priority that would be assigned
1348  * to a thread entering the time-sharing class with the ts_upri.
1349  */
1350 static pri_t
1351 ts_globpri(kthread_t *t)
1352 {
1353         tsproc_t *tspp;
1354         pri_t   tspri;
1355 
1356         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
1357         tspp = (tsproc_t *)t->t_cldata;
1358         tspri = tsmedumdpri + tspp->ts_upri;
1359         if (tspri > ts_maxumdpri)
1360                 tspri = ts_maxumdpri;
1361         else if (tspri < 0)
1362                 tspri = 0;
1363         return (ts_dptbl[tspri].ts_globpri);
1364 }
1365 
/*
 * Arrange for thread to be placed in appropriate location
 * on dispatcher queue.
 *
 * This is called with the current thread in TS_ONPROC and locked.
 *
 * In order, this routine:
 *   - gives the thread a kernel priority if one was requested
 *     (t_kpri_req) and it does not already have one;
 *   - charges CPU caps and returns early if the caps decide to hold
 *     the thread (never for a thread at kernel priority);
 *   - marks the thread swappable if its lwp was in user mode;
 *   - honours schedctl "preemption control" by requeueing the thread at
 *     the front of the highest user-mode priority queue, unless the
 *     preemption has already been put off for too long;
 *   - otherwise queues the thread at the back or the front of its
 *     dispatch queue depending on TSBACKQ.
 */
static void
ts_preempt(kthread_t *t)
{
        tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
        klwp_t          *lwp = curthread->t_lwp; /* t == curthread, see below */
        pri_t           oldpri = t->t_pri;      /* only used for tracing */

        ASSERT(t == curthread);
        ASSERT(THREAD_LOCK_HELD(curthread));

        /*
         * If preempted in the kernel, make sure the thread has
         * a kernel priority if needed.
         */
        if (!(tspp->ts_flags & TSKPRI) && lwp != NULL && t->t_kpri_req) {
                tspp->ts_flags |= TSKPRI;
                THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                t->t_trapret = 1;    /* so ts_trapret will run */
                aston(t);
        }

        /*
         * This thread may be placed on wait queue by CPU Caps. In this case we
         * do not need to do anything until it is removed from the wait queue.
         * Do not enforce CPU caps on threads running at a kernel priority
         *
         * Note that this early return bypasses the trace point at "done".
         */
        if (CPUCAPS_ON()) {
                (void) cpucaps_charge(t, &tspp->ts_caps,
                    CPUCAPS_CHARGE_ENFORCE);
                if (!(tspp->ts_flags & TSKPRI) && CPUCAPS_ENFORCE(t))
                        return;
        }

        /*
         * If thread got preempted in the user-land then we know
         * it isn't holding any locks.  Mark it as swappable.
         */
        ASSERT(t->t_schedflag & TS_DONT_SWAP);
        if (lwp != NULL && lwp->lwp_state == LWP_USER)
                t->t_schedflag &= ~TS_DONT_SWAP;

        /*
         * Check to see if we're doing "preemption control" here.  If
         * we are, and if the user has requested that this thread not
         * be preempted, and if preemptions haven't been put off for
         * too long, let the preemption happen here but try to make
         * sure the thread is rescheduled as soon as possible.  We do
         * this by putting it on the front of the highest priority run
         * queue in the TS class.  If the preemption has been put off
         * for too long, clear the "nopreempt" bit and let the thread
         * be preempted.
         *
         * "Too long" means ts_timeleft has dropped to -SC_MAX_TICKS or
         * below.
         */
        if (t->t_schedctl && schedctl_get_nopreempt(t)) {
                if (tspp->ts_timeleft > -SC_MAX_TICKS) {
                        DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
                        if (!(tspp->ts_flags & TSKPRI)) {
                                /*
                                 * If not already remembered, remember current
                                 * priority for restoration in ts_yield().
                                 */
                                if (!(tspp->ts_flags & TSRESTORE)) {
                                        tspp->ts_scpri = t->t_pri;
                                        tspp->ts_flags |= TSRESTORE;
                                }
                                THREAD_CHANGE_PRI(t, ts_maxumdpri);
                                /* Undo any swappable marking done above. */
                                t->t_schedflag |= TS_DONT_SWAP;
                        }
                        schedctl_set_yield(t, 1);
                        setfrontdq(t);
                        goto done;
                } else {
                        /*
                         * Grace period used up: put back the priority
                         * saved earlier, if any, and clear the thread's
                         * no-preempt request.
                         */
                        if (tspp->ts_flags & TSRESTORE) {
                                THREAD_CHANGE_PRI(t, tspp->ts_scpri);
                                tspp->ts_flags &= ~TSRESTORE;
                        }
                        schedctl_set_nopreempt(t, 0);
                        DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
                        TNF_PROBE_2(schedctl_preempt, "schedctl TS ts_preempt",
                            /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
                            tnf_lwpid, lwpid, t->t_tid);
                        /*
                         * Fall through and be preempted below.
                         */
                }
        }

        /*
         * Pick the queue position:
         *   TSBACKQ without TSKPRI: start a fresh quantum, reset
         *      ts_dispwait and go to the back of the queue;
         *   TSBACKQ with TSKPRI: go to the back of the queue, leaving
         *      the quantum and ts_dispwait alone;
         *   otherwise: go to the front of the queue.
         * TSBACKQ is cleared whenever it was set.
         */
        if ((tspp->ts_flags & (TSBACKQ|TSKPRI)) == TSBACKQ) {
                tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
                tspp->ts_dispwait = 0;
                tspp->ts_flags &= ~TSBACKQ;
                setbackdq(t);
        } else if ((tspp->ts_flags & (TSBACKQ|TSKPRI)) == (TSBACKQ|TSKPRI)) {
                tspp->ts_flags &= ~TSBACKQ;
                setbackdq(t);
        } else {
                setfrontdq(t);
        }

done:
        TRACE_2(TR_FAC_DISP, TR_PREEMPT,
            "preempt:tid %p old pri %d", t, oldpri);
}
1475 
1476 static void
1477 ts_setrun(kthread_t *t)
1478 {
1479         tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
1480 
1481         ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */
1482 
1483         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1484                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1485                 TS_NEWUMDPRI(tspp);
1486                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1487                 tspp->ts_dispwait = 0;
1488                 if ((tspp->ts_flags & TSKPRI) == 0) {
1489                         THREAD_CHANGE_PRI(t,
1490                             ts_dptbl[tspp->ts_umdpri].ts_globpri);
1491                         ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1492                 }
1493         }
1494 
1495         tspp->ts_flags &= ~TSBACKQ;
1496 
1497         if (tspp->ts_flags & TSIA) {
1498                 if (tspp->ts_flags & TSIASET)
1499                         setfrontdq(t);
1500                 else
1501                         setbackdq(t);
1502         } else {
1503                 if (t->t_disp_time != ddi_get_lbolt())
1504                         setbackdq(t);
1505                 else
1506                         setfrontdq(t);
1507         }
1508 }
1509 
1510 
/*
 * Prepare thread for sleep. We reset the thread priority so it will
 * run at the kernel priority level when it wakes up.
 *
 * More precisely, after charging CPU caps one of three things happens:
 *   - a kernel priority was requested (t_kpri_req): the thread is
 *     marked TSKPRI and moved to kernel priority ts_kmdpris[0];
 *   - otherwise, ts_dispwait exceeds the ts_maxwait of the current
 *     user-mode priority level: the user-mode priority is recomputed
 *     from ts_slpret, a fresh quantum is granted and TSKPRI is cleared;
 *   - otherwise, the thread currently has TSKPRI set: it is dropped
 *     back to its user-mode priority and TSKPRI is cleared.
 * If none of these applies the priority is left alone.  In all cases
 * t_stime is stamped with the current lbolt value.
 *
 * Must be called on curthread with the thread lock held.
 */
static void
ts_sleep(kthread_t *t)
{
        tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
        int             flags;
        pri_t           old_pri = t->t_pri;     /* only used for tracing */

        ASSERT(t == curthread);
        ASSERT(THREAD_LOCK_HELD(t));

        /*
         * Account for time spent on CPU before going to sleep.
         */
        (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);

        /* Snapshot of the flags; every branch below writes ts_flags from it. */
        flags = tspp->ts_flags;
        if (t->t_kpri_req) {
                tspp->ts_flags = flags | TSKPRI;
                THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
                ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
                t->t_trapret = 1;    /* so ts_trapret will run */
                aston(t);
        } else if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
                /*
                 * If thread has blocked in the kernel (as opposed to
                 * being merely preempted), recompute the user mode priority.
                 */
                tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
                TS_NEWUMDPRI(tspp);
                tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
                tspp->ts_dispwait = 0;

                THREAD_CHANGE_PRI(curthread,
                    ts_dptbl[tspp->ts_umdpri].ts_globpri);
                ASSERT(curthread->t_pri >= 0 &&
                    curthread->t_pri <= ts_maxglobpri);
                tspp->ts_flags = flags & ~TSKPRI;

                /* The new priority may require giving up the CPU. */
                if (DISP_MUST_SURRENDER(curthread))
                        cpu_surrender(curthread);
        } else if (flags & TSKPRI) {
                /*
                 * No kernel priority is requested any more: return to
                 * the (unchanged) user mode priority.
                 */
                THREAD_CHANGE_PRI(curthread,
                    ts_dptbl[tspp->ts_umdpri].ts_globpri);
                ASSERT(curthread->t_pri >= 0 &&
                    curthread->t_pri <= ts_maxglobpri);
                tspp->ts_flags = flags & ~TSKPRI;

                if (DISP_MUST_SURRENDER(curthread))
                        cpu_surrender(curthread);
        }
        t->t_stime = ddi_get_lbolt();   /* time stamp for the swapper */
        TRACE_2(TR_FAC_DISP, TR_SLEEP,
            "sleep:tid %p old pri %d", t, old_pri);
}
1569 
1570 
1571 /*
1572  * Return Values:
1573  *
1574  *      -1 if the thread is loaded or is not eligible to be swapped in.
1575  *
1576  *      effective priority of the specified thread based on swapout time
1577  *              and size of process (epri >= 0 , epri <= SHRT_MAX).
1578  */
1579 /* ARGSUSED */
1580 static pri_t
1581 ts_swapin(kthread_t *t, int flags)
1582 {
1583         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1584         long            epri = -1;
1585         proc_t          *pp = ttoproc(t);
1586 
1587         ASSERT(THREAD_LOCK_HELD(t));
1588 
1589         /*
1590          * We know that pri_t is a short.
1591          * Be sure not to overrun its range.
1592          */
1593         if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
1594                 time_t swapout_time;
1595 
1596                 swapout_time = (ddi_get_lbolt() - t->t_stime) / hz;
1597                 if (INHERITED(t) || (tspp->ts_flags & (TSKPRI | TSIASET)))
1598                         epri = (long)DISP_PRIO(t) + swapout_time;
1599                 else {
1600                         /*
1601                          * Threads which have been out for a long time,
1602                          * have high user mode priority and are associated
1603                          * with a small address space are more deserving
1604                          */
1605                         epri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1606                         ASSERT(epri >= 0 && epri <= ts_maxumdpri);
1607                         epri += swapout_time - pp->p_swrss / nz(maxpgio)/2;
1608                 }
1609                 /*
1610                  * Scale epri so SHRT_MAX/2 represents zero priority.
1611                  */
1612                 epri += SHRT_MAX/2;
1613                 if (epri < 0)
1614                         epri = 0;
1615                 else if (epri > SHRT_MAX)
1616                         epri = SHRT_MAX;
1617         }
1618         return ((pri_t)epri);
1619 }
1620 
1621 /*
1622  * Return Values
1623  *      -1 if the thread isn't loaded or is not eligible to be swapped out.
1624  *
1625  *      effective priority of the specified thread based on if the swapper
1626  *              is in softswap or hardswap mode.
1627  *
1628  *              Softswap:  Return a low effective priority for threads
1629  *                         sleeping for more than maxslp secs.
1630  *
1631  *              Hardswap:  Return an effective priority such that threads
1632  *                         which have been in memory for a while and are
1633  *                         associated with a small address space are swapped
1634  *                         in before others.
1635  *
1636  *              (epri >= 0 , epri <= SHRT_MAX).
1637  */
1638 time_t  ts_minrun = 2;          /* XXX - t_pri becomes 59 within 2 secs */
1639 time_t  ts_minslp = 2;          /* min time on sleep queue for hardswap */
1640 
1641 static pri_t
1642 ts_swapout(kthread_t *t, int flags)
1643 {
1644         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1645         long            epri = -1;
1646         proc_t          *pp = ttoproc(t);
1647         time_t          swapin_time;
1648 
1649         ASSERT(THREAD_LOCK_HELD(t));
1650 
1651         if (INHERITED(t) || (tspp->ts_flags & (TSKPRI | TSIASET)) ||
1652             (t->t_proc_flag & TP_LWPEXIT) ||
1653             (t->t_state & (TS_ZOMB | TS_FREE | TS_STOPPED |
1654             TS_ONPROC | TS_WAIT)) ||
1655             !(t->t_schedflag & TS_LOAD) || !SWAP_OK(t))
1656                 return (-1);
1657 
1658         ASSERT(t->t_state & (TS_SLEEP | TS_RUN));
1659 
1660         /*
1661          * We know that pri_t is a short.
1662          * Be sure not to overrun its range.
1663          */
1664         swapin_time = (ddi_get_lbolt() - t->t_stime) / hz;
1665         if (flags == SOFTSWAP) {
1666                 if (t->t_state == TS_SLEEP && swapin_time > maxslp) {
1667                         epri = 0;
1668                 } else {
1669                         return ((pri_t)epri);
1670                 }
1671         } else {
1672                 pri_t pri;
1673 
1674                 if ((t->t_state == TS_SLEEP && swapin_time > ts_minslp) ||
1675                     (t->t_state == TS_RUN && swapin_time > ts_minrun)) {
1676                         pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1677                         ASSERT(pri >= 0 && pri <= ts_maxumdpri);
1678                         epri = swapin_time -
1679                             (rm_asrss(pp->p_as) / nz(maxpgio)/2) - (long)pri;
1680                 } else {
1681                         return ((pri_t)epri);
1682                 }
1683         }
1684 
1685         /*
1686          * Scale epri so SHRT_MAX/2 represents zero priority.
1687          */
1688         epri += SHRT_MAX/2;
1689         if (epri < 0)
1690                 epri = 0;
1691         else if (epri > SHRT_MAX)
1692                 epri = SHRT_MAX;
1693 
1694         return ((pri_t)epri);
1695 }
1696 
1697 /*
1698  * Check for time slice expiration.  If time slice has expired
1699  * move thread to priority specified in tsdptbl for time slice expiration
1700  * and set runrun to cause preemption.
1701  */
1702 static void
1703 ts_tick(kthread_t *t)
1704 {
1705         tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
1706         klwp_t *lwp;
1707         boolean_t call_cpu_surrender = B_FALSE;
1708         pri_t   oldpri = t->t_pri;
1709 
1710         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1711 
1712         thread_lock(t);
1713 
1714         /*
1715          * Keep track of thread's project CPU usage.  Note that projects
1716          * get charged even when threads are running in the kernel.
1717          */
1718         if (CPUCAPS_ON()) {
1719                 call_cpu_surrender = cpucaps_charge(t, &tspp->ts_caps,
1720                     CPUCAPS_CHARGE_ENFORCE) && !(tspp->ts_flags & TSKPRI);
1721         }
1722 
1723         if ((tspp->ts_flags & TSKPRI) == 0) {
1724                 if (--tspp->ts_timeleft <= 0) {
1725                         pri_t   new_pri;
1726 
1727                         /*
1728                          * If we're doing preemption control and trying to
1729                          * avoid preempting this thread, just note that
1730                          * the thread should yield soon and let it keep
1731                          * running (unless it's been a while).
1732                          */
1733                         if (t->t_schedctl && schedctl_get_nopreempt(t)) {
1734                                 if (tspp->ts_timeleft > -SC_MAX_TICKS) {
1735                                         DTRACE_SCHED1(schedctl__nopreempt,
1736                                             kthread_t *, t);
1737                                         schedctl_set_yield(t, 1);
1738                                         thread_unlock_nopreempt(t);
1739                                         return;
1740                                 }
1741 
1742                                 TNF_PROBE_2(schedctl_failsafe,
1743                                     "schedctl TS ts_tick", /* CSTYLED */,
1744                                     tnf_pid, pid, ttoproc(t)->p_pid,
1745                                     tnf_lwpid, lwpid, t->t_tid);
1746                         }
1747                         tspp->ts_flags &= ~TSRESTORE;
1748                         tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
1749                         TS_NEWUMDPRI(tspp);
1750                         tspp->ts_dispwait = 0;
1751                         new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1752                         ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
1753                         /*
1754                          * When the priority of a thread is changed,
1755                          * it may be necessary to adjust its position
1756                          * on a sleep queue or dispatch queue.
1757                          * The function thread_change_pri accomplishes
1758                          * this.
1759                          */
1760                         if (thread_change_pri(t, new_pri, 0)) {
1761                                 if ((t->t_schedflag & TS_LOAD) &&
1762                                     (lwp = t->t_lwp) &&
1763                                     lwp->lwp_state == LWP_USER)
1764                                         t->t_schedflag &= ~TS_DONT_SWAP;
1765                                 tspp->ts_timeleft =
1766                                     ts_dptbl[tspp->ts_cpupri].ts_quantum;
1767                         } else {
1768                                 call_cpu_surrender = B_TRUE;
1769                         }
1770                         TRACE_2(TR_FAC_DISP, TR_TICK,
1771                             "tick:tid %p old pri %d", t, oldpri);
1772                 } else if (t->t_state == TS_ONPROC &&
1773                     t->t_pri < t->t_disp_queue->disp_maxrunpri) {
1774                         call_cpu_surrender = B_TRUE;
1775                 }
1776         }
1777 
1778         if (call_cpu_surrender) {
1779                 tspp->ts_flags |= TSBACKQ;
1780                 cpu_surrender(t);
1781         }
1782 
1783         thread_unlock_nopreempt(t);     /* clock thread can't be preempted */
1784 }
1785 
1786 
1787 /*
1788  * If thread is currently at a kernel mode priority (has slept)
1789  * we assign it the appropriate user mode priority and time quantum
1790  * here.  If we are lowering the thread's priority below that of
1791  * other runnable threads we will normally set runrun via cpu_surrender() to
1792  * cause preemption.
1793  */
1794 static void
1795 ts_trapret(kthread_t *t)
1796 {
1797         tsproc_t        *tspp = (tsproc_t *)t->t_cldata;
1798         cpu_t           *cp = CPU;
1799         pri_t           old_pri = curthread->t_pri;
1800 
1801         ASSERT(THREAD_LOCK_HELD(t));
1802         ASSERT(t == curthread);
1803         ASSERT(cp->cpu_dispthread == t);
1804         ASSERT(t->t_state == TS_ONPROC);
1805 
1806         t->t_kpri_req = 0;
1807         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1808                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1809                 TS_NEWUMDPRI(tspp);
1810                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1811                 tspp->ts_dispwait = 0;
1812 
1813                 /*
1814                  * If thread has blocked in the kernel (as opposed to
1815                  * being merely preempted), recompute the user mode priority.
1816                  */
1817                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1818                 cp->cpu_dispatch_pri = DISP_PRIO(t);
1819                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1820                 tspp->ts_flags &= ~TSKPRI;
1821 
1822                 if (DISP_MUST_SURRENDER(t))
1823                         cpu_surrender(t);
1824         } else if (tspp->ts_flags & TSKPRI) {
1825                 /*
1826                  * If thread has blocked in the kernel (as opposed to
1827                  * being merely preempted), recompute the user mode priority.
1828                  */
1829                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1830                 cp->cpu_dispatch_pri = DISP_PRIO(t);
1831                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1832                 tspp->ts_flags &= ~TSKPRI;
1833 
1834                 if (DISP_MUST_SURRENDER(t))
1835                         cpu_surrender(t);
1836         }
1837 
1838         /*
1839          * Swapout lwp if the swapper is waiting for this thread to
1840          * reach a safe point.
1841          */
1842         if ((t->t_schedflag & TS_SWAPENQ) && !(tspp->ts_flags & TSIASET)) {
1843                 thread_unlock(t);
1844                 swapout_lwp(ttolwp(t));
1845                 thread_lock(t);
1846         }
1847 
1848         TRACE_2(TR_FAC_DISP, TR_TRAPRET,
1849             "trapret:tid %p old pri %d", t, old_pri);
1850 }
1851 
1852 
1853 /*
1854  * Update the ts_dispwait values of all time sharing threads that
1855  * are currently runnable at a user mode priority and bump the priority
1856  * if ts_dispwait exceeds ts_maxwait.  Called once per second via
1857  * timeout which we reset here.
1858  *
1859  * There are several lists of time sharing threads broken up by a hash on
1860  * the thread pointer.  Each list has its own lock.  This avoids blocking
1861  * all ts_enterclass, ts_fork, and ts_exitclass operations while ts_update
1862  * runs.  ts_update traverses each list in turn.
1863  *
1864  * If multiple threads have their priorities updated to the same value,
1865  * the system implicitly favors the one that is updated first (since it
1866  * winds up first on the run queue).  To avoid this unfairness, the
1867  * traversal of threads starts at the list indicated by a marker.  When
1868  * threads in more than one list have their priorities updated, the marker
1869  * is moved.  This changes the order the threads will be placed on the run
1870  * queue the next time ts_update is called and preserves fairness over the
1871  * long run.  The marker doesn't need to be protected by a lock since it's
1872  * only accessed by ts_update, which is inherently single-threaded (only
1873  * one instance can be running at a time).
1874  */
1875 static void
1876 ts_update(void *arg)
1877 {
1878         int             i;
1879         int             new_marker = -1;
1880         static int      ts_update_marker;
1881 
1882         /*
1883          * Start with the ts_update_marker list, then do the rest.
1884          */
1885         i = ts_update_marker;
1886         do {
1887                 /*
1888                  * If this is the first list after the current marker to
1889                  * have threads with priorities updated, advance the marker
1890                  * to this list for the next time ts_update runs.
1891                  */
1892                 if (ts_update_list(i) && new_marker == -1 &&
1893                     i != ts_update_marker) {
1894                         new_marker = i;
1895                 }
1896         } while ((i = TS_LIST_NEXT(i)) != ts_update_marker);
1897 
1898         /* advance marker for next ts_update call */
1899         if (new_marker != -1)
1900                 ts_update_marker = new_marker;
1901 
1902         (void) timeout(ts_update, arg, hz);
1903 }
1904 
1905 /*
1906  * Updates priority for a list of threads.  Returns 1 if the priority of
1907  * one of the threads was actually updated, 0 if none were for various
1908  * reasons (thread is no longer in the TS or IA class, isn't runnable,
1909  * hasn't waited long enough, has the preemption control no-preempt bit
1910  * set, etc.)
1911  */
1912 static int
1913 ts_update_list(int i)
1914 {
1915         tsproc_t *tspp;
1916         kthread_t *tx;
1917         int updated = 0;
1918 
1919         mutex_enter(&ts_list_lock[i]);
1920         for (tspp = ts_plisthead[i].ts_next; tspp != &ts_plisthead[i];
1921             tspp = tspp->ts_next) {
1922                 tx = tspp->ts_tp;
1923                 /*
1924                  * Lock the thread and verify state.
1925                  */
1926                 thread_lock(tx);
1927                 /*
1928                  * Skip the thread if it is no longer in the TS (or IA) class.
1929                  */
1930                 if (tx->t_clfuncs != &ts_classfuncs.thread &&
1931                     tx->t_clfuncs != &ia_classfuncs.thread)
1932                         goto next;
1933                 tspp->ts_dispwait++;
1934                 if ((tspp->ts_flags & TSKPRI) != 0)
1935                         goto next;
1936                 if (tspp->ts_dispwait <= ts_dptbl[tspp->ts_umdpri].ts_maxwait)
1937                         goto next;
1938                 if (tx->t_schedctl && schedctl_get_nopreempt(tx))
1939                         goto next;
1940                 if (tx->t_state != TS_RUN && tx->t_state != TS_WAIT &&
1941                     (tx->t_state != TS_SLEEP || !ts_sleep_promote)) {
1942                         /* make next syscall/trap do CL_TRAPRET */
1943                         tx->t_trapret = 1;
1944                         aston(tx);
1945                         goto next;
1946                 }
1947                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_lwait;
1948                 TS_NEWUMDPRI(tspp);
1949                 tspp->ts_dispwait = 0;
1950                 updated = 1;
1951 
1952                 /*
1953                  * Only dequeue it if needs to move; otherwise it should
1954                  * just round-robin here.
1955                  */
1956                 if (tx->t_pri != ts_dptbl[tspp->ts_umdpri].ts_globpri) {
1957                         pri_t oldpri = tx->t_pri;
1958                         ts_change_priority(tx, tspp);
1959                         TRACE_2(TR_FAC_DISP, TR_UPDATE,
1960                             "update:tid %p old pri %d", tx, oldpri);
1961                 }
1962 next:
1963                 thread_unlock(tx);
1964         }
1965         mutex_exit(&ts_list_lock[i]);
1966 
1967         return (updated);
1968 }
1969 
1970 /*
1971  * Processes waking up go to the back of their queue.  We don't
1972  * need to assign a time quantum here because thread is still
1973  * at a kernel mode priority and the time slicing is not done
1974  * for threads running in the kernel after sleeping.  The proper
1975  * time quantum will be assigned by ts_trapret before the thread
1976  * returns to user mode.
1977  */
1978 static void
1979 ts_wakeup(kthread_t *t)
1980 {
1981         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1982 
1983         ASSERT(THREAD_LOCK_HELD(t));
1984 
1985         t->t_stime = ddi_get_lbolt();                /* time stamp for the swapper */
1986 
1987         if (tspp->ts_flags & TSKPRI) {
1988                 tspp->ts_flags &= ~TSBACKQ;
1989                 if (tspp->ts_flags & TSIASET)
1990                         setfrontdq(t);
1991                 else
1992                         setbackdq(t);
1993         } else if (t->t_kpri_req) {
1994                 /*
1995                  * Give thread a priority boost if we were asked.
1996                  */
1997                 tspp->ts_flags |= TSKPRI;
1998                 THREAD_CHANGE_PRI(t, ts_kmdpris[0]);
1999                 setbackdq(t);
2000                 t->t_trapret = 1;    /* so that ts_trapret will run */
2001                 aston(t);
2002         } else {
2003                 if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
2004                         tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
2005                         TS_NEWUMDPRI(tspp);
2006                         tspp->ts_timeleft =
2007                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2008                         tspp->ts_dispwait = 0;
2009                         THREAD_CHANGE_PRI(t,
2010                             ts_dptbl[tspp->ts_umdpri].ts_globpri);
2011                         ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
2012                 }
2013 
2014                 tspp->ts_flags &= ~TSBACKQ;
2015 
2016                 if (tspp->ts_flags & TSIA) {
2017                         if (tspp->ts_flags & TSIASET)
2018                                 setfrontdq(t);
2019                         else
2020                                 setbackdq(t);
2021                 } else {
2022                         if (t->t_disp_time != ddi_get_lbolt())
2023                                 setbackdq(t);
2024                         else
2025                                 setfrontdq(t);
2026                 }
2027         }
2028 }
2029 
2030 
2031 /*
2032  * When a thread yields, put it on the back of the run queue.
2033  */
2034 static void
2035 ts_yield(kthread_t *t)
2036 {
2037         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
2038 
2039         ASSERT(t == curthread);
2040         ASSERT(THREAD_LOCK_HELD(t));
2041 
2042         /*
2043          * Collect CPU usage spent before yielding
2044          */
2045         (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);
2046 
2047         /*
2048          * Clear the preemption control "yield" bit since the user is
2049          * doing a yield.
2050          */
2051         if (t->t_schedctl)
2052                 schedctl_set_yield(t, 0);
2053         /*
2054          * If ts_preempt() artifically increased the thread's priority
2055          * to avoid preemption, restore the original priority now.
2056          */
2057         if (tspp->ts_flags & TSRESTORE) {
2058                 THREAD_CHANGE_PRI(t, tspp->ts_scpri);
2059                 tspp->ts_flags &= ~TSRESTORE;
2060         }
2061         if (tspp->ts_timeleft <= 0) {
2062                 /*
2063                  * Time slice was artificially extended to avoid
2064                  * preemption, so pretend we're preempting it now.
2065                  */
2066                 DTRACE_SCHED1(schedctl__yield, int, -tspp->ts_timeleft);
2067                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
2068                 TS_NEWUMDPRI(tspp);
2069                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
2070                 tspp->ts_dispwait = 0;
2071                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
2072                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
2073         }
2074         tspp->ts_flags &= ~TSBACKQ;
2075         setbackdq(t);
2076 }
2077 
2078 
2079 /*
2080  * Increment the nice value of the specified thread by incr and
2081  * return the new value in *retvalp.
2082  */
2083 static int
2084 ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
2085 {
2086         int             newnice;
2087         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
2088         tsparms_t       tsparms;
2089 
2090         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
2091 
2092         /* If there's no change to priority, just return current setting */
2093         if (incr == 0) {
2094                 if (retvalp) {
2095                         *retvalp = tspp->ts_nice - NZERO;
2096                 }
2097                 return (0);
2098         }
2099 
2100         if ((incr < 0 || incr > 2 * NZERO) &&
2101             secpolicy_raisepriority(cr) != 0)
2102                 return (EPERM);
2103 
2104         /*
2105          * Specifying a nice increment greater than the upper limit of
2106          * 2 * NZERO - 1 will result in the thread's nice value being
2107          * set to the upper limit.  We check for this before computing
2108          * the new value because otherwise we could get overflow
2109          * if a privileged process specified some ridiculous increment.
2110          */
2111         if (incr > 2 * NZERO - 1)
2112                 incr = 2 * NZERO - 1;
2113 
2114         newnice = tspp->ts_nice + incr;
2115         if (newnice >= 2 * NZERO)
2116                 newnice = 2 * NZERO - 1;
2117         else if (newnice < 0)
2118                 newnice = 0;
2119 
2120         tsparms.ts_uprilim = tsparms.ts_upri =
2121             -((newnice - NZERO) * ts_maxupri) / NZERO;
2122         /*
2123          * Reset the uprilim and upri values of the thread.
2124          * Call ts_parmsset even if thread is interactive since we're
2125          * not changing mode.
2126          */
2127         (void) ts_parmsset(t, (void *)&tsparms, (id_t)0, (cred_t *)NULL);
2128 
2129         /*
2130          * Although ts_parmsset already reset ts_nice it may
2131          * not have been set to precisely the value calculated above
2132          * because ts_parmsset determines the nice value from the
2133          * user priority and we may have truncated during the integer
2134          * conversion from nice value to user priority and back.
2135          * We reset ts_nice to the value we calculated above.
2136          */
2137         tspp->ts_nice = (char)newnice;
2138 
2139         if (retvalp)
2140                 *retvalp = newnice - NZERO;
2141         return (0);
2142 }
2143 
2144 /*
2145  * Increment the priority of the specified thread by incr and
2146  * return the new value in *retvalp.
2147  */
2148 static int
2149 ts_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
2150 {
2151         int             newpri;
2152         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
2153         tsparms_t       tsparms;
2154 
2155         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
2156 
2157         /* If there's no change to the priority, just return current setting */
2158         if (incr == 0) {
2159                 *retvalp = tspp->ts_upri;
2160                 return (0);
2161         }
2162 
2163         newpri = tspp->ts_upri + incr;
2164         if (newpri > ts_maxupri || newpri < -ts_maxupri)
2165                 return (EINVAL);
2166 
2167         *retvalp = newpri;
2168         tsparms.ts_uprilim = tsparms.ts_upri = newpri;
2169         /*
2170          * Reset the uprilim and upri values of the thread.
2171          * Call ts_parmsset even if thread is interactive since we're
2172          * not changing mode.
2173          */
2174         return (ts_parmsset(t, &tsparms, 0, cr));
2175 }
2176 
2177 /*
2178  * ia_set_process_group marks foreground processes as interactive
2179  * and background processes as non-interactive iff the session
2180  * leader is interactive.  This routine is called from two places:
2181  *      strioctl:SPGRP when a new process group gets
2182  *              control of the tty.
2183  *      ia_parmsset-when the process in question is a session leader.
2184  * ia_set_process_group assumes that pidlock is held by the caller,
2185  * either strioctl or priocntlsys.  If the caller is priocntlsys
2186  * (via ia_parmsset) then the p_lock of the session leader is held
2187  * and the code needs to be careful about acquiring other p_locks.
2188  */
2189 static void
2190 ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid)
2191 {
2192         proc_t          *leader, *fg, *bg;
2193         tsproc_t        *tspp;
2194         kthread_t       *tx;
2195         int             plocked = 0;
2196 
2197         ASSERT(MUTEX_HELD(&pidlock));
2198 
2199         /*
2200          * see if the session leader is interactive AND
2201          * if it is currently "on" AND controlling a tty
2202          * iff it is then make the processes in the foreground
2203          * group interactive and the processes in the background
2204          * group non-interactive.
2205          */
2206         if ((leader = (proc_t *)prfind(sid)) == NULL) {
2207                 return;
2208         }
2209         if (leader->p_stat == SIDL) {
2210                 return;
2211         }
2212         if ((tx = proctot(leader)) == NULL) {
2213                 return;
2214         }
2215         /*
2216          * XXX do all the threads in the leader
2217          */
2218         if (tx->t_cid != ia_cid) {
2219                 return;
2220         }
2221         tspp = tx->t_cldata;
2222         /*
2223          * session leaders that are not interactive need not have
2224          * any processing done for them.  They are typically shells
2225          * that do not have focus and are changing the process group
2226          * attatched to the tty, e.g. a process that is exiting
2227          */
2228         mutex_enter(&leader->p_sessp->s_lock);
2229         if (!(tspp->ts_flags & TSIASET) ||
2230             (leader->p_sessp->s_vp == NULL) ||
2231             (leader->p_sessp->s_vp->v_stream == NULL)) {
2232                 mutex_exit(&leader->p_sessp->s_lock);
2233                 return;
2234         }
2235         mutex_exit(&leader->p_sessp->s_lock);
2236 
2237         /*
2238          * If we're already holding the leader's p_lock, we should use
2239          * mutex_tryenter instead of mutex_enter to avoid deadlocks from
2240          * lock ordering violations.
2241          */
2242         if (mutex_owned(&leader->p_lock))
2243                 plocked = 1;
2244 
2245         if (fg_pgid == 0)
2246                 goto skip;
2247         /*
2248          * now look for all processes in the foreground group and
2249          * make them interactive
2250          */
2251         for (fg = (proc_t *)pgfind(fg_pgid); fg != NULL; fg = fg->p_pglink) {
2252                 /*
2253                  * if the process is SIDL it's begin forked, ignore it
2254                  */
2255                 if (fg->p_stat == SIDL) {
2256                         continue;
2257                 }
2258                 /*
2259                  * sesssion leaders must be turned on/off explicitly
2260                  * not implicitly as happens to other members of
2261                  * the process group.
2262                  */
2263                 if (fg->p_pid  == fg->p_sessp->s_sid) {
2264                         continue;
2265                 }
2266 
2267                 TRACE_1(TR_FAC_IA, TR_GROUP_ON,
2268                     "group on:proc %p", fg);
2269 
2270                 if (plocked) {
2271                         if (mutex_tryenter(&fg->p_lock) == 0)
2272                                 continue;
2273                 } else {
2274                         mutex_enter(&fg->p_lock);
2275                 }
2276 
2277                 if ((tx = proctot(fg)) == NULL) {
2278                         mutex_exit(&fg->p_lock);
2279                         continue;
2280                 }
2281                 do {
2282                         thread_lock(tx);
2283                         /*
2284                          * if this thread is not interactive continue
2285                          */
2286                         if (tx->t_cid != ia_cid) {
2287                                 thread_unlock(tx);
2288                                 continue;
2289                         }
2290                         tspp = tx->t_cldata;
2291                         tspp->ts_flags |= TSIASET;
2292                         tspp->ts_boost = ia_boost;
2293                         TS_NEWUMDPRI(tspp);
2294                         if ((tspp->ts_flags & TSKPRI) != 0) {
2295                                 thread_unlock(tx);
2296                                 continue;
2297                         }
2298                         tspp->ts_dispwait = 0;
2299                         ts_change_priority(tx, tspp);
2300                         thread_unlock(tx);
2301                 } while ((tx = tx->t_forw) != fg->p_tlist);
2302                 mutex_exit(&fg->p_lock);
2303         }
2304 skip:
2305         if (bg_pgid == 0)
2306                 return;
2307         for (bg = (proc_t *)pgfind(bg_pgid); bg != NULL; bg = bg->p_pglink) {
2308                 if (bg->p_stat == SIDL) {
2309                         continue;
2310                 }
2311                 /*
2312                  * sesssion leaders must be turned off explicitly
2313                  * not implicitly as happens to other members of
2314                  * the process group.
2315                  */
2316                 if (bg->p_pid == bg->p_sessp->s_sid) {
2317                         continue;
2318                 }
2319 
2320                 TRACE_1(TR_FAC_IA, TR_GROUP_OFF,
2321                     "group off:proc %p", bg);
2322 
2323                 if (plocked) {
2324                         if (mutex_tryenter(&bg->p_lock) == 0)
2325                                 continue;
2326                 } else {
2327                         mutex_enter(&bg->p_lock);
2328                 }
2329 
2330                 if ((tx = proctot(bg)) == NULL) {
2331                         mutex_exit(&bg->p_lock);
2332                         continue;
2333                 }
2334                 do {
2335                         thread_lock(tx);
2336                         /*
2337                          * if this thread is not interactive continue
2338                          */
2339                         if (tx->t_cid != ia_cid) {
2340                                 thread_unlock(tx);
2341                                 continue;
2342                         }
2343                         tspp = tx->t_cldata;
2344                         tspp->ts_flags &= ~TSIASET;
2345                         tspp->ts_boost = -ia_boost;
2346                         TS_NEWUMDPRI(tspp);
2347                         if ((tspp->ts_flags & TSKPRI) != 0) {
2348                                 thread_unlock(tx);
2349                                 continue;
2350                         }
2351 
2352                         tspp->ts_dispwait = 0;
2353                         ts_change_priority(tx, tspp);
2354                         thread_unlock(tx);
2355                 } while ((tx = tx->t_forw) != bg->p_tlist);
2356                 mutex_exit(&bg->p_lock);
2357         }
2358 }
2359 
2360 
2361 static void
2362 ts_change_priority(kthread_t *t, tsproc_t *tspp)
2363 {
2364         pri_t   new_pri;
2365 
2366         ASSERT(THREAD_LOCK_HELD(t));
2367         new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
2368         ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
2369         tspp->ts_flags &= ~TSRESTORE;
2370         t->t_cpri = tspp->ts_upri;
2371         if (t == curthread || t->t_state == TS_ONPROC) {
2372                 /* curthread is always onproc */
2373                 cpu_t   *cp = t->t_disp_queue->disp_cpu;
2374                 THREAD_CHANGE_PRI(t, new_pri);
2375                 if (t == cp->cpu_dispthread)
2376                         cp->cpu_dispatch_pri = DISP_PRIO(t);
2377                 if (DISP_MUST_SURRENDER(t)) {
2378                         tspp->ts_flags |= TSBACKQ;
2379                         cpu_surrender(t);
2380                 } else {
2381                         tspp->ts_timeleft =
2382                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2383                 }
2384         } else {
2385                 int     frontq;
2386 
2387                 frontq = (tspp->ts_flags & TSIASET) != 0;
2388                 /*
2389                  * When the priority of a thread is changed,
2390                  * it may be necessary to adjust its position
2391                  * on a sleep queue or dispatch queue.
2392                  * The function thread_change_pri accomplishes
2393                  * this.
2394                  */
2395                 if (thread_change_pri(t, new_pri, frontq)) {
2396                         /*
2397                          * The thread was on a run queue. Reset
2398                          * its CPU timeleft from the quantum
2399                          * associated with the new priority.
2400                          */
2401                         tspp->ts_timeleft =
2402                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2403                 } else {
2404                         tspp->ts_flags |= TSBACKQ;
2405                 }
2406         }
2407 }
2408 
2409 static int
2410 ts_alloc(void **p, int flag)
2411 {
2412         void *bufp;
2413         bufp = kmem_alloc(sizeof (tsproc_t), flag);
2414         if (bufp == NULL) {
2415                 return (ENOMEM);
2416         } else {
2417                 *p = bufp;
2418                 return (0);
2419         }
2420 }
2421 
2422 static void
2423 ts_free(void *bufp)
2424 {
2425         if (bufp)
2426                 kmem_free(bufp, sizeof (tsproc_t));
2427 }