Print this page
patch delete-t_stime
patch remove-swapenq-flag
patch remove-dont-swap-flag
patch remove-swapinout-class-ops
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/disp/fss.c
+++ new/usr/src/uts/common/disp/fss.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2013, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/cred.h>
31 31 #include <sys/proc.h>
32 32 #include <sys/strsubr.h>
33 33 #include <sys/priocntl.h>
34 34 #include <sys/class.h>
35 35 #include <sys/disp.h>
36 36 #include <sys/procset.h>
37 37 #include <sys/debug.h>
38 38 #include <sys/kmem.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/systm.h>
41 41 #include <sys/schedctl.h>
42 42 #include <sys/vmsystm.h>
43 43 #include <sys/atomic.h>
44 44 #include <sys/project.h>
45 45 #include <sys/modctl.h>
46 46 #include <sys/fss.h>
47 47 #include <sys/fsspriocntl.h>
48 48 #include <sys/cpupart.h>
49 49 #include <sys/zone.h>
50 50 #include <vm/rm.h>
51 51 #include <vm/seg_kmem.h>
52 52 #include <sys/tnf_probe.h>
53 53 #include <sys/policy.h>
54 54 #include <sys/sdt.h>
55 55 #include <sys/cpucaps.h>
56 56
57 57 /*
58 58 * FSS Data Structures:
59 59 *
60 60 * fsszone
61 61 * ----- -----
62 62 * ----- | | | |
63 63 * | |-------->| |<------->| |<---->...
64 64 * | | ----- -----
65 65 * | | ^ ^ ^
66 66 * | |--- | \ \
67 67 * ----- | | \ \
68 68 * fsspset | | \ \
69 69 * | | \ \
70 70 * | ----- ----- -----
71 71 * -->| |<--->| |<--->| |
72 72 * | | | | | |
73 73 * ----- ----- -----
74 74 * fssproj
75 75 *
76 76 *
77 77 * That is, fsspsets contain a list of fsszone's that are currently active in
78 78 * the pset, and a list of fssproj's, corresponding to projects with runnable
79 79 * threads on the pset. fssproj's in turn point to the fsszone which they
80 80 * are a member of.
81 81 *
82 82 * An fssproj_t is removed when there are no threads in it.
83 83 *
84 84 * An fsszone_t is removed when there are no projects with threads in it.
85 85 *
86 86 * Projects in a zone compete with each other for cpu time, receiving cpu
87 87 * allocation within a zone proportional to fssproj->fssp_shares
88 88 * (project.cpu-shares); at a higher level zones compete with each other,
89 89 * receiving allocation in a pset proportional to fsszone->fssz_shares
90 90 * (zone.cpu-shares). See fss_decay_usage() for the precise formula.
91 91 */
92 92
93 93 static pri_t fss_init(id_t, int, classfuncs_t **);
94 94
95 95 static struct sclass fss = {
96 96 "FSS",
97 97 fss_init,
98 98 0
99 99 };
100 100
101 101 extern struct mod_ops mod_schedops;
102 102
103 103 /*
104 104 * Module linkage information for the kernel.
105 105 */
106 106 static struct modlsched modlsched = {
107 107 &mod_schedops, "fair share scheduling class", &fss
108 108 };
109 109
110 110 static struct modlinkage modlinkage = {
111 111 MODREV_1, (void *)&modlsched, NULL
112 112 };
113 113
114 114 #define FSS_MAXUPRI 60
115 115
116 116 /*
117 117 * The fssproc_t structures are kept in an array of circular doubly linked
118 118 * lists. A hash on the thread pointer is used to determine which list each
119 119 * thread should be placed in. Each list has a dummy "head" which is never
120 120 * removed, so the list is never empty. fss_update traverses these lists to
121 121 * update the priorities of threads that have been waiting on the run queue.
122 122 */
123 123 #define FSS_LISTS 16 /* number of lists, must be power of 2 */
124 124 #define FSS_LIST_HASH(t) (((uintptr_t)(t) >> 9) & (FSS_LISTS - 1))
125 125 #define FSS_LIST_NEXT(i) (((i) + 1) & (FSS_LISTS - 1))
126 126
127 127 #define FSS_LIST_INSERT(fssproc) \
128 128 { \
129 129 int index = FSS_LIST_HASH(fssproc->fss_tp); \
130 130 kmutex_t *lockp = &fss_listlock[index]; \
131 131 fssproc_t *headp = &fss_listhead[index]; \
132 132 mutex_enter(lockp); \
133 133 fssproc->fss_next = headp->fss_next; \
134 134 fssproc->fss_prev = headp; \
135 135 headp->fss_next->fss_prev = fssproc; \
136 136 headp->fss_next = fssproc; \
137 137 mutex_exit(lockp); \
138 138 }
139 139
140 140 #define FSS_LIST_DELETE(fssproc) \
141 141 { \
142 142 int index = FSS_LIST_HASH(fssproc->fss_tp); \
143 143 kmutex_t *lockp = &fss_listlock[index]; \
144 144 mutex_enter(lockp); \
145 145 fssproc->fss_prev->fss_next = fssproc->fss_next; \
146 146 fssproc->fss_next->fss_prev = fssproc->fss_prev; \
147 147 mutex_exit(lockp); \
148 148 }
149 149
150 150 #define FSS_TICK_COST 1000 /* tick cost for threads with nice level = 0 */
151 151
152 152 /*
153 153 * Decay rate percentages are based on n/128 rather than n/100 so that
154 154 * calculations can avoid having to do an integer divide by 100 (divide
155 155 * by FSS_DECAY_BASE == 128 optimizes to an arithmetic shift).
156 156 *
157 157 * FSS_DECAY_MIN = 83/128 ~= 65%
158 158 * FSS_DECAY_MAX = 108/128 ~= 85%
159 159 * FSS_DECAY_USG = 96/128 ~= 75%
160 160 */
161 161 #define FSS_DECAY_MIN 83 /* fsspri decay pct for threads w/ nice -20 */
162 162 #define FSS_DECAY_MAX 108 /* fsspri decay pct for threads w/ nice +19 */
163 163 #define FSS_DECAY_USG 96 /* fssusage decay pct for projects */
164 164 #define FSS_DECAY_BASE 128 /* base for decay percentages above */
165 165
166 166 #define FSS_NICE_MIN 0
167 167 #define FSS_NICE_MAX (2 * NZERO - 1)
168 168 #define FSS_NICE_RANGE (FSS_NICE_MAX - FSS_NICE_MIN + 1)
169 169
170 170 static int fss_nice_tick[FSS_NICE_RANGE];
171 171 static int fss_nice_decay[FSS_NICE_RANGE];
172 172
173 173 static pri_t fss_maxupri = FSS_MAXUPRI; /* maximum FSS user priority */
174 174 static pri_t fss_maxumdpri; /* maximum user mode fss priority */
175 175 static pri_t fss_maxglobpri; /* maximum global priority used by fss class */
176 176 static pri_t fss_minglobpri; /* minimum global priority */
177 177
178 178 static fssproc_t fss_listhead[FSS_LISTS];
179 179 static kmutex_t fss_listlock[FSS_LISTS];
180 180
181 181 static fsspset_t *fsspsets;
182 182 static kmutex_t fsspsets_lock; /* protects fsspsets */
183 183
184 184 static id_t fss_cid;
185 185
186 186 static time_t fss_minrun = 2; /* t_pri becomes 59 within 2 secs */
187 187 static time_t fss_minslp = 2; /* min time on sleep queue for hardswap */
188 188 static int fss_quantum = 11;
189 189
190 190 static void fss_newpri(fssproc_t *);
191 191 static void fss_update(void *);
192 192 static int fss_update_list(int);
193 193 static void fss_change_priority(kthread_t *, fssproc_t *);
194 194
195 195 static int fss_admin(caddr_t, cred_t *);
196 196 static int fss_getclinfo(void *);
197 197 static int fss_parmsin(void *);
198 198 static int fss_parmsout(void *, pc_vaparms_t *);
199 199 static int fss_vaparmsin(void *, pc_vaparms_t *);
200 200 static int fss_vaparmsout(void *, pc_vaparms_t *);
201 201 static int fss_getclpri(pcpri_t *);
202 202 static int fss_alloc(void **, int);
203 203 static void fss_free(void *);
204 204
205 205 static int fss_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
↓ open down ↓ |
205 lines elided |
↑ open up ↑ |
206 206 static void fss_exitclass(void *);
207 207 static int fss_canexit(kthread_t *, cred_t *);
208 208 static int fss_fork(kthread_t *, kthread_t *, void *);
209 209 static void fss_forkret(kthread_t *, kthread_t *);
210 210 static void fss_parmsget(kthread_t *, void *);
211 211 static int fss_parmsset(kthread_t *, void *, id_t, cred_t *);
212 212 static void fss_stop(kthread_t *, int, int);
213 213 static void fss_exit(kthread_t *);
214 214 static void fss_active(kthread_t *);
215 215 static void fss_inactive(kthread_t *);
216 -static pri_t fss_swapin(kthread_t *, int);
217 -static pri_t fss_swapout(kthread_t *, int);
218 216 static void fss_trapret(kthread_t *);
219 217 static void fss_preempt(kthread_t *);
220 218 static void fss_setrun(kthread_t *);
221 219 static void fss_sleep(kthread_t *);
222 220 static void fss_tick(kthread_t *);
223 221 static void fss_wakeup(kthread_t *);
224 222 static int fss_donice(kthread_t *, cred_t *, int, int *);
225 223 static int fss_doprio(kthread_t *, cred_t *, int, int *);
226 224 static pri_t fss_globpri(kthread_t *);
227 225 static void fss_yield(kthread_t *);
228 226 static void fss_nullsys();
229 227
230 228 static struct classfuncs fss_classfuncs = {
231 229 /* class functions */
232 230 fss_admin,
233 231 fss_getclinfo,
234 232 fss_parmsin,
235 233 fss_parmsout,
236 234 fss_vaparmsin,
237 235 fss_vaparmsout,
238 236 fss_getclpri,
239 237 fss_alloc,
240 238 fss_free,
241 239
242 240 /* thread functions */
243 241 fss_enterclass,
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
244 242 fss_exitclass,
245 243 fss_canexit,
246 244 fss_fork,
247 245 fss_forkret,
248 246 fss_parmsget,
249 247 fss_parmsset,
250 248 fss_stop,
251 249 fss_exit,
252 250 fss_active,
253 251 fss_inactive,
254 - fss_swapin,
255 - fss_swapout,
256 252 fss_trapret,
257 253 fss_preempt,
258 254 fss_setrun,
259 255 fss_sleep,
260 256 fss_tick,
261 257 fss_wakeup,
262 258 fss_donice,
263 259 fss_globpri,
264 260 fss_nullsys, /* set_process_group */
265 261 fss_yield,
266 262 fss_doprio,
267 263 };
268 264
269 265 int
270 266 _init()
271 267 {
272 268 return (mod_install(&modlinkage));
273 269 }
274 270
275 271 int
276 272 _fini()
277 273 {
278 274 return (EBUSY);
279 275 }
280 276
281 277 int
282 278 _info(struct modinfo *modinfop)
283 279 {
284 280 return (mod_info(&modlinkage, modinfop));
285 281 }
286 282
287 283 /*ARGSUSED*/
288 284 static int
289 285 fss_project_walker(kproject_t *kpj, void *buf)
290 286 {
291 287 return (0);
292 288 }
293 289
/*
 * Allocate an fssbuf_t containing `cnt' zeroed structures of the
 * requested type, where `cnt' is chosen by `op':
 *
 *   FSS_NPSET_BUF - one per non-empty cpu partition
 *   FSS_NPROJ_BUF - one per active project (across all zones)
 *   FSS_ONE_BUF   - exactly one
 *
 * `type' selects the element size: fssproj_t (FSS_ALLOC_PROJ) or
 * fsszone_t (FSS_ALLOC_ZONE).  Caller must hold cpu_lock (the counts
 * above must stay stable while we size the buffer).  All allocations
 * use KM_SLEEP, so this never returns NULL; free with fss_freebuf().
 */
void *
fss_allocbuf(int op, int type)
{
	fssbuf_t *fssbuf;
	void **fsslist;
	int cnt;
	int i;
	size_t size;

	/*
	 * `op' and `type' come from in-kernel callers only; the switches
	 * below have no default case because these ASSERTs define the
	 * complete set of legal values.
	 */
	ASSERT(op == FSS_NPSET_BUF || op == FSS_NPROJ_BUF || op == FSS_ONE_BUF);
	ASSERT(type == FSS_ALLOC_PROJ || type == FSS_ALLOC_ZONE);
	ASSERT(MUTEX_HELD(&cpu_lock));

	fssbuf = kmem_zalloc(sizeof (fssbuf_t), KM_SLEEP);
	switch (op) {
	case FSS_NPSET_BUF:
		/* Count non-empty cpu partitions without copying them out. */
		cnt = cpupart_list(NULL, 0, CP_NONEMPTY);
		break;
	case FSS_NPROJ_BUF:
		/* The walker does nothing; the walk's return is the count. */
		cnt = project_walk_all(ALL_ZONES, fss_project_walker, NULL);
		break;
	case FSS_ONE_BUF:
		cnt = 1;
		break;
	}

	switch (type) {
	case FSS_ALLOC_PROJ:
		size = sizeof (fssproj_t);
		break;
	case FSS_ALLOC_ZONE:
		size = sizeof (fsszone_t);
		break;
	}
	/* One pointer slot per element, each element zeroed separately. */
	fsslist = kmem_zalloc(cnt * sizeof (void *), KM_SLEEP);
	fssbuf->fssb_size = cnt;
	fssbuf->fssb_list = fsslist;
	for (i = 0; i < cnt; i++)
		fsslist[i] = kmem_zalloc(size, KM_SLEEP);
	return (fssbuf);
}
335 331
/*
 * Release a buffer obtained from fss_allocbuf().  `type' must match the
 * type the buffer was allocated with so the per-element size is correct.
 * NULL entries in the list are tolerated: callers may have consumed
 * (taken ownership of) individual elements and cleared their slots
 * before freeing the rest.
 */
void
fss_freebuf(fssbuf_t *fssbuf, int type)
{
	void **fsslist;
	int i;
	size_t size;

	ASSERT(fssbuf != NULL);
	ASSERT(type == FSS_ALLOC_PROJ || type == FSS_ALLOC_ZONE);
	fsslist = fssbuf->fssb_list;

	switch (type) {
	case FSS_ALLOC_PROJ:
		size = sizeof (fssproj_t);
		break;
	case FSS_ALLOC_ZONE:
		size = sizeof (fsszone_t);
		break;
	}

	/* Free surviving elements, then the pointer array, then the header. */
	for (i = 0; i < fssbuf->fssb_size; i++) {
		if (fsslist[i] != NULL)
			kmem_free(fsslist[i], size);
	}
	kmem_free(fsslist, sizeof (void *) * fssbuf->fssb_size);
	kmem_free(fssbuf, sizeof (fssbuf_t));
}
363 359
/*
 * Return the fsspset_t tracking the given cpu partition, creating the
 * association if one does not exist yet.  The fsspsets array is sized
 * to max_ncpus at fss_init() time, so a free slot is always available
 * for any live partition (there can never be more partitions than
 * cpus); hence the final ASSERT(found == 1).
 * Caller must hold fsspsets_lock.
 */
static fsspset_t *
fss_find_fsspset(cpupart_t *cpupart)
{
	int i;
	fsspset_t *fsspset = NULL;
	int found = 0;

	ASSERT(cpupart != NULL);
	ASSERT(MUTEX_HELD(&fsspsets_lock));

	/*
	 * Search for the cpupart pointer in the array of fsspsets.
	 */
	for (i = 0; i < max_ncpus; i++) {
		fsspset = &fsspsets[i];
		if (fsspset->fssps_cpupart == cpupart) {
			/* An in-use slot always has at least one project. */
			ASSERT(fsspset->fssps_nproj > 0);
			found = 1;
			break;
		}
	}
	if (found == 0) {
		/*
		 * If we didn't find anything, then use the first
		 * available slot in the fsspsets array.
		 */
		for (i = 0; i < max_ncpus; i++) {
			fsspset = &fsspsets[i];
			if (fsspset->fssps_cpupart == NULL) {
				ASSERT(fsspset->fssps_nproj == 0);
				found = 1;
				break;
			}
		}
		/* Claim the free slot for this partition. */
		fsspset->fssps_cpupart = cpupart;
	}
	ASSERT(found == 1);
	return (fsspset);
}
403 399
/*
 * Tear down an fsspset that no longer has any projects or zones,
 * returning its slot in the fsspsets array to the free pool (a NULL
 * fssps_cpupart marks the slot free for fss_find_fsspset()).
 * Caller must hold both fsspsets_lock and the pset's fssps_lock, and
 * the pset must already be empty.
 */
static void
fss_del_fsspset(fsspset_t *fsspset)
{
	ASSERT(MUTEX_HELD(&fsspsets_lock));
	ASSERT(MUTEX_HELD(&fsspset->fssps_lock));
	ASSERT(fsspset->fssps_nproj == 0);
	ASSERT(fsspset->fssps_list == NULL);
	ASSERT(fsspset->fssps_zones == NULL);
	fsspset->fssps_cpupart = NULL;
	fsspset->fssps_maxfsspri = 0;
	fsspset->fssps_shares = 0;
}
416 412
417 413 /*
418 414 * The following routine returns a pointer to the fsszone structure which
419 415 * belongs to zone "zone" and cpu partition fsspset, if such structure exists.
420 416 */
421 417 static fsszone_t *
422 418 fss_find_fsszone(fsspset_t *fsspset, zone_t *zone)
423 419 {
424 420 fsszone_t *fsszone;
425 421
426 422 ASSERT(MUTEX_HELD(&fsspset->fssps_lock));
427 423
428 424 if (fsspset->fssps_list != NULL) {
429 425 /*
430 426 * There are projects/zones active on this cpu partition
431 427 * already. Try to find our zone among them.
432 428 */
433 429 fsszone = fsspset->fssps_zones;
434 430 do {
435 431 if (fsszone->fssz_zone == zone) {
436 432 return (fsszone);
437 433 }
438 434 fsszone = fsszone->fssz_next;
439 435 } while (fsszone != fsspset->fssps_zones);
440 436 }
441 437 return (NULL);
442 438 }
443 439
/*
 * The following routine links new fsszone structure into doubly linked list of
 * zones active on the specified cpu partition.  The new zone's share count
 * (fssz_rshares) is snapshotted from zone.cpu-shares at insertion time;
 * fss_decay_usage() re-syncs it once per second thereafter.
 * Caller must hold the pset's fssps_lock.
 */
static void
fss_insert_fsszone(fsspset_t *fsspset, zone_t *zone, fsszone_t *fsszone)
{
	ASSERT(MUTEX_HELD(&fsspset->fssps_lock));

	fsszone->fssz_zone = zone;
	fsszone->fssz_rshares = zone->zone_shares;

	if (fsspset->fssps_zones == NULL) {
		/*
		 * This will be the first fsszone for this fsspset;
		 * it is its own neighbor in the circular list.
		 */
		fsszone->fssz_next = fsszone->fssz_prev = fsszone;
		fsspset->fssps_zones = fsszone;
	} else {
		/*
		 * Insert this fsszone to the doubly linked list,
		 * just before the current head, and make the new
		 * zone the list head.
		 */
		fsszone_t *fssz_head = fsspset->fssps_zones;

		fsszone->fssz_next = fssz_head;
		fsszone->fssz_prev = fssz_head->fssz_prev;
		fssz_head->fssz_prev->fssz_next = fsszone;
		fssz_head->fssz_prev = fsszone;
		fsspset->fssps_zones = fsszone;
	}
}
475 471
/*
 * The following routine removes a single fsszone structure from the doubly
 * linked list of zones active on the specified cpu partition.  Note that
 * global fsspsets_lock must be held in case this fsszone structure is the last
 * on the above mentioned list.  Also note that the fsszone structure is not
 * freed here, it is the responsibility of the caller to call kmem_free for it.
 * The zone must already be idle: no projects, no shares, no runnable threads.
 */
static void
fss_remove_fsszone(fsspset_t *fsspset, fsszone_t *fsszone)
{
	ASSERT(MUTEX_HELD(&fsspset->fssps_lock));
	ASSERT(fsszone->fssz_nproj == 0);
	ASSERT(fsszone->fssz_shares == 0);
	ASSERT(fsszone->fssz_runnable == 0);

	if (fsszone->fssz_next != fsszone) {
		/*
		 * This is not the last zone in the list: unlink it and,
		 * if it was the head, advance the head pointer.
		 */
		fsszone->fssz_prev->fssz_next = fsszone->fssz_next;
		fsszone->fssz_next->fssz_prev = fsszone->fssz_prev;
		if (fsspset->fssps_zones == fsszone)
			fsspset->fssps_zones = fsszone->fssz_next;
	} else {
		/*
		 * This was the last zone active in this cpu partition.
		 */
		fsspset->fssps_zones = NULL;
	}
}
506 502
507 503 /*
508 504 * The following routine returns a pointer to the fssproj structure
509 505 * which belongs to project kpj and cpu partition fsspset, if such structure
510 506 * exists.
511 507 */
512 508 static fssproj_t *
513 509 fss_find_fssproj(fsspset_t *fsspset, kproject_t *kpj)
514 510 {
515 511 fssproj_t *fssproj;
516 512
517 513 ASSERT(MUTEX_HELD(&fsspset->fssps_lock));
518 514
519 515 if (fsspset->fssps_list != NULL) {
520 516 /*
521 517 * There are projects running on this cpu partition already.
522 518 * Try to find our project among them.
523 519 */
524 520 fssproj = fsspset->fssps_list;
525 521 do {
526 522 if (fssproj->fssp_proj == kpj) {
527 523 ASSERT(fssproj->fssp_pset == fsspset);
528 524 return (fssproj);
529 525 }
530 526 fssproj = fssproj->fssp_next;
531 527 } while (fssproj != fsspset->fssps_list);
532 528 }
533 529 return (NULL);
534 530 }
535 531
/*
 * The following routine links new fssproj structure into doubly linked list
 * of projects running on the specified cpu partition, and attaches it to its
 * owning fsszone.  The project's share count (fssp_shares) is snapshotted
 * from project.cpu-shares here; fss_decay_usage() re-syncs it periodically.
 * Caller must hold the pset's fssps_lock.
 */
static void
fss_insert_fssproj(fsspset_t *fsspset, kproject_t *kpj, fsszone_t *fsszone,
    fssproj_t *fssproj)
{
	ASSERT(MUTEX_HELD(&fsspset->fssps_lock));

	fssproj->fssp_pset = fsspset;
	fssproj->fssp_proj = kpj;
	fssproj->fssp_shares = kpj->kpj_shares;

	fsspset->fssps_nproj++;

	if (fsspset->fssps_list == NULL) {
		/*
		 * This will be the first fssproj for this fsspset;
		 * it is its own neighbor in the circular list.
		 */
		fssproj->fssp_next = fssproj->fssp_prev = fssproj;
		fsspset->fssps_list = fssproj;
	} else {
		/*
		 * Insert this fssproj to the doubly linked list,
		 * just before the current head, and make the new
		 * project the list head.
		 */
		fssproj_t *fssp_head = fsspset->fssps_list;

		fssproj->fssp_next = fssp_head;
		fssproj->fssp_prev = fssp_head->fssp_prev;
		fssp_head->fssp_prev->fssp_next = fssproj;
		fssp_head->fssp_prev = fssproj;
		fsspset->fssps_list = fssproj;
	}
	/* Tie the project to its zone and bump the zone's project count. */
	fssproj->fssp_fsszone = fsszone;
	fsszone->fssz_nproj++;
	ASSERT(fsszone->fssz_nproj != 0);	/* overflow check */
}
574 570
/*
 * The following routine removes a single fssproj structure from the doubly
 * linked list of projects running on the specified cpu partition.  Note that
 * global fsspsets_lock must be held in case if this fssproj structure is the
 * last on the above mentioned list.  Also note that the fssproj structure is
 * not freed here, it is the responsibility of the caller to call kmem_free
 * for it.  When the last project of a zone leaves, the zone is removed as
 * well, and when the last project of the pset leaves, the whole fsspset is
 * dismantled via fss_del_fsspset().
 */
static void
fss_remove_fssproj(fsspset_t *fsspset, fssproj_t *fssproj)
{
	fsszone_t *fsszone;

	ASSERT(MUTEX_HELD(&fsspsets_lock));
	ASSERT(MUTEX_HELD(&fsspset->fssps_lock));
	ASSERT(fssproj->fssp_runnable == 0);

	fsspset->fssps_nproj--;

	fsszone = fssproj->fssp_fsszone;
	fsszone->fssz_nproj--;

	if (fssproj->fssp_next != fssproj) {
		/*
		 * This is not the last part in the list: unlink it,
		 * advance the head if needed, and drop the zone if this
		 * was its last project.
		 */
		fssproj->fssp_prev->fssp_next = fssproj->fssp_next;
		fssproj->fssp_next->fssp_prev = fssproj->fssp_prev;
		if (fsspset->fssps_list == fssproj)
			fsspset->fssps_list = fssproj->fssp_next;
		if (fsszone->fssz_nproj == 0)
			fss_remove_fsszone(fsspset, fsszone);
	} else {
		/*
		 * This was the last project part running
		 * at this cpu partition: the zone must be empty too,
		 * so tear down both the zone and the pset slot.
		 */
		fsspset->fssps_list = NULL;
		ASSERT(fsspset->fssps_nproj == 0);
		ASSERT(fsszone->fssz_nproj == 0);
		fss_remove_fsszone(fsspset, fsszone);
		fss_del_fsspset(fsspset);
	}
}
619 615
/*
 * Mark thread t no longer runnable for share-accounting purposes:
 * decrement its project's runnable count and, on the project's last
 * runnable thread, pull the project's shares out of the zone total
 * (and likewise the zone's shares out of the pset total when the zone
 * goes idle).  Counterpart of fss_active().
 * Caller must hold t's thread lock.
 */
static void
fss_inactive(kthread_t *t)
{
	fssproc_t *fssproc;
	fssproj_t *fssproj;
	fsspset_t *fsspset;
	fsszone_t *fsszone;

	ASSERT(THREAD_LOCK_HELD(t));
	fssproc = FSSPROC(t);
	fssproj = FSSPROC2FSSPROJ(fssproc);
	if (fssproj == NULL)	/* if this thread already exited */
		return;
	fsspset = FSSPROJ2FSSPSET(fssproj);
	fsszone = fssproj->fssp_fsszone;
	/* The share totals are protected by the pset's displock. */
	disp_lock_enter_high(&fsspset->fssps_displock);
	ASSERT(fssproj->fssp_runnable > 0);
	if (--fssproj->fssp_runnable == 0) {
		fsszone->fssz_shares -= fssproj->fssp_shares;
		if (--fsszone->fssz_runnable == 0)
			fsspset->fssps_shares -= fsszone->fssz_rshares;
	}
	/* A thread must be active exactly once before going inactive. */
	ASSERT(fssproc->fss_runnable == 1);
	fssproc->fss_runnable = 0;
	disp_lock_exit_high(&fsspset->fssps_displock);
}
646 642
/*
 * Mark thread t runnable for share-accounting purposes: increment its
 * project's runnable count and, on the project's first runnable thread,
 * fold the project's shares into the zone total (and likewise the
 * zone's shares into the pset total when the zone becomes active).
 * Mirror image of fss_inactive().
 * Caller must hold t's thread lock.
 */
static void
fss_active(kthread_t *t)
{
	fssproc_t *fssproc;
	fssproj_t *fssproj;
	fsspset_t *fsspset;
	fsszone_t *fsszone;

	ASSERT(THREAD_LOCK_HELD(t));
	fssproc = FSSPROC(t);
	fssproj = FSSPROC2FSSPROJ(fssproc);
	if (fssproj == NULL)	/* if this thread already exited */
		return;
	fsspset = FSSPROJ2FSSPSET(fssproj);
	fsszone = fssproj->fssp_fsszone;
	/* The share totals are protected by the pset's displock. */
	disp_lock_enter_high(&fsspset->fssps_displock);
	if (++fssproj->fssp_runnable == 1) {
		fsszone->fssz_shares += fssproj->fssp_shares;
		if (++fsszone->fssz_runnable == 1)
			fsspset->fssps_shares += fsszone->fssz_rshares;
	}
	/* A thread may only be activated once until it goes inactive. */
	ASSERT(fssproc->fss_runnable == 0);
	fssproc->fss_runnable = 1;
	disp_lock_exit_high(&fsspset->fssps_displock);
}
672 668
/*
 * Fair share scheduler initialization. Called by dispinit() at boot time.
 * We can ignore clparmsz argument since we know that the smallest possible
 * parameter buffer is big enough for us.
 *
 * Records our class id, sets up the FSS priority range (0 ..
 * minclsyspri), allocates the per-partition fsspset array, initializes
 * the fssproc hash lists, publishes the class-function vector, and
 * precomputes the per-nice-level tick-cost and decay tables.
 * Returns the highest global priority FSS will use.
 */
/*ARGSUSED*/
static pri_t
fss_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	fss_cid = cid;
	/* FSS user priorities sit strictly below the system class range. */
	fss_maxumdpri = minclsyspri - 1;
	fss_maxglobpri = minclsyspri;
	fss_minglobpri = 0;
	/* One slot per possible cpu: enough for any number of partitions. */
	fsspsets = kmem_zalloc(sizeof (fsspset_t) * max_ncpus, KM_SLEEP);

	/*
	 * Initialize the fssproc hash table: each bucket is a circular
	 * list whose dummy head points at itself when empty.
	 */
	for (i = 0; i < FSS_LISTS; i++)
		fss_listhead[i].fss_next = fss_listhead[i].fss_prev =
		    &fss_listhead[i];

	*clfuncspp = &fss_classfuncs;

	/*
	 * Fill in fss_nice_tick and fss_nice_decay arrays:
	 * The cost of a tick is lower at positive nice values (so that it
	 * will not increase its project's usage as much as normal) with 50%
	 * drop at the maximum level and 50% increase at the minimum level.
	 * The fsspri decay is slower at positive nice values. fsspri values
	 * of processes with negative nice levels must decay faster to receive
	 * time slices more frequently than normal.
	 */
	for (i = 0; i < FSS_NICE_RANGE; i++) {
		fss_nice_tick[i] = (FSS_TICK_COST * (((3 * FSS_NICE_RANGE) / 2)
		    - i)) / FSS_NICE_RANGE;
		fss_nice_decay[i] = FSS_DECAY_MIN +
		    ((FSS_DECAY_MAX - FSS_DECAY_MIN) * i) /
		    (FSS_NICE_RANGE - 1);
	}

	return (fss_maxglobpri);
}
720 716
721 717 /*
722 718 * Calculate the new cpupri based on the usage, the number of shares and
723 719 * the number of active threads. Reset the tick counter for this thread.
724 720 */
725 721 static void
726 722 fss_newpri(fssproc_t *fssproc)
727 723 {
728 724 kthread_t *tp;
729 725 fssproj_t *fssproj;
730 726 fsspset_t *fsspset;
731 727 fsszone_t *fsszone;
732 728 fsspri_t fsspri, maxfsspri;
733 729 pri_t invpri;
734 730 uint32_t ticks;
735 731
736 732 tp = fssproc->fss_tp;
737 733 ASSERT(tp != NULL);
738 734
739 735 if (tp->t_cid != fss_cid)
740 736 return;
741 737
742 738 ASSERT(THREAD_LOCK_HELD(tp));
743 739
744 740 fssproj = FSSPROC2FSSPROJ(fssproc);
745 741 fsszone = FSSPROJ2FSSZONE(fssproj);
746 742 if (fssproj == NULL)
747 743 /*
748 744 * No need to change priority of exited threads.
749 745 */
750 746 return;
751 747
752 748 fsspset = FSSPROJ2FSSPSET(fssproj);
753 749 disp_lock_enter_high(&fsspset->fssps_displock);
754 750
755 751 if (fssproj->fssp_shares == 0 || fsszone->fssz_rshares == 0) {
756 752 /*
757 753 * Special case: threads with no shares.
758 754 */
759 755 fssproc->fss_umdpri = fss_minglobpri;
760 756 fssproc->fss_ticks = 0;
761 757 disp_lock_exit_high(&fsspset->fssps_displock);
762 758 return;
763 759 }
764 760
765 761 /*
766 762 * fsspri += shusage * nrunnable * ticks
767 763 */
768 764 ticks = fssproc->fss_ticks;
769 765 fssproc->fss_ticks = 0;
770 766 fsspri = fssproc->fss_fsspri;
771 767 fsspri += fssproj->fssp_shusage * fssproj->fssp_runnable * ticks;
772 768 fssproc->fss_fsspri = fsspri;
773 769
774 770 if (fsspri < fss_maxumdpri)
775 771 fsspri = fss_maxumdpri; /* so that maxfsspri is != 0 */
776 772
777 773 /*
778 774 * The general priority formula:
779 775 *
780 776 * (fsspri * umdprirange)
781 777 * pri = maxumdpri - ------------------------
782 778 * maxfsspri
783 779 *
784 780 * If this thread's fsspri is greater than the previous largest
785 781 * fsspri, then record it as the new high and priority for this
786 782 * thread will be one (the lowest priority assigned to a thread
787 783 * that has non-zero shares).
788 784 * Note that this formula cannot produce out of bounds priority
789 785 * values; if it is changed, additional checks may need to be
790 786 * added.
791 787 */
792 788 maxfsspri = fsspset->fssps_maxfsspri;
793 789 if (fsspri >= maxfsspri) {
794 790 fsspset->fssps_maxfsspri = fsspri;
795 791 disp_lock_exit_high(&fsspset->fssps_displock);
796 792 fssproc->fss_umdpri = 1;
797 793 } else {
798 794 disp_lock_exit_high(&fsspset->fssps_displock);
799 795 invpri = (fsspri * (fss_maxumdpri - 1)) / maxfsspri;
800 796 fssproc->fss_umdpri = fss_maxumdpri - invpri;
801 797 }
802 798 }
803 799
/*
 * Decays usages of all running projects and resets their tick counters.
 * Called once per second from fss_update() after updating priorities.
 *
 * Lock order: fsspsets_lock -> fssps_lock -> fssps_displock.  Besides
 * decaying usage, this is also where changes to project.cpu-shares and
 * zone.cpu-shares made since the last pass are folded into the zone and
 * pset share totals, and where each project's normalized usage
 * (fssp_shusage) for the next second is computed.
 */
static void
fss_decay_usage()
{
	uint32_t zone_ext_shares, zone_int_shares;
	uint32_t kpj_shares, pset_shares;
	fsspset_t *fsspset;
	fssproj_t *fssproj;
	fsszone_t *fsszone;
	fsspri_t maxfsspri;
	int psetid;

	mutex_enter(&fsspsets_lock);
	/*
	 * Go through all active processor sets and decay usages of projects
	 * running on them.
	 */
	for (psetid = 0; psetid < max_ncpus; psetid++) {
		fsspset = &fsspsets[psetid];
		mutex_enter(&fsspset->fssps_lock);

		/* Skip slots with no partition or no active projects. */
		if (fsspset->fssps_cpupart == NULL ||
		    (fssproj = fsspset->fssps_list) == NULL) {
			mutex_exit(&fsspset->fssps_lock);
			continue;
		}

		/*
		 * Decay maxfsspri for this cpu partition with the
		 * fastest possible decay rate.
		 */
		disp_lock_enter(&fsspset->fssps_displock);

		maxfsspri = (fsspset->fssps_maxfsspri *
		    fss_nice_decay[NZERO]) / FSS_DECAY_BASE;
		/* Keep maxfsspri nonzero so fss_newpri() never divides by 0. */
		if (maxfsspri < fss_maxumdpri)
			maxfsspri = fss_maxumdpri;
		fsspset->fssps_maxfsspri = maxfsspri;

		do {
			/*
			 * Decay usage for each project running on
			 * this cpu partition.
			 */
			fssproj->fssp_usage =
			    (fssproj->fssp_usage * FSS_DECAY_USG) /
			    FSS_DECAY_BASE + fssproj->fssp_ticks;
			fssproj->fssp_ticks = 0;

			fsszone = fssproj->fssp_fsszone;
			/*
			 * Readjust the project's number of shares if it has
			 * changed since we checked it last time.
			 */
			kpj_shares = fssproj->fssp_proj->kpj_shares;
			if (fssproj->fssp_shares != kpj_shares) {
				if (fssproj->fssp_runnable != 0) {
					fsszone->fssz_shares -=
					    fssproj->fssp_shares;
					fsszone->fssz_shares += kpj_shares;
				}
				fssproj->fssp_shares = kpj_shares;
			}

			/*
			 * Readjust the zone's number of shares if it
			 * has changed since we checked it last time.
			 */
			zone_ext_shares = fsszone->fssz_zone->zone_shares;
			if (fsszone->fssz_rshares != zone_ext_shares) {
				if (fsszone->fssz_runnable != 0) {
					fsspset->fssps_shares -=
					    fsszone->fssz_rshares;
					fsspset->fssps_shares +=
					    zone_ext_shares;
				}
				fsszone->fssz_rshares = zone_ext_shares;
			}
			zone_int_shares = fsszone->fssz_shares;
			pset_shares = fsspset->fssps_shares;
			/*
			 * Calculate fssp_shusage value to be used
			 * for fsspri increments for the next second.
			 */
			if (kpj_shares == 0 || zone_ext_shares == 0) {
				fssproj->fssp_shusage = 0;
			} else if (FSSPROJ2KPROJ(fssproj) == proj0p) {
				/*
				 * Project 0 in the global zone has 50%
				 * of its zone.
				 */
				fssproj->fssp_shusage = (fssproj->fssp_usage *
				    zone_int_shares * zone_int_shares) /
				    (zone_ext_shares * zone_ext_shares);
			} else {
				/*
				 * Thread's priority is based on its project's
				 * normalized usage (shusage) value which gets
				 * calculated this way:
				 *
				 *	 pset_shares^2    zone_int_shares^2
				 * usage * ------------- * ------------------
				 *	 kpj_shares^2    zone_ext_shares^2
				 *
				 * Where zone_int_shares is the sum of shares
				 * of all active projects within the zone (and
				 * the pset), and zone_ext_shares is the number
				 * of zone shares (ie, zone.cpu-shares).
				 *
				 * If there is only one zone active on the pset
				 * the above reduces to:
				 *
				 * 			zone_int_shares^2
				 * shusage = usage * ---------------------
				 * 			kpj_shares^2
				 *
				 * If there's only one project active in the
				 * zone this formula reduces to:
				 *
				 *			pset_shares^2
				 * shusage = usage * ----------------------
				 *			zone_ext_shares^2
				 *
				 * The multiply/divide is done in two stages
				 * to reduce the chance of 64-bit overflow in
				 * the intermediate products.
				 */
				fssproj->fssp_shusage = fssproj->fssp_usage *
				    pset_shares * zone_int_shares;
				fssproj->fssp_shusage /=
				    kpj_shares * zone_ext_shares;
				fssproj->fssp_shusage *=
				    pset_shares * zone_int_shares;
				fssproj->fssp_shusage /=
				    kpj_shares * zone_ext_shares;
			}
			fssproj = fssproj->fssp_next;
		} while (fssproj != fsspset->fssps_list);

		disp_lock_exit(&fsspset->fssps_displock);
		mutex_exit(&fsspset->fssps_lock);
	}
	mutex_exit(&fsspsets_lock);
}
947 943
/*
 * Apply the thread's newly computed fss_umdpri as its dispatcher
 * priority.  For a thread currently on a cpu the priority is changed
 * in place (surrendering the cpu if dispatch rules require it); for a
 * sleeping or queued thread, thread_change_pri() repositions it.
 * FSSBACKQ marks threads that should go to the back of their run queue
 * next time they are enqueued.  Caller must hold t's thread lock.
 */
static void
fss_change_priority(kthread_t *t, fssproc_t *fssproc)
{
	pri_t new_pri;

	ASSERT(THREAD_LOCK_HELD(t));
	new_pri = fssproc->fss_umdpri;
	ASSERT(new_pri >= 0 && new_pri <= fss_maxglobpri);

	t->t_cpri = fssproc->fss_upri;
	fssproc->fss_flags &= ~FSSRESTORE;
	if (t == curthread || t->t_state == TS_ONPROC) {
		/*
		 * curthread is always onproc
		 */
		cpu_t *cp = t->t_disp_queue->disp_cpu;
		THREAD_CHANGE_PRI(t, new_pri);
		if (t == cp->cpu_dispthread)
			cp->cpu_dispatch_pri = DISP_PRIO(t);
		if (DISP_MUST_SURRENDER(t)) {
			/* Someone else should run; requeue at the back. */
			fssproc->fss_flags |= FSSBACKQ;
			cpu_surrender(t);
		} else {
			/* Still the best choice: grant a fresh quantum. */
			fssproc->fss_timeleft = fss_quantum;
		}
	} else {
		/*
		 * When the priority of a thread is changed, it may be
		 * necessary to adjust its position on a sleep queue or
		 * dispatch queue. The function thread_change_pri accomplishes
		 * this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			/*
			 * The thread was on a run queue.
			 */
			fssproc->fss_timeleft = fss_quantum;
		} else {
			fssproc->fss_flags |= FSSBACKQ;
		}
	}
}
990 986
/*
 * Update priorities of all fair-sharing threads that are currently runnable
 * at a user mode priority based on the number of shares and current usage.
 * Called once per second via timeout which we reset here.
 *
 * There are several lists of fair-sharing threads broken up by a hash on the
 * thread pointer. Each list has its own lock. This avoids blocking all
 * fss_enterclass, fss_fork, and fss_exitclass operations while fss_update runs.
 * fss_update traverses each list in turn.
 */
static void
fss_update(void *arg)
{
	int i;
	int new_marker = -1;
	/* Rotating start position so no list is systematically favored. */
	static int fss_update_marker;

	/*
	 * Decay and update usages for all projects.
	 */
	fss_decay_usage();

	/*
	 * Start with the fss_update_marker list, then do the rest.
	 */
	i = fss_update_marker;

	/*
	 * Go around all threads, set new priorities and decay
	 * per-thread CPU usages.
	 */
	do {
		/*
		 * If this is the first list after the current marker to have
		 * threads with priorities updates, advance the marker to this
		 * list for the next time fss_update runs.
		 */
		if (fss_update_list(i) &&
		    new_marker == -1 && i != fss_update_marker)
			new_marker = i;
	} while ((i = FSS_LIST_NEXT(i)) != fss_update_marker);

	/*
	 * Advance marker for the next fss_update call
	 */
	if (new_marker != -1)
		fss_update_marker = new_marker;

	/* Re-arm ourselves to run again in one second. */
	(void) timeout(fss_update, arg, hz);
}
1041 1037
/*
 * Updates priority for a list of threads. Returns 1 if the priority of one
 * of the threads was actually updated, 0 if none were for various reasons
 * (thread is no longer in the FSS class, is not runnable, has the preemption
 * control no-preempt bit set, etc.)
 */
static int
fss_update_list(int i)
{
	fssproc_t *fssproc;
	fssproj_t *fssproj;
	fsspri_t fsspri;
	kthread_t *t;
	int updated = 0;

	mutex_enter(&fss_listlock[i]);
	for (fssproc = fss_listhead[i].fss_next; fssproc != &fss_listhead[i];
	    fssproc = fssproc->fss_next) {
		t = fssproc->fss_tp;
		/*
		 * Lock the thread and verify the state.
		 */
		thread_lock(t);
		/*
		 * Skip the thread if it is no longer in the FSS class or
		 * is running with kernel mode priority.
		 */
		if (t->t_cid != fss_cid)
			goto next;
		if ((fssproc->fss_flags & FSSKPRI) != 0)
			goto next;

		fssproj = FSSPROC2FSSPROJ(fssproc);
		if (fssproj == NULL)
			goto next;
		if (fssproj->fssp_shares != 0) {
			/*
			 * Decay fsspri value.
			 */
			fsspri = fssproc->fss_fsspri;
			fsspri = (fsspri * fss_nice_decay[fssproc->fss_nice]) /
			    FSS_DECAY_BASE;
			fssproc->fss_fsspri = fsspri;
		}

		/* Honor schedctl no-preempt: leave the priority alone. */
		if (t->t_schedctl && schedctl_get_nopreempt(t))
			goto next;
		if (t->t_state != TS_RUN && t->t_state != TS_WAIT) {
			/*
			 * Make next syscall/trap call fss_trapret
			 */
			t->t_trapret = 1;
			aston(t);
			goto next;
		}
		fss_newpri(fssproc);
		updated = 1;

		/*
		 * Only dequeue the thread if it needs to be moved; otherwise
		 * it should just round-robin here.
		 */
		if (t->t_pri != fssproc->fss_umdpri)
			fss_change_priority(t, fssproc);
next:
		thread_unlock(t);
	}
	mutex_exit(&fss_listlock[i]);
	return (updated);
}
1112 1108
1113 1109 /*ARGSUSED*/
1114 1110 static int
1115 1111 fss_admin(caddr_t uaddr, cred_t *reqpcredp)
1116 1112 {
1117 1113 fssadmin_t fssadmin;
1118 1114
1119 1115 if (copyin(uaddr, &fssadmin, sizeof (fssadmin_t)))
1120 1116 return (EFAULT);
1121 1117
1122 1118 switch (fssadmin.fss_cmd) {
1123 1119 case FSS_SETADMIN:
1124 1120 if (secpolicy_dispadm(reqpcredp) != 0)
1125 1121 return (EPERM);
1126 1122 if (fssadmin.fss_quantum <= 0 || fssadmin.fss_quantum >= hz)
1127 1123 return (EINVAL);
1128 1124 fss_quantum = fssadmin.fss_quantum;
1129 1125 break;
1130 1126 case FSS_GETADMIN:
1131 1127 fssadmin.fss_quantum = fss_quantum;
1132 1128 if (copyout(&fssadmin, uaddr, sizeof (fssadmin_t)))
1133 1129 return (EFAULT);
1134 1130 break;
1135 1131 default:
1136 1132 return (EINVAL);
1137 1133 }
1138 1134 return (0);
1139 1135 }
1140 1136
1141 1137 static int
1142 1138 fss_getclinfo(void *infop)
1143 1139 {
1144 1140 fssinfo_t *fssinfo = (fssinfo_t *)infop;
1145 1141 fssinfo->fss_maxupri = fss_maxupri;
1146 1142 return (0);
1147 1143 }
1148 1144
1149 1145 static int
1150 1146 fss_parmsin(void *parmsp)
1151 1147 {
1152 1148 fssparms_t *fssparmsp = (fssparms_t *)parmsp;
1153 1149
1154 1150 /*
1155 1151 * Check validity of parameters.
1156 1152 */
1157 1153 if ((fssparmsp->fss_uprilim > fss_maxupri ||
1158 1154 fssparmsp->fss_uprilim < -fss_maxupri) &&
1159 1155 fssparmsp->fss_uprilim != FSS_NOCHANGE)
1160 1156 return (EINVAL);
1161 1157
1162 1158 if ((fssparmsp->fss_upri > fss_maxupri ||
1163 1159 fssparmsp->fss_upri < -fss_maxupri) &&
1164 1160 fssparmsp->fss_upri != FSS_NOCHANGE)
1165 1161 return (EINVAL);
1166 1162
1167 1163 return (0);
1168 1164 }
1169 1165
/*
 * Nothing to massage before copying parameters back out to the user;
 * FSS parameters are returned as-is.
 */
/*ARGSUSED*/
static int
fss_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
{
	return (0);
}
1176 1172
1177 1173 static int
1178 1174 fss_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
1179 1175 {
1180 1176 fssparms_t *fssparmsp = (fssparms_t *)parmsp;
1181 1177 int priflag = 0;
1182 1178 int limflag = 0;
1183 1179 uint_t cnt;
1184 1180 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0];
1185 1181
1186 1182 /*
1187 1183 * FSS_NOCHANGE (-32768) is outside of the range of values for
1188 1184 * fss_uprilim and fss_upri. If the structure fssparms_t is changed,
1189 1185 * FSS_NOCHANGE should be replaced by a flag word.
1190 1186 */
1191 1187 fssparmsp->fss_uprilim = FSS_NOCHANGE;
1192 1188 fssparmsp->fss_upri = FSS_NOCHANGE;
1193 1189
1194 1190 /*
1195 1191 * Get the varargs parameter and check validity of parameters.
1196 1192 */
1197 1193 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1198 1194 return (EINVAL);
1199 1195
1200 1196 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1201 1197 switch (vpp->pc_key) {
1202 1198 case FSS_KY_UPRILIM:
1203 1199 if (limflag++)
1204 1200 return (EINVAL);
1205 1201 fssparmsp->fss_uprilim = (pri_t)vpp->pc_parm;
1206 1202 if (fssparmsp->fss_uprilim > fss_maxupri ||
1207 1203 fssparmsp->fss_uprilim < -fss_maxupri)
1208 1204 return (EINVAL);
1209 1205 break;
1210 1206 case FSS_KY_UPRI:
1211 1207 if (priflag++)
1212 1208 return (EINVAL);
1213 1209 fssparmsp->fss_upri = (pri_t)vpp->pc_parm;
1214 1210 if (fssparmsp->fss_upri > fss_maxupri ||
1215 1211 fssparmsp->fss_upri < -fss_maxupri)
1216 1212 return (EINVAL);
1217 1213 break;
1218 1214 default:
1219 1215 return (EINVAL);
1220 1216 }
1221 1217 }
1222 1218
1223 1219 if (vaparmsp->pc_vaparmscnt == 0) {
1224 1220 /*
1225 1221 * Use default parameters.
1226 1222 */
1227 1223 fssparmsp->fss_upri = fssparmsp->fss_uprilim = 0;
1228 1224 }
1229 1225
1230 1226 return (0);
1231 1227 }
1232 1228
1233 1229 /*
1234 1230 * Copy all selected fair-sharing class parameters to the user. The parameters
1235 1231 * are specified by a key.
1236 1232 */
1237 1233 static int
1238 1234 fss_vaparmsout(void *parmsp, pc_vaparms_t *vaparmsp)
1239 1235 {
1240 1236 fssparms_t *fssparmsp = (fssparms_t *)parmsp;
1241 1237 int priflag = 0;
1242 1238 int limflag = 0;
1243 1239 uint_t cnt;
1244 1240 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0];
1245 1241
1246 1242 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1247 1243
1248 1244 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1249 1245 return (EINVAL);
1250 1246
1251 1247 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1252 1248 switch (vpp->pc_key) {
1253 1249 case FSS_KY_UPRILIM:
1254 1250 if (limflag++)
1255 1251 return (EINVAL);
1256 1252 if (copyout(&fssparmsp->fss_uprilim,
1257 1253 (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1258 1254 return (EFAULT);
1259 1255 break;
1260 1256 case FSS_KY_UPRI:
1261 1257 if (priflag++)
1262 1258 return (EINVAL);
1263 1259 if (copyout(&fssparmsp->fss_upri,
1264 1260 (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1265 1261 return (EFAULT);
1266 1262 break;
1267 1263 default:
1268 1264 return (EINVAL);
1269 1265 }
1270 1266 }
1271 1267
1272 1268 return (0);
1273 1269 }
1274 1270
1275 1271 /*
1276 1272 * Return the user mode scheduling priority range.
1277 1273 */
1278 1274 static int
1279 1275 fss_getclpri(pcpri_t *pcprip)
1280 1276 {
1281 1277 pcprip->pc_clpmax = fss_maxupri;
1282 1278 pcprip->pc_clpmin = -fss_maxupri;
1283 1279 return (0);
1284 1280 }
1285 1281
1286 1282 static int
1287 1283 fss_alloc(void **p, int flag)
1288 1284 {
1289 1285 void *bufp;
1290 1286
1291 1287 if ((bufp = kmem_zalloc(sizeof (fssproc_t), flag)) == NULL) {
1292 1288 return (ENOMEM);
1293 1289 } else {
1294 1290 *p = bufp;
1295 1291 return (0);
1296 1292 }
1297 1293 }
1298 1294
1299 1295 static void
1300 1296 fss_free(void *bufp)
1301 1297 {
1302 1298 if (bufp)
1303 1299 kmem_free(bufp, sizeof (fssproc_t));
1304 1300 }
1305 1301
/*
 * Thread functions
 */
/*
 * Admit thread t into the FSS class.  bufp is the fssproc_t pre-allocated by
 * fss_alloc(); parmsp, if non-NULL, supplies the requested upri/uprilim.
 * Looks up (or creates) the per-pset zone and project accounting structures,
 * links the thread into them, and arms the periodic fss_update() timeout the
 * first time any thread ever enters the class.  Called with the target
 * process's p_lock held.
 */
static int
fss_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	fssparms_t *fssparmsp = (fssparms_t *)parmsp;
	fssproc_t *fssproc;
	pri_t reqfssuprilim;
	pri_t reqfssupri;
	static uint32_t fssexists = 0;	/* set once, first FSS thread ever */
	fsspset_t *fsspset;
	fssproj_t *fssproj;
	fsszone_t *fsszone;
	kproject_t *kpj;
	zone_t *zone;
	int fsszone_allocated = 0;

	fssproc = (fssproc_t *)bufp;
	ASSERT(fssproc != NULL);

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	/*
	 * Only root can move threads to FSS class.
	 */
	if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
		return (EPERM);
	/*
	 * Initialize the fssproc structure.
	 */
	fssproc->fss_umdpri = fss_maxumdpri / 2;

	if (fssparmsp == NULL) {
		/*
		 * Use default values.
		 */
		fssproc->fss_nice = NZERO;
		fssproc->fss_uprilim = fssproc->fss_upri = 0;
	} else {
		/*
		 * Use supplied values.
		 */
		if (fssparmsp->fss_uprilim == FSS_NOCHANGE) {
			reqfssuprilim = 0;
		} else {
			/* Raising the limit above 0 needs privilege. */
			if (fssparmsp->fss_uprilim > 0 &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			reqfssuprilim = fssparmsp->fss_uprilim;
		}
		if (fssparmsp->fss_upri == FSS_NOCHANGE) {
			reqfssupri = reqfssuprilim;
		} else {
			if (fssparmsp->fss_upri > 0 &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			/*
			 * Set the user priority to the requested value or
			 * the upri limit, whichever is lower.
			 */
			reqfssupri = fssparmsp->fss_upri;
			if (reqfssupri > reqfssuprilim)
				reqfssupri = reqfssuprilim;
		}
		fssproc->fss_uprilim = reqfssuprilim;
		fssproc->fss_upri = reqfssupri;
		/* Derive the nice value from upri; clamp to the maximum. */
		fssproc->fss_nice = NZERO - (NZERO * reqfssupri) / fss_maxupri;
		if (fssproc->fss_nice > FSS_NICE_MAX)
			fssproc->fss_nice = FSS_NICE_MAX;
	}

	fssproc->fss_timeleft = fss_quantum;
	fssproc->fss_tp = t;
	cpucaps_sc_init(&fssproc->fss_caps);

	/*
	 * Put a lock on our fsspset structure.
	 */
	mutex_enter(&fsspsets_lock);
	fsspset = fss_find_fsspset(t->t_cpupart);
	mutex_enter(&fsspset->fssps_lock);
	mutex_exit(&fsspsets_lock);

	/*
	 * Find or create the zone accounting structure for this pset;
	 * remember if we allocated it so we can roll back on failure below.
	 */
	zone = ttoproc(t)->p_zone;
	if ((fsszone = fss_find_fsszone(fsspset, zone)) == NULL) {
		if ((fsszone = kmem_zalloc(sizeof (fsszone_t), KM_NOSLEEP))
		    == NULL) {
			mutex_exit(&fsspset->fssps_lock);
			return (ENOMEM);
		} else {
			fsszone_allocated = 1;
			fss_insert_fsszone(fsspset, zone, fsszone);
		}
	}
	/* Likewise for the project accounting structure. */
	kpj = ttoproj(t);
	if ((fssproj = fss_find_fssproj(fsspset, kpj)) == NULL) {
		if ((fssproj = kmem_zalloc(sizeof (fssproj_t), KM_NOSLEEP))
		    == NULL) {
			if (fsszone_allocated) {
				fss_remove_fsszone(fsspset, fsszone);
				kmem_free(fsszone, sizeof (fsszone_t));
			}
			mutex_exit(&fsspset->fssps_lock);
			return (ENOMEM);
		} else {
			fss_insert_fssproj(fsspset, kpj, fsszone, fssproj);
		}
	}
	fssproj->fssp_threads++;
	fssproc->fss_proj = fssproj;

	/*
	 * Reset priority. Process goes to a "user mode" priority here
	 * regardless of whether or not it has slept since entering the kernel.
	 */
	thread_lock(t);
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = (void *)fssproc;
	t->t_schedflag |= TS_RUNQMATCH;
	fss_change_priority(t, fssproc);
	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
	    t->t_state == TS_WAIT)
		fss_active(t);
	thread_unlock(t);

	mutex_exit(&fsspset->fssps_lock);

	/*
	 * Link new structure into fssproc list.
	 */
	FSS_LIST_INSERT(fssproc);

	/*
	 * If this is the first fair-sharing thread to occur since boot,
	 * we set up the initial call to fss_update() here. Use an atomic
	 * compare-and-swap since that's easier and faster than a mutex
	 * (but check with an ordinary load first since most of the time
	 * this will already be done).
	 */
	if (fssexists == 0 && cas32(&fssexists, 0, 1) == 0)
		(void) timeout(fss_update, NULL, hz);

	return (0);
}
1453 1449
/*
 * Remove fssproc_t from the list.
 */
static void
fss_exitclass(void *procp)
{
	fssproc_t *fssproc = (fssproc_t *)procp;
	fssproj_t *fssproj;
	fsspset_t *fsspset;
	fsszone_t *fsszone;
	kthread_t *t = fssproc->fss_tp;

	/*
	 * We should be either getting this thread off the deathrow or
	 * this thread has already moved to another scheduling class and
	 * we're being called with its old cldata buffer pointer. In both
	 * cases, the content of this buffer can not be changed while we're
	 * here.
	 */
	mutex_enter(&fsspsets_lock);
	thread_lock(t);
	if (t->t_cid != fss_cid) {
		/*
		 * We're being called as a result of the priocntl() system
		 * call -- someone is trying to move our thread to another
		 * scheduling class. We can't call fss_inactive() here
		 * because our thread's t_cldata pointer already points
		 * to another scheduling class specific data.
		 */
		ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

		fssproj = FSSPROC2FSSPROJ(fssproc);
		fsspset = FSSPROJ2FSSPSET(fssproj);
		fsszone = fssproj->fssp_fsszone;

		/*
		 * If the thread was counted as runnable, back its share
		 * contribution out of the project / zone / pset totals.
		 */
		if (fssproc->fss_runnable) {
			disp_lock_enter_high(&fsspset->fssps_displock);
			if (--fssproj->fssp_runnable == 0) {
				fsszone->fssz_shares -= fssproj->fssp_shares;
				if (--fsszone->fssz_runnable == 0)
					fsspset->fssps_shares -=
					    fsszone->fssz_rshares;
			}
			disp_lock_exit_high(&fsspset->fssps_displock);
		}
		thread_unlock(t);

		/*
		 * Drop the last per-project reference; free the project
		 * (and its zone bookkeeping, if now empty) when unused.
		 */
		mutex_enter(&fsspset->fssps_lock);
		if (--fssproj->fssp_threads == 0) {
			fss_remove_fssproj(fsspset, fssproj);
			if (fsszone->fssz_nproj == 0)
				kmem_free(fsszone, sizeof (fsszone_t));
			kmem_free(fssproj, sizeof (fssproj_t));
		}
		mutex_exit(&fsspset->fssps_lock);

	} else {
		ASSERT(t->t_state == TS_FREE);
		/*
		 * We're being called from thread_free() when our thread
		 * is removed from the deathrow. There is nothing we need
		 * do here since everything should've been done earlier
		 * in fss_exit().
		 */
		thread_unlock(t);
	}
	mutex_exit(&fsspsets_lock);

	FSS_LIST_DELETE(fssproc);
	fss_free(fssproc);
}
1525 1521
1526 1522 /*ARGSUSED*/
1527 1523 static int
1528 1524 fss_canexit(kthread_t *t, cred_t *credp)
1529 1525 {
1530 1526 /*
1531 1527 * A thread is allowed to exit FSS only if we have sufficient
1532 1528 * privileges.
1533 1529 */
1534 1530 if (credp != NULL && secpolicy_setpriority(credp) != 0)
1535 1531 return (EPERM);
1536 1532 else
1537 1533 return (0);
1538 1534 }
1539 1535
/*
 * Initialize fair-share class specific proc structure for a child.
 */
static int
fss_fork(kthread_t *pt, kthread_t *ct, void *bufp)
{
	fssproc_t *pfssproc;	/* ptr to parent's fssproc structure */
	fssproc_t *cfssproc;	/* ptr to child's fssproc structure */
	fssproj_t *fssproj;
	fsspset_t *fsspset;

	ASSERT(MUTEX_HELD(&ttoproc(pt)->p_lock));
	ASSERT(ct->t_state == TS_STOPPED);

	cfssproc = (fssproc_t *)bufp;
	ASSERT(cfssproc != NULL);
	bzero(cfssproc, sizeof (fssproc_t));

	/*
	 * Find the parent's project/pset with the thread lock held, then
	 * drop it so we can take the pset mutex in the right order below.
	 */
	thread_lock(pt);
	pfssproc = FSSPROC(pt);
	fssproj = FSSPROC2FSSPROJ(pfssproc);
	fsspset = FSSPROJ2FSSPSET(fssproj);
	thread_unlock(pt);

	mutex_enter(&fsspset->fssps_lock);
	/*
	 * Initialize child's fssproc structure.
	 */
	thread_lock(pt);
	ASSERT(FSSPROJ(pt) == fssproj);
	cfssproc->fss_proj = fssproj;
	cfssproc->fss_timeleft = fss_quantum;
	cfssproc->fss_umdpri = pfssproc->fss_umdpri;
	cfssproc->fss_fsspri = 0;	/* child starts with no accrued usage */
	cfssproc->fss_uprilim = pfssproc->fss_uprilim;
	cfssproc->fss_upri = pfssproc->fss_upri;
	cfssproc->fss_tp = ct;
	cfssproc->fss_nice = pfssproc->fss_nice;
	cpucaps_sc_init(&cfssproc->fss_caps);

	/* Inherit flags, but not the transient queueing/priority bits. */
	cfssproc->fss_flags =
	    pfssproc->fss_flags & ~(FSSKPRI | FSSBACKQ | FSSRESTORE);
	ct->t_cldata = (void *)cfssproc;
	ct->t_schedflag |= TS_RUNQMATCH;
	thread_unlock(pt);

	/* The child adds one more thread to the parent's project. */
	fssproj->fssp_threads++;
	mutex_exit(&fsspset->fssps_lock);

	/*
	 * Link new structure into fssproc hash table.
	 */
	FSS_LIST_INSERT(cfssproc);
	return (0);
}
1595 1591
/*
 * Child is placed at back of dispatcher queue and parent gives up processor
 * so that the child runs first after the fork. This allows the child
 * immediately execing to break the multiple use of copy on write pages with no
 * disk home. The parent will get to steal them back rather than uselessly
 * copying them.
 */
static void
fss_forkret(kthread_t *t, kthread_t *ct)
{
	proc_t *pp = ttoproc(t);
	proc_t *cp = ttoproc(ct);
	fssproc_t *fssproc;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Grab the child's p_lock before dropping pidlock to ensure the
	 * process does not disappear before we set it running.
	 */
	mutex_enter(&cp->p_lock);
	continuelwps(cp);
	mutex_exit(&cp->p_lock);

	mutex_enter(&pp->p_lock);
	mutex_exit(&pidlock);
	continuelwps(pp);

	thread_lock(t);

	/* Recompute the parent's priority and give it a fresh quantum. */
	fssproc = FSSPROC(t);
	fss_newpri(fssproc);
	fssproc->fss_timeleft = fss_quantum;
	t->t_pri = fssproc->fss_umdpri;
	ASSERT(t->t_pri >= 0 && t->t_pri <= fss_maxglobpri);
	fssproc->fss_flags &= ~FSSKPRI;
	THREAD_TRANSITION(t);

	/*
	 * We don't want to call fss_setrun(t) here because it may call
	 * fss_active, which we don't need.
	 */
	fssproc->fss_flags &= ~FSSBACKQ;

	/* Preserve queue position if the thread ran within this tick. */
	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);

	thread_unlock(t);
	/*
	 * Safe to drop p_lock now since it is safe to change
	 * the scheduling class after this point.
	 */
	mutex_exit(&pp->p_lock);

	/* Yield so the child runs first. */
	swtch();
}
1655 1651
1656 1652 /*
1657 1653 * Get the fair-sharing parameters of the thread pointed to by fssprocp into
1658 1654 * the buffer pointed by fssparmsp.
1659 1655 */
1660 1656 static void
1661 1657 fss_parmsget(kthread_t *t, void *parmsp)
1662 1658 {
1663 1659 fssproc_t *fssproc = FSSPROC(t);
1664 1660 fssparms_t *fssparmsp = (fssparms_t *)parmsp;
1665 1661
1666 1662 fssparmsp->fss_uprilim = fssproc->fss_uprilim;
1667 1663 fssparmsp->fss_upri = fssproc->fss_upri;
1668 1664 }
1669 1665
1670 1666 /*ARGSUSED*/
1671 1667 static int
1672 1668 fss_parmsset(kthread_t *t, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1673 1669 {
1674 1670 char nice;
1675 1671 pri_t reqfssuprilim;
1676 1672 pri_t reqfssupri;
1677 1673 fssproc_t *fssproc = FSSPROC(t);
1678 1674 fssparms_t *fssparmsp = (fssparms_t *)parmsp;
1679 1675
1680 1676 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1681 1677
1682 1678 if (fssparmsp->fss_uprilim == FSS_NOCHANGE)
1683 1679 reqfssuprilim = fssproc->fss_uprilim;
1684 1680 else
1685 1681 reqfssuprilim = fssparmsp->fss_uprilim;
1686 1682
1687 1683 if (fssparmsp->fss_upri == FSS_NOCHANGE)
1688 1684 reqfssupri = fssproc->fss_upri;
1689 1685 else
1690 1686 reqfssupri = fssparmsp->fss_upri;
1691 1687
1692 1688 /*
1693 1689 * Make sure the user priority doesn't exceed the upri limit.
1694 1690 */
1695 1691 if (reqfssupri > reqfssuprilim)
1696 1692 reqfssupri = reqfssuprilim;
1697 1693
1698 1694 /*
1699 1695 * Basic permissions enforced by generic kernel code for all classes
1700 1696 * require that a thread attempting to change the scheduling parameters
1701 1697 * of a target thread be privileged or have a real or effective UID
1702 1698 * matching that of the target thread. We are not called unless these
1703 1699 * basic permission checks have already passed. The fair-sharing class
1704 1700 * requires in addition that the calling thread be privileged if it
1705 1701 * is attempting to raise the upri limit above its current value.
1706 1702 * This may have been checked previously but if our caller passed us
1707 1703 * a non-NULL credential pointer we assume it hasn't and we check it
1708 1704 * here.
1709 1705 */
1710 1706 if ((reqpcredp != NULL) &&
1711 1707 (reqfssuprilim > fssproc->fss_uprilim) &&
1712 1708 secpolicy_raisepriority(reqpcredp) != 0)
1713 1709 return (EPERM);
1714 1710
1715 1711 /*
1716 1712 * Set fss_nice to the nice value corresponding to the user priority we
1717 1713 * are setting. Note that setting the nice field of the parameter
1718 1714 * struct won't affect upri or nice.
1719 1715 */
1720 1716 nice = NZERO - (reqfssupri * NZERO) / fss_maxupri;
1721 1717 if (nice > FSS_NICE_MAX)
1722 1718 nice = FSS_NICE_MAX;
1723 1719
1724 1720 thread_lock(t);
1725 1721
1726 1722 fssproc->fss_uprilim = reqfssuprilim;
1727 1723 fssproc->fss_upri = reqfssupri;
1728 1724 fssproc->fss_nice = nice;
1729 1725 fss_newpri(fssproc);
1730 1726
1731 1727 if ((fssproc->fss_flags & FSSKPRI) != 0) {
1732 1728 thread_unlock(t);
1733 1729 return (0);
1734 1730 }
1735 1731
1736 1732 fss_change_priority(t, fssproc);
1737 1733 thread_unlock(t);
1738 1734 return (0);
1739 1735
1740 1736 }
1741 1737
/*
 * The thread is being stopped.
 */
/*ARGSUSED*/
static void
fss_stop(kthread_t *t, int why, int what)
{
	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t == curthread);

	/* Remove the thread's contribution to its project's runnable count. */
	fss_inactive(t);
}
1754 1750
/*
 * The current thread is exiting, do necessary adjustments to its project
 */
static void
fss_exit(kthread_t *t)
{
	fsspset_t *fsspset;
	fssproj_t *fssproj;
	fssproc_t *fssproc;
	fsszone_t *fsszone;
	int free = 0;

	/*
	 * Thread t here is either a current thread (in which case we hold
	 * its process' p_lock), or a thread being destroyed by forklwp_fail(),
	 * in which case we hold pidlock and thread is no longer on the
	 * thread list.
	 */
	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock) || MUTEX_HELD(&pidlock));

	fssproc = FSSPROC(t);
	fssproj = FSSPROC2FSSPROJ(fssproc);
	fsspset = FSSPROJ2FSSPSET(fssproj);
	fsszone = fssproj->fssp_fsszone;

	mutex_enter(&fsspsets_lock);
	mutex_enter(&fsspset->fssps_lock);

	thread_lock(t);
	disp_lock_enter_high(&fsspset->fssps_displock);
	if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) {
		/*
		 * The thread counts as runnable; back its shares out of the
		 * project, zone and pset totals as those counts drop to zero.
		 */
		if (--fssproj->fssp_runnable == 0) {
			fsszone->fssz_shares -= fssproj->fssp_shares;
			if (--fsszone->fssz_runnable == 0)
				fsspset->fssps_shares -= fsszone->fssz_rshares;
		}
		ASSERT(fssproc->fss_runnable == 1);
		fssproc->fss_runnable = 0;
	}
	/* Defer the actual frees until after the displock is dropped. */
	if (--fssproj->fssp_threads == 0) {
		fss_remove_fssproj(fsspset, fssproj);
		free = 1;
	}
	disp_lock_exit_high(&fsspset->fssps_displock);
	fssproc->fss_proj = NULL;	/* mark this thread as already exited */
	thread_unlock(t);

	if (free) {
		if (fsszone->fssz_nproj == 0)
			kmem_free(fsszone, sizeof (fsszone_t));
		kmem_free(fssproj, sizeof (fssproj_t));
	}
	mutex_exit(&fsspset->fssps_lock);
	mutex_exit(&fsspsets_lock);

	/*
	 * A thread could be exiting in between clock ticks, so we need to
	 * calculate how much CPU time it used since it was charged last time.
	 *
	 * CPU caps are not enforced on exiting processes - it is usually
	 * desirable to exit as soon as possible to free resources.
	 */
	if (CPUCAPS_ON()) {
		thread_lock(t);
		fssproc = FSSPROC(t);
		(void) cpucaps_charge(t, &fssproc->fss_caps,
		    CPUCAPS_CHARGE_ONLY);
		thread_unlock(t);
	}
}
1825 1821
/*
 * No-op placeholder used for class operations that require no action in FSS.
 */
static void
fss_nullsys()
{
}
1830 1826
1831 1827 /*
1832 - * fss_swapin() returns -1 if the thread is loaded or is not eligible to be
1833 - * swapped in. Otherwise, it returns the thread's effective priority based
1834 - * on swapout time and size of process (0 <= epri <= 0 SHRT_MAX).
1835 - */
1836 -/*ARGSUSED*/
1837 -static pri_t
1838 -fss_swapin(kthread_t *t, int flags)
1839 -{
1840 - fssproc_t *fssproc = FSSPROC(t);
1841 - long epri = -1;
1842 - proc_t *pp = ttoproc(t);
1843 -
1844 - ASSERT(THREAD_LOCK_HELD(t));
1845 -
1846 - if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
1847 - time_t swapout_time;
1848 -
1849 - swapout_time = (ddi_get_lbolt() - t->t_stime) / hz;
1850 - if (INHERITED(t) || (fssproc->fss_flags & FSSKPRI)) {
1851 - epri = (long)DISP_PRIO(t) + swapout_time;
1852 - } else {
1853 - /*
1854 - * Threads which have been out for a long time,
1855 - * have high user mode priority and are associated
1856 - * with a small address space are more deserving.
1857 - */
1858 - epri = fssproc->fss_umdpri;
1859 - ASSERT(epri >= 0 && epri <= fss_maxumdpri);
1860 - epri += swapout_time - pp->p_swrss / nz(maxpgio)/2;
1861 - }
1862 - /*
1863 - * Scale epri so that SHRT_MAX / 2 represents zero priority.
1864 - */
1865 - epri += SHRT_MAX / 2;
1866 - if (epri < 0)
1867 - epri = 0;
1868 - else if (epri > SHRT_MAX)
1869 - epri = SHRT_MAX;
1870 - }
1871 - return ((pri_t)epri);
1872 -}
1873 -
1874 -/*
1875 - * fss_swapout() returns -1 if the thread isn't loaded or is not eligible to
1876 - * be swapped out. Otherwise, it returns the thread's effective priority
1877 - * based on if the swapper is in softswap or hardswap mode.
1878 - */
1879 -static pri_t
1880 -fss_swapout(kthread_t *t, int flags)
1881 -{
1882 - fssproc_t *fssproc = FSSPROC(t);
1883 - long epri = -1;
1884 - proc_t *pp = ttoproc(t);
1885 - time_t swapin_time;
1886 -
1887 - ASSERT(THREAD_LOCK_HELD(t));
1888 -
1889 - if (INHERITED(t) ||
1890 - (fssproc->fss_flags & FSSKPRI) ||
1891 - (t->t_proc_flag & TP_LWPEXIT) ||
1892 - (t->t_state & (TS_ZOMB|TS_FREE|TS_STOPPED|TS_ONPROC|TS_WAIT)) ||
1893 - !(t->t_schedflag & TS_LOAD) ||
1894 - !(SWAP_OK(t)))
1895 - return (-1);
1896 -
1897 - ASSERT(t->t_state & (TS_SLEEP | TS_RUN));
1898 -
1899 - swapin_time = (ddi_get_lbolt() - t->t_stime) / hz;
1900 -
1901 - if (flags == SOFTSWAP) {
1902 - if (t->t_state == TS_SLEEP && swapin_time > maxslp) {
1903 - epri = 0;
1904 - } else {
1905 - return ((pri_t)epri);
1906 - }
1907 - } else {
1908 - pri_t pri;
1909 -
1910 - if ((t->t_state == TS_SLEEP && swapin_time > fss_minslp) ||
1911 - (t->t_state == TS_RUN && swapin_time > fss_minrun)) {
1912 - pri = fss_maxumdpri;
1913 - epri = swapin_time -
1914 - (rm_asrss(pp->p_as) / nz(maxpgio)/2) - (long)pri;
1915 - } else {
1916 - return ((pri_t)epri);
1917 - }
1918 - }
1919 -
1920 - /*
1921 - * Scale epri so that SHRT_MAX / 2 represents zero priority.
1922 - */
1923 - epri += SHRT_MAX / 2;
1924 - if (epri < 0)
1925 - epri = 0;
1926 - else if (epri > SHRT_MAX)
1927 - epri = SHRT_MAX;
1928 -
1929 - return ((pri_t)epri);
1930 -}
1931 -
/*
 * If thread is currently at a kernel mode priority (has slept) and is
 * returning to the userland we assign it the appropriate user mode priority
 * and time quantum here. If we're lowering the thread's priority below that
 * of other runnable threads then we will set runrun via cpu_surrender() to
 * cause preemption.
 */
static void
fss_trapret(kthread_t *t)
{
	fssproc_t *fssproc = FSSPROC(t);
	cpu_t *cp = CPU;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t == curthread);
	ASSERT(cp->cpu_dispthread == t);
	ASSERT(t->t_state == TS_ONPROC);

	/* The kernel-priority request, if any, has now been serviced. */
	t->t_kpri_req = 0;
	if (fssproc->fss_flags & FSSKPRI) {
		/*
		 * If thread has blocked in the kernel
		 */
		THREAD_CHANGE_PRI(t, fssproc->fss_umdpri);
		cp->cpu_dispatch_pri = DISP_PRIO(t);
		ASSERT(t->t_pri >= 0 && t->t_pri <= fss_maxglobpri);
		fssproc->fss_flags &= ~FSSKPRI;

		/* Dropping priority may mean someone else should run now. */
		if (DISP_MUST_SURRENDER(t))
			cpu_surrender(t);
	}
}
1974 1859
/*
 * Arrange for thread to be placed in appropriate location on dispatcher queue.
 * This is called with the current thread in TS_ONPROC and locked.
 */
static void
fss_preempt(kthread_t *t)
{
	fssproc_t *fssproc = FSSPROC(t);
	klwp_t *lwp;
	uint_t flags;

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(curthread));
	ASSERT(t->t_state == TS_ONPROC);

	/*
	 * If preempted in the kernel, make sure the thread has a kernel
	 * priority if needed.
	 */
	lwp = curthread->t_lwp;
	if (!(fssproc->fss_flags & FSSKPRI) && lwp != NULL && t->t_kpri_req) {
		fssproc->fss_flags |= FSSKPRI;
		THREAD_CHANGE_PRI(t, minclsyspri);
		ASSERT(t->t_pri >= 0 && t->t_pri <= fss_maxglobpri);
		t->t_trapret = 1;	/* so that fss_trapret will run */
		aston(t);
	}

	/*
	 * This thread may be placed on wait queue by CPU Caps. In this case we
	 * do not need to do anything until it is removed from the wait queue.
	 * Do not enforce CPU caps on threads running at a kernel priority
	 */
	if (CPUCAPS_ON()) {
		(void) cpucaps_charge(t, &fssproc->fss_caps,
		    CPUCAPS_CHARGE_ENFORCE);

		if (!(fssproc->fss_flags & FSSKPRI) && CPUCAPS_ENFORCE(t))
			return;
	}

	/*
	 * Check to see if we're doing "preemption control" here. If
	 * we are, and if the user has requested that this thread not
	 * be preempted, and if preemptions haven't been put off for
	 * too long, let the preemption happen here but try to make
	 * sure the thread is rescheduled as soon as possible. We do
	 * this by putting it on the front of the highest priority run
	 * queue in the FSS class. If the preemption has been put off
	 * for too long, clear the "nopreempt" bit and let the thread
	 * be preempted.
	 */
	if (t->t_schedctl && schedctl_get_nopreempt(t)) {
		if (fssproc->fss_timeleft > -SC_MAX_TICKS) {
			DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
			if (!(fssproc->fss_flags & FSSKPRI)) {
				/*
				 * If not already remembered, remember current
				 * priority for restoration in fss_yield().
				 */
				if (!(fssproc->fss_flags & FSSRESTORE)) {
					fssproc->fss_scpri = t->t_pri;
					fssproc->fss_flags |= FSSRESTORE;
				}
				THREAD_CHANGE_PRI(t, fss_maxumdpri);
			}
			schedctl_set_yield(t, 1);
			setfrontdq(t);
			return;
		} else {
			/* nopreempt held too long: undo any boost and let
			 * the preemption proceed below. */
			if (fssproc->fss_flags & FSSRESTORE) {
				THREAD_CHANGE_PRI(t, fssproc->fss_scpri);
				fssproc->fss_flags &= ~FSSRESTORE;
			}
			schedctl_set_nopreempt(t, 0);
			DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
			/*
			 * Fall through and be preempted below.
			 */
		}
	}

	flags = fssproc->fss_flags & (FSSBACKQ | FSSKPRI);

	if (flags == FSSBACKQ) {
		fssproc->fss_timeleft = fss_quantum;
		fssproc->fss_flags &= ~FSSBACKQ;
		setbackdq(t);
	} else if (flags == (FSSBACKQ | FSSKPRI)) {
		fssproc->fss_flags &= ~FSSBACKQ;
		setbackdq(t);
	} else {
		setfrontdq(t);
	}
}
2079 1955
/*
 * Called when a thread is waking up and is to be placed on the run queue.
 */
static void
fss_setrun(kthread_t *t)
{
	fssproc_t *fssproc = FSSPROC(t);

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	/* Rejoin the active-share accounting if coming from sleep/stop. */
	if (t->t_state == TS_SLEEP || t->t_state == TS_STOPPED)
		fss_active(t);

	fssproc->fss_timeleft = fss_quantum;

	fssproc->fss_flags &= ~FSSBACKQ;
	/*
	 * If previously were running at the kernel priority then keep that
	 * priority and the fss_timeleft doesn't matter.
	 */
	if ((fssproc->fss_flags & FSSKPRI) == 0)
		THREAD_CHANGE_PRI(t, fssproc->fss_umdpri);

	/* Threads made runnable in the same tick go to the front. */
	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}
2108 1984
/*
 * Prepare thread for sleep. We reset the thread priority so it will run at the
 * kernel priority level when it wakes up.
 */
static void
fss_sleep(kthread_t *t)
{
	fssproc_t *fssproc = FSSPROC(t);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	ASSERT(t->t_state == TS_ONPROC);

	/*
	 * Account for time spent on CPU before going to sleep.
	 */
	(void) CPUCAPS_CHARGE(t, &fssproc->fss_caps, CPUCAPS_CHARGE_ENFORCE);

	fss_inactive(t);

	/*
	 * Assign a system priority to the thread and arrange for it to be
	 * retained when the thread is next placed on the run queue (i.e.,
	 * when it wakes up) instead of being given a new pri. Also arrange
	 * for trapret processing as the thread leaves the system call so it
	 * will drop back to normal priority range.
	 */
	if (t->t_kpri_req) {
		THREAD_CHANGE_PRI(t, minclsyspri);
		fssproc->fss_flags |= FSSKPRI;
		t->t_trapret = 1;	/* so that fss_trapret will run */
		aston(t);
	} else if (fssproc->fss_flags & FSSKPRI) {
		/*
		 * The thread has done a THREAD_KPRI_REQUEST(), slept, then
		 * done THREAD_KPRI_RELEASE() (so t_kpri_req is 0 again),
		 * then slept again all without finishing the current system
		 * call so trapret won't have cleared FSSKPRI
		 */
		fssproc->fss_flags &= ~FSSKPRI;
		THREAD_CHANGE_PRI(t, fssproc->fss_umdpri);
		if (DISP_MUST_SURRENDER(curthread))
			cpu_surrender(t);
	}
}
2156 2031
2157 2032 /*
2158 2033 * A tick interrupt has ocurrend on a running thread. Check to see if our
2159 - * time slice has expired. We must also clear the TS_DONT_SWAP flag in
2160 - * t_schedflag if the thread is eligible to be swapped out.
2034 + * time slice has expired.
2161 2035 */
2162 2036 static void
2163 2037 fss_tick(kthread_t *t)
2164 2038 {
2165 2039 fssproc_t *fssproc;
2166 2040 fssproj_t *fssproj;
2167 2041 klwp_t *lwp;
2168 2042 boolean_t call_cpu_surrender = B_FALSE;
2169 2043 boolean_t cpucaps_enforce = B_FALSE;
2170 2044
2171 2045 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
2172 2046
2173 2047 /*
2174 2048 * It's safe to access fsspset and fssproj structures because we're
2175 2049 * holding our p_lock here.
2176 2050 */
2177 2051 thread_lock(t);
2178 2052 fssproc = FSSPROC(t);
2179 2053 fssproj = FSSPROC2FSSPROJ(fssproc);
2180 2054 if (fssproj != NULL) {
2181 2055 fsspset_t *fsspset = FSSPROJ2FSSPSET(fssproj);
2182 2056 disp_lock_enter_high(&fsspset->fssps_displock);
2183 2057 fssproj->fssp_ticks += fss_nice_tick[fssproc->fss_nice];
2184 2058 fssproc->fss_ticks++;
2185 2059 disp_lock_exit_high(&fsspset->fssps_displock);
2186 2060 }
2187 2061
2188 2062 /*
2189 2063 * Keep track of thread's project CPU usage. Note that projects
2190 2064 * get charged even when threads are running in the kernel.
2191 2065 * Do not surrender CPU if running in the SYS class.
2192 2066 */
2193 2067 if (CPUCAPS_ON()) {
2194 2068 cpucaps_enforce = cpucaps_charge(t,
2195 2069 &fssproc->fss_caps, CPUCAPS_CHARGE_ENFORCE) &&
2196 2070 !(fssproc->fss_flags & FSSKPRI);
2197 2071 }
2198 2072
2199 2073 /*
2200 2074 * A thread's execution time for threads running in the SYS class
2201 2075 * is not tracked.
2202 2076 */
2203 2077 if ((fssproc->fss_flags & FSSKPRI) == 0) {
2204 2078 /*
2205 2079 * If thread is not in kernel mode, decrement its fss_timeleft
2206 2080 */
2207 2081 if (--fssproc->fss_timeleft <= 0) {
2208 2082 pri_t new_pri;
2209 2083
2210 2084 /*
2211 2085 * If we're doing preemption control and trying to
2212 2086 * avoid preempting this thread, just note that the
2213 2087 * thread should yield soon and let it keep running
2214 2088 * (unless it's been a while).
2215 2089 */
2216 2090 if (t->t_schedctl && schedctl_get_nopreempt(t)) {
2217 2091 if (fssproc->fss_timeleft > -SC_MAX_TICKS) {
2218 2092 DTRACE_SCHED1(schedctl__nopreempt,
2219 2093 kthread_t *, t);
2220 2094 schedctl_set_yield(t, 1);
2221 2095 thread_unlock_nopreempt(t);
2222 2096 return;
2223 2097 }
2224 2098 }
2225 2099 fssproc->fss_flags &= ~FSSRESTORE;
2226 2100
2227 2101 fss_newpri(fssproc);
↓ open down ↓ |
57 lines elided |
↑ open up ↑ |
2228 2102 new_pri = fssproc->fss_umdpri;
2229 2103 ASSERT(new_pri >= 0 && new_pri <= fss_maxglobpri);
2230 2104
2231 2105 /*
2232 2106 * When the priority of a thread is changed, it may
2233 2107 * be necessary to adjust its position on a sleep queue
2234 2108 * or dispatch queue. The function thread_change_pri
2235 2109 * accomplishes this.
2236 2110 */
2237 2111 if (thread_change_pri(t, new_pri, 0)) {
2238 - if ((t->t_schedflag & TS_LOAD) &&
2239 - (lwp = t->t_lwp) &&
2240 - lwp->lwp_state == LWP_USER)
2241 - t->t_schedflag &= ~TS_DONT_SWAP;
2242 2112 fssproc->fss_timeleft = fss_quantum;
2243 2113 } else {
2244 2114 call_cpu_surrender = B_TRUE;
2245 2115 }
2246 2116 } else if (t->t_state == TS_ONPROC &&
2247 2117 t->t_pri < t->t_disp_queue->disp_maxrunpri) {
2248 2118 /*
2249 2119 * If there is a higher-priority thread which is
2250 2120 * waiting for a processor, then thread surrenders
2251 2121 * the processor.
2252 2122 */
2253 2123 call_cpu_surrender = B_TRUE;
2254 2124 }
2255 2125 }
2256 2126
2257 2127 if (cpucaps_enforce && 2 * fssproc->fss_timeleft > fss_quantum) {
2258 2128 /*
2259 2129 * The thread used more than half of its quantum, so assume that
2260 2130 * it used the whole quantum.
2261 2131 *
2262 2132 * Update thread's priority just before putting it on the wait
2263 2133 * queue so that it gets charged for the CPU time from its
2264 2134 * quantum even before that quantum expires.
2265 2135 */
2266 2136 fss_newpri(fssproc);
2267 2137 if (t->t_pri != fssproc->fss_umdpri)
2268 2138 fss_change_priority(t, fssproc);
2269 2139
2270 2140 /*
2271 2141 * We need to call cpu_surrender for this thread due to cpucaps
2272 2142 * enforcement, but fss_change_priority may have already done
2273 2143 * so. In this case FSSBACKQ is set and there is no need to call
2274 2144 * cpu-surrender again.
2275 2145 */
2276 2146 if (!(fssproc->fss_flags & FSSBACKQ))
2277 2147 call_cpu_surrender = B_TRUE;
2278 2148 }
2279 2149
2280 2150 if (call_cpu_surrender) {
2281 2151 fssproc->fss_flags |= FSSBACKQ;
2282 2152 cpu_surrender(t);
2283 2153 }
2284 2154
2285 2155 thread_unlock_nopreempt(t); /* clock thread can't be preempted */
2286 2156 }
2287 2157
/*
 * Processes waking up go to the back of their queue. We don't need to assign
 * a time quantum here because thread is still at a kernel mode priority and
 * the time slicing is not done for threads running in the kernel after
 * sleeping. The proper time quantum will be assigned by fss_trapret before the
 * thread returns to user mode.
 */
static void
fss_wakeup(kthread_t *t)
{
	fssproc_t *fssproc;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t->t_state == TS_SLEEP);

	/* Rejoin the active-share accounting for this project. */
	fss_active(t);

	fssproc = FSSPROC(t);
	fssproc->fss_flags &= ~FSSBACKQ;

	if (fssproc->fss_flags & FSSKPRI) {
		/*
		 * If we already have a kernel priority assigned, then we
		 * just use it.
		 */
		setbackdq(t);
	} else if (t->t_kpri_req) {
		/*
		 * Give thread a priority boost if we were asked.
		 */
		fssproc->fss_flags |= FSSKPRI;
		THREAD_CHANGE_PRI(t, minclsyspri);
		setbackdq(t);
		t->t_trapret = 1;	/* so that fss_trapret will run */
		aston(t);
	} else {
		/*
		 * Otherwise, we recalculate the priority.
		 */
		if (t->t_disp_time == ddi_get_lbolt()) {
			setfrontdq(t);
		} else {
			fssproc->fss_timeleft = fss_quantum;
			THREAD_CHANGE_PRI(t, fssproc->fss_umdpri);
			setbackdq(t);
		}
	}
}
2337 2206
/*
 * fss_donice() is called when a nice(1) command is issued on the thread to
 * alter the priority. The nice(1) command exists in Solaris for compatibility.
 * Thread priority adjustments should be done via priocntl(1).
 */
static int
fss_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	int newnice;
	fssproc_t *fssproc = FSSPROC(t);
	fssparms_t fssparms;

	/*
	 * If there is no change to priority, just return current setting.
	 */
	if (incr == 0) {
		if (retvalp)
			*retvalp = fssproc->fss_nice - NZERO;
		return (0);
	}

	/* Raising priority (negative or out-of-range incr) is privileged. */
	if ((incr < 0 || incr > 2 * NZERO) && secpolicy_raisepriority(cr) != 0)
		return (EPERM);

	/*
	 * Specifying a nice increment greater than the upper limit of
	 * FSS_NICE_MAX (== 2 * NZERO - 1) will result in the thread's nice
	 * value being set to the upper limit.  We check for this before
	 * computing the new value because otherwise we could get overflow
	 * if a privileged user specified some ridiculous increment.
	 */
	if (incr > FSS_NICE_MAX)
		incr = FSS_NICE_MAX;

	newnice = fssproc->fss_nice + incr;
	if (newnice > FSS_NICE_MAX)
		newnice = FSS_NICE_MAX;
	else if (newnice < FSS_NICE_MIN)
		newnice = FSS_NICE_MIN;

	/* Map the nice value onto the FSS user-priority range. */
	fssparms.fss_uprilim = fssparms.fss_upri =
	    -((newnice - NZERO) * fss_maxupri) / NZERO;

	/*
	 * Reset the uprilim and upri values of the thread.
	 */
	(void) fss_parmsset(t, (void *)&fssparms, (id_t)0, (cred_t *)NULL);

	/*
	 * Although fss_parmsset already reset fss_nice it may not have been
	 * set to precisely the value calculated above because fss_parmsset
	 * determines the nice value from the user priority and we may have
	 * truncated during the integer conversion from nice value to user
	 * priority and back.  We reset fss_nice to the value we calculated
	 * above.
	 */
	fssproc->fss_nice = (char)newnice;

	if (retvalp)
		*retvalp = newnice - NZERO;
	return (0);
}
2400 2269
2401 2270 /*
2402 2271 * Increment the priority of the specified thread by incr and
2403 2272 * return the new value in *retvalp.
2404 2273 */
2405 2274 static int
2406 2275 fss_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
2407 2276 {
2408 2277 int newpri;
2409 2278 fssproc_t *fssproc = FSSPROC(t);
2410 2279 fssparms_t fssparms;
2411 2280
2412 2281 /*
2413 2282 * If there is no change to priority, just return current setting.
2414 2283 */
2415 2284 if (incr == 0) {
2416 2285 *retvalp = fssproc->fss_upri;
2417 2286 return (0);
2418 2287 }
2419 2288
2420 2289 newpri = fssproc->fss_upri + incr;
2421 2290 if (newpri > fss_maxupri || newpri < -fss_maxupri)
2422 2291 return (EINVAL);
2423 2292
2424 2293 *retvalp = newpri;
2425 2294 fssparms.fss_uprilim = fssparms.fss_upri = newpri;
2426 2295
2427 2296 /*
2428 2297 * Reset the uprilim and upri values of the thread.
2429 2298 */
2430 2299 return (fss_parmsset(t, &fssparms, (id_t)0, cr));
2431 2300 }
2432 2301
/*
 * Return the global scheduling priority that would be assigned to a thread
 * entering the fair-sharing class with the fss_upri.
 *
 * New entrants always start at the midpoint of the user-mode priority range.
 */
/*ARGSUSED*/
static pri_t
fss_globpri(kthread_t *t)
{
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	return (fss_maxumdpri / 2);
}
2445 2314
/*
 * Called from the yield(2) system call when a thread is yielding (surrendering)
 * the processor. The kernel thread is placed at the back of a dispatch queue.
 */
static void
fss_yield(kthread_t *t)
{
	fssproc_t *fssproc = FSSPROC(t);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Collect CPU usage spent before yielding
	 */
	(void) CPUCAPS_CHARGE(t, &fssproc->fss_caps, CPUCAPS_CHARGE_ENFORCE);

	/*
	 * Clear the preemption control "yield" bit since the user is
	 * doing a yield.
	 */
	if (t->t_schedctl)
		schedctl_set_yield(t, 0);
	/*
	 * If fss_preempt() artificially increased the thread's priority
	 * to avoid preemption, restore the original priority now.
	 */
	if (fssproc->fss_flags & FSSRESTORE) {
		THREAD_CHANGE_PRI(t, fssproc->fss_scpri);
		fssproc->fss_flags &= ~FSSRESTORE;
	}
	if (fssproc->fss_timeleft < 0) {
		/*
		 * Time slice was artificially extended to avoid preemption,
		 * so pretend we're preempting it now.
		 */
		DTRACE_SCHED1(schedctl__yield, int, -fssproc->fss_timeleft);
		fssproc->fss_timeleft = fss_quantum;
	}
	fssproc->fss_flags &= ~FSSBACKQ;
	setbackdq(t);
}
2488 2357
/*
 * Move thread t to the project kp (in zone zp), migrating its FSS accounting
 * from the old fssproj to the new one.  projbuf/zonebuf supply pre-allocated
 * fssproj_t/fsszone_t buffers so no allocation is needed under the locks.
 * Caller must hold cpu_lock, pidlock and the process's p_lock.
 */
void
fss_changeproj(kthread_t *t, void *kp, void *zp, fssbuf_t *projbuf,
    fssbuf_t *zonebuf)
{
	kproject_t *kpj_new = kp;
	zone_t *zone = zp;
	fssproj_t *fssproj_old, *fssproj_new;
	fsspset_t *fsspset;
	kproject_t *kpj_old;
	fssproc_t *fssproc;
	fsszone_t *fsszone_old, *fsszone_new;
	int free = 0;	/* set when the old fssproj must be freed */
	int id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	/* Only threads in the FSS class carry project accounting. */
	if (t->t_cid != fss_cid)
		return;

	fssproc = FSSPROC(t);
	mutex_enter(&fsspsets_lock);
	fssproj_old = FSSPROC2FSSPROJ(fssproc);
	if (fssproj_old == NULL) {
		mutex_exit(&fsspsets_lock);
		return;
	}

	fsspset = FSSPROJ2FSSPSET(fssproj_old);
	mutex_enter(&fsspset->fssps_lock);
	kpj_old = FSSPROJ2KPROJ(fssproj_old);
	fsszone_old = fssproj_old->fssp_fsszone;

	ASSERT(t->t_cpupart == fsspset->fssps_cpupart);

	/* Nothing to do if the thread is already in the target project. */
	if (kpj_old == kpj_new) {
		mutex_exit(&fsspset->fssps_lock);
		mutex_exit(&fsspsets_lock);
		return;
	}

	if ((fsszone_new = fss_find_fsszone(fsspset, zone)) == NULL) {
		/*
		 * If the zone for the new project is not currently active on
		 * the cpu partition we're on, get one of the pre-allocated
		 * buffers and link it in our per-pset zone list. Such buffers
		 * should already exist.
		 */
		for (id = 0; id < zonebuf->fssb_size; id++) {
			if ((fsszone_new = zonebuf->fssb_list[id]) != NULL) {
				fss_insert_fsszone(fsspset, zone, fsszone_new);
				zonebuf->fssb_list[id] = NULL;
				break;
			}
		}
	}
	ASSERT(fsszone_new != NULL);
	if ((fssproj_new = fss_find_fssproj(fsspset, kpj_new)) == NULL) {
		/*
		 * If our new project is not currently running
		 * on the cpu partition we're on, get one of the
		 * pre-allocated buffers and link it in our new cpu
		 * partition doubly linked list. Such buffers should already
		 * exist.
		 */
		for (id = 0; id < projbuf->fssb_size; id++) {
			if ((fssproj_new = projbuf->fssb_list[id]) != NULL) {
				fss_insert_fssproj(fsspset, kpj_new,
				    fsszone_new, fssproj_new);
				projbuf->fssb_list[id] = NULL;
				break;
			}
		}
	}
	ASSERT(fssproj_new != NULL);

	/*
	 * Deactivate in the old project, repoint the thread at the new one,
	 * then reactivate so share usage is charged to the right project.
	 */
	thread_lock(t);
	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
	    t->t_state == TS_WAIT)
		fss_inactive(t);
	ASSERT(fssproj_old->fssp_threads > 0);
	if (--fssproj_old->fssp_threads == 0) {
		fss_remove_fssproj(fsspset, fssproj_old);
		free = 1;
	}
	fssproc->fss_proj = fssproj_new;
	fssproc->fss_fsspri = 0;
	fssproj_new->fssp_threads++;
	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
	    t->t_state == TS_WAIT)
		fss_active(t);
	thread_unlock(t);
	if (free) {
		if (fsszone_old->fssz_nproj == 0)
			kmem_free(fsszone_old, sizeof (fsszone_t));
		kmem_free(fssproj_old, sizeof (fssproj_t));
	}

	mutex_exit(&fsspset->fssps_lock);
	mutex_exit(&fsspsets_lock);
}
2591 2460
/*
 * Move thread t to the cpu partition newcp, migrating its FSS accounting to
 * the fsspset for that partition.  projbuf/zonebuf supply pre-allocated
 * fssproj_t/fsszone_t buffers so no allocation is needed under the locks.
 * Caller must hold cpu_lock, pidlock and the process's p_lock.
 */
void
fss_changepset(kthread_t *t, void *newcp, fssbuf_t *projbuf,
    fssbuf_t *zonebuf)
{
	fsspset_t *fsspset_old, *fsspset_new;
	fssproj_t *fssproj_old, *fssproj_new;
	fsszone_t *fsszone_old, *fsszone_new;
	fssproc_t *fssproc;
	kproject_t *kpj;
	zone_t *zone;
	int id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	/* Only threads in the FSS class carry per-pset accounting. */
	if (t->t_cid != fss_cid)
		return;

	fssproc = FSSPROC(t);
	zone = ttoproc(t)->p_zone;
	mutex_enter(&fsspsets_lock);
	fssproj_old = FSSPROC2FSSPROJ(fssproc);
	if (fssproj_old == NULL) {
		mutex_exit(&fsspsets_lock);
		return;
	}
	fsszone_old = fssproj_old->fssp_fsszone;
	fsspset_old = FSSPROJ2FSSPSET(fssproj_old);
	kpj = FSSPROJ2KPROJ(fssproj_old);

	/* Nothing to do if the thread is already in the target partition. */
	if (fsspset_old->fssps_cpupart == newcp) {
		mutex_exit(&fsspsets_lock);
		return;
	}

	ASSERT(ttoproj(t) == kpj);

	fsspset_new = fss_find_fsspset(newcp);

	/*
	 * Make sure the zone and project are represented on the new
	 * partition, linking in pre-allocated buffers if necessary.
	 */
	mutex_enter(&fsspset_new->fssps_lock);
	if ((fsszone_new = fss_find_fsszone(fsspset_new, zone)) == NULL) {
		for (id = 0; id < zonebuf->fssb_size; id++) {
			if ((fsszone_new = zonebuf->fssb_list[id]) != NULL) {
				fss_insert_fsszone(fsspset_new, zone,
				    fsszone_new);
				zonebuf->fssb_list[id] = NULL;
				break;
			}
		}
	}
	ASSERT(fsszone_new != NULL);
	if ((fssproj_new = fss_find_fssproj(fsspset_new, kpj)) == NULL) {
		for (id = 0; id < projbuf->fssb_size; id++) {
			if ((fssproj_new = projbuf->fssb_list[id]) != NULL) {
				fss_insert_fssproj(fsspset_new, kpj,
				    fsszone_new, fssproj_new);
				projbuf->fssb_list[id] = NULL;
				break;
			}
		}
	}
	ASSERT(fssproj_new != NULL);

	/*
	 * Deactivate on the old partition, repoint the thread at the new
	 * project structure, then reactivate on the new partition.
	 */
	fssproj_new->fssp_threads++;
	thread_lock(t);
	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
	    t->t_state == TS_WAIT)
		fss_inactive(t);
	fssproc->fss_proj = fssproj_new;
	fssproc->fss_fsspri = 0;
	if (t->t_state == TS_RUN || t->t_state == TS_ONPROC ||
	    t->t_state == TS_WAIT)
		fss_active(t);
	thread_unlock(t);
	mutex_exit(&fsspset_new->fssps_lock);

	/* Drop the old project's reference; free it if it was the last. */
	mutex_enter(&fsspset_old->fssps_lock);
	if (--fssproj_old->fssp_threads == 0) {
		fss_remove_fssproj(fsspset_old, fssproj_old);
		if (fsszone_old->fssz_nproj == 0)
			kmem_free(fsszone_old, sizeof (fsszone_t));
		kmem_free(fssproj_old, sizeof (fssproj_t));
	}
	mutex_exit(&fsspset_old->fssps_lock);

	mutex_exit(&fsspsets_lock);
}
↓ open down ↓ |
364 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX