Print this page
patch as-lock-macro-simplification
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/syscall/lgrpsys.c
+++ new/usr/src/uts/common/syscall/lgrpsys.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 * Copyright 2015 Joyent, Inc.
26 26 */
27 27
28 28 /*
29 29 * lgroup system calls
30 30 */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/errno.h>
34 34 #include <sys/sunddi.h>
35 35 #include <sys/systm.h>
36 36 #include <sys/mman.h>
37 37 #include <sys/cpupart.h>
38 38 #include <sys/lgrp.h>
39 39 #include <sys/lgrp_user.h>
40 40 #include <sys/promif.h> /* for prom_printf() */
41 41 #include <sys/sysmacros.h>
42 42 #include <sys/policy.h>
43 43
44 44 #include <vm/as.h>
45 45
46 46
47 47 /* definitions for mi_validity */
48 48 #define VALID_ADDR 1
49 49 #define VALID_REQ 2
50 50
51 51 /*
52 52 * run through the given number of addresses and requests and return the
53 53 * corresponding memory information for each address
54 54 */
55 55 static int
56 56 meminfo(int addr_count, struct meminfo *mip)
57 57 {
58 58 size_t in_size, out_size, req_size, val_size;
59 59 struct as *as;
60 60 struct hat *hat;
61 61 int i, j, out_idx, info_count;
62 62 lgrp_t *lgrp;
63 63 pfn_t pfn;
64 64 ssize_t pgsz;
65 65 int *req_array, *val_array;
66 66 uint64_t *in_array, *out_array;
67 67 uint64_t addr, paddr;
68 68 uintptr_t vaddr;
69 69 int ret = 0;
70 70 struct meminfo minfo;
71 71 #if defined(_SYSCALL32_IMPL)
72 72 struct meminfo32 minfo32;
73 73 #endif
74 74
75 75 /*
76 76 * Make sure that there is at least one address to translate and
77 77 * limit how many virtual addresses the kernel can do per call
78 78 */
79 79 if (addr_count < 1)
80 80 return (set_errno(EINVAL));
81 81 else if (addr_count > MAX_MEMINFO_CNT)
82 82 addr_count = MAX_MEMINFO_CNT;
83 83
84 84 if (get_udatamodel() == DATAMODEL_NATIVE) {
85 85 if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 86 return (set_errno(EFAULT));
87 87 }
88 88 #if defined(_SYSCALL32_IMPL)
89 89 else {
90 90 bzero(&minfo, sizeof (minfo));
91 91 if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 92 return (set_errno(EFAULT));
93 93 minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 94 minfo32.mi_inaddr;
95 95 minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 96 minfo32.mi_info_req;
97 97 minfo.mi_info_count = minfo32.mi_info_count;
98 98 minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 99 minfo32.mi_outdata;
100 100 minfo.mi_validity = (uint_t *)(uintptr_t)
101 101 minfo32.mi_validity;
102 102 }
103 103 #endif
104 104 /*
105 105 * all the input parameters have been copied in:-
106 106 * addr_count - number of input addresses
107 107 * minfo.mi_inaddr - array of input addresses
108 108 * minfo.mi_info_req - array of types of information requested
109 109 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 110 * minfo.mi_outdata - array into which the results are placed
111 111 * minfo.mi_validity - array containing bitwise result codes; 0th bit
112 112 * evaluates validity of corresponding input
113 113 * address, 1st bit validity of response to first
114 114 * member of info_req, etc.
115 115 */
116 116
117 117 /* make sure mi_info_count is within limit */
118 118 info_count = minfo.mi_info_count;
119 119 if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 120 return (set_errno(EINVAL));
121 121
122 122 /*
123 123 * allocate buffer in_array for the input addresses and copy them in
124 124 */
125 125 in_size = sizeof (uint64_t) * addr_count;
126 126 in_array = kmem_alloc(in_size, KM_SLEEP);
127 127 if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 128 kmem_free(in_array, in_size);
129 129 return (set_errno(EFAULT));
130 130 }
131 131
132 132 /*
133 133 * allocate buffer req_array for the input info_reqs and copy them in
134 134 */
135 135 req_size = sizeof (uint_t) * info_count;
136 136 req_array = kmem_alloc(req_size, KM_SLEEP);
137 137 if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 138 kmem_free(req_array, req_size);
139 139 kmem_free(in_array, in_size);
140 140 return (set_errno(EFAULT));
141 141 }
142 142
143 143 /*
144 144 * Validate privs for each req.
145 145 */
146 146 for (i = 0; i < info_count; i++) {
147 147 switch (req_array[i] & MEMINFO_MASK) {
148 148 case MEMINFO_VLGRP:
149 149 case MEMINFO_VPAGESIZE:
150 150 break;
151 151 default:
152 152 if (secpolicy_meminfo(CRED()) != 0) {
153 153 kmem_free(req_array, req_size);
154 154 kmem_free(in_array, in_size);
155 155 return (set_errno(EPERM));
156 156 }
157 157 break;
158 158 }
159 159 }
160 160
161 161 /*
162 162 * allocate buffer out_array which holds the results and will have
163 163 * to be copied out later
164 164 */
165 165 out_size = sizeof (uint64_t) * addr_count * info_count;
166 166 out_array = kmem_alloc(out_size, KM_SLEEP);
167 167
168 168 /*
169 169 * allocate buffer val_array which holds the validity bits and will
170 170 * have to be copied out later
171 171 */
172 172 val_size = sizeof (uint_t) * addr_count;
173 173 val_array = kmem_alloc(val_size, KM_SLEEP);
174 174
175 175 if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
176 176 /* find the corresponding lgroup for each physical address */
177 177 for (i = 0; i < addr_count; i++) {
178 178 paddr = in_array[i];
179 179 pfn = btop(paddr);
180 180 lgrp = lgrp_pfn_to_lgrp(pfn);
181 181 if (lgrp) {
182 182 out_array[i] = lgrp->lgrp_id;
↓ open down ↓ |
182 lines elided |
↑ open up ↑ |
183 183 val_array[i] = VALID_ADDR | VALID_REQ;
184 184 } else {
185 185 out_array[i] = NULL;
186 186 val_array[i] = 0;
187 187 }
188 188 }
189 189 } else {
190 190 /* get the corresponding memory info for each virtual address */
191 191 as = curproc->p_as;
192 192
193 - AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
193 + AS_LOCK_ENTER(as, RW_READER);
194 194 hat = as->a_hat;
195 195 for (i = out_idx = 0; i < addr_count; i++, out_idx +=
196 196 info_count) {
197 197 addr = in_array[i];
198 198 vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
199 199 if (!as_segat(as, (caddr_t)vaddr)) {
200 200 val_array[i] = 0;
201 201 continue;
202 202 }
203 203 val_array[i] = VALID_ADDR;
204 204 pfn = hat_getpfnum(hat, (caddr_t)vaddr);
205 205 if (pfn != PFN_INVALID) {
206 206 paddr = (uint64_t)((pfn << PAGESHIFT) |
207 207 (addr & PAGEOFFSET));
208 208 for (j = 0; j < info_count; j++) {
209 209 switch (req_array[j] & MEMINFO_MASK) {
210 210 case MEMINFO_VPHYSICAL:
211 211 /*
212 212 * return the physical address
213 213 * corresponding to the input
214 214 * virtual address
215 215 */
216 216 out_array[out_idx + j] = paddr;
217 217 val_array[i] |= VALID_REQ << j;
218 218 break;
219 219 case MEMINFO_VLGRP:
220 220 /*
221 221 * return the lgroup of physical
222 222 * page corresponding to the
223 223 * input virtual address
224 224 */
225 225 lgrp = lgrp_pfn_to_lgrp(pfn);
226 226 if (lgrp) {
227 227 out_array[out_idx + j] =
228 228 lgrp->lgrp_id;
229 229 val_array[i] |=
230 230 VALID_REQ << j;
231 231 }
232 232 break;
233 233 case MEMINFO_VPAGESIZE:
234 234 /*
235 235 * return the size of physical
236 236 * page corresponding to the
237 237 * input virtual address
238 238 */
239 239 pgsz = hat_getpagesize(hat,
240 240 (caddr_t)vaddr);
241 241 if (pgsz != -1) {
242 242 out_array[out_idx + j] =
243 243 pgsz;
244 244 val_array[i] |=
245 245 VALID_REQ << j;
246 246 }
247 247 break;
248 248 case MEMINFO_VREPLCNT:
249 249 /*
250 250 * for future use:-
251 251 * return the no. replicated
252 252 * physical pages corresponding
253 253 * to the input virtual address,
254 254 * so it is always 0 at the
255 255 * moment
256 256 */
257 257 out_array[out_idx + j] = 0;
258 258 val_array[i] |= VALID_REQ << j;
259 259 break;
260 260 case MEMINFO_VREPL:
261 261 /*
262 262 * for future use:-
263 263 * return the nth physical
264 264 * replica of the specified
265 265 * virtual address
266 266 */
267 267 break;
268 268 case MEMINFO_VREPL_LGRP:
269 269 /*
270 270 * for future use:-
271 271 * return the lgroup of nth
272 272 * physical replica of the
273 273 * specified virtual address
274 274 */
275 275 break;
276 276 case MEMINFO_PLGRP:
277 277 /*
278 278 * this is for physical address
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
279 279 * only, shouldn't mix with
280 280 * virtual address
281 281 */
282 282 break;
283 283 default:
284 284 break;
285 285 }
286 286 }
287 287 }
288 288 }
289 - AS_LOCK_EXIT(as, &as->a_lock);
289 + AS_LOCK_EXIT(as);
290 290 }
291 291
292 292 /* copy out the results and validity bits and free the buffers */
293 293 if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
294 294 (copyout(val_array, minfo.mi_validity, val_size) != 0))
295 295 ret = set_errno(EFAULT);
296 296
297 297 kmem_free(in_array, in_size);
298 298 kmem_free(out_array, out_size);
299 299 kmem_free(req_array, req_size);
300 300 kmem_free(val_array, val_size);
301 301
302 302 return (ret);
303 303 }
304 304
305 305
306 306 /*
307 307 * Initialize lgroup affinities for thread
308 308 */
309 309 void
310 310 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
311 311 {
312 312 if (bufaddr)
313 313 *bufaddr = NULL;
314 314 }
315 315
316 316
317 317 /*
318 318 * Free lgroup affinities for thread and set to NULL
319 319 * just in case thread gets recycled
320 320 */
321 321 void
322 322 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
323 323 {
324 324 if (bufaddr && *bufaddr) {
325 325 kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
326 326 *bufaddr = NULL;
327 327 }
328 328 }
329 329
330 330
331 331 #define P_ANY -2 /* cookie specifying any ID */
332 332
333 333
334 334 /*
335 335 * Find LWP with given ID in specified process and get its affinity for
336 336 * specified lgroup
337 337 */
338 338 lgrp_affinity_t
339 339 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
340 340 {
341 341 lgrp_affinity_t aff;
342 342 int found;
343 343 kthread_t *t;
344 344
345 345 ASSERT(MUTEX_HELD(&p->p_lock));
346 346
347 347 aff = LGRP_AFF_NONE;
348 348 found = 0;
349 349 t = p->p_tlist;
350 350 /*
 351 351 	 * The process may be executing in proc_exit() and its p->p_tlist may be
352 352 * already NULL.
353 353 */
354 354 if (t == NULL)
355 355 return (set_errno(ESRCH));
356 356
357 357 do {
358 358 if (t->t_tid == lwpid || lwpid == P_ANY) {
359 359 thread_lock(t);
360 360 /*
361 361 * Check to see whether caller has permission to set
362 362 * affinity for LWP
363 363 */
364 364 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
365 365 thread_unlock(t);
366 366 return (set_errno(EPERM));
367 367 }
368 368
369 369 if (t->t_lgrp_affinity)
370 370 aff = t->t_lgrp_affinity[lgrp];
371 371 thread_unlock(t);
372 372 found = 1;
373 373 break;
374 374 }
375 375 } while ((t = t->t_forw) != p->p_tlist);
376 376 if (!found)
377 377 aff = set_errno(ESRCH);
378 378
379 379 return (aff);
380 380 }
381 381
382 382
383 383 /*
384 384 * Get lgroup affinity for given LWP
385 385 */
386 386 lgrp_affinity_t
387 387 lgrp_affinity_get(lgrp_affinity_args_t *ap)
388 388 {
389 389 lgrp_affinity_t aff;
390 390 lgrp_affinity_args_t args;
391 391 id_t id;
392 392 idtype_t idtype;
393 393 lgrp_id_t lgrp;
394 394 proc_t *p;
395 395 kthread_t *t;
396 396
397 397 /*
398 398 * Copyin arguments
399 399 */
400 400 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
401 401 return (set_errno(EFAULT));
402 402
403 403 id = args.id;
404 404 idtype = args.idtype;
405 405 lgrp = args.lgrp;
406 406
407 407 /*
408 408 * Check for invalid lgroup
409 409 */
410 410 if (lgrp < 0 || lgrp == LGRP_NONE)
411 411 return (set_errno(EINVAL));
412 412
413 413 /*
414 414 * Check for existing lgroup
415 415 */
416 416 if (lgrp > lgrp_alloc_max)
417 417 return (set_errno(ESRCH));
418 418
419 419 /*
420 420 * Get lgroup affinity for given LWP or process
421 421 */
422 422 switch (idtype) {
423 423
424 424 case P_LWPID:
425 425 /*
426 426 * LWP in current process
427 427 */
428 428 p = curproc;
429 429 mutex_enter(&p->p_lock);
430 430 if (id != P_MYID) /* different thread */
431 431 aff = lgrp_affinity_get_thread(p, id, lgrp);
432 432 else { /* current thread */
433 433 aff = LGRP_AFF_NONE;
434 434 t = curthread;
435 435 thread_lock(t);
436 436 if (t->t_lgrp_affinity)
437 437 aff = t->t_lgrp_affinity[lgrp];
438 438 thread_unlock(t);
439 439 }
440 440 mutex_exit(&p->p_lock);
441 441 break;
442 442
443 443 case P_PID:
444 444 /*
445 445 * Process
446 446 */
447 447 mutex_enter(&pidlock);
448 448
449 449 if (id == P_MYID)
450 450 p = curproc;
451 451 else {
452 452 p = prfind(id);
453 453 if (p == NULL) {
454 454 mutex_exit(&pidlock);
455 455 return (set_errno(ESRCH));
456 456 }
457 457 }
458 458
459 459 mutex_enter(&p->p_lock);
460 460 aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
461 461 mutex_exit(&p->p_lock);
462 462
463 463 mutex_exit(&pidlock);
464 464 break;
465 465
466 466 default:
467 467 aff = set_errno(EINVAL);
468 468 break;
469 469 }
470 470
471 471 return (aff);
472 472 }
473 473
474 474
475 475 /*
476 476 * Find lgroup for which this thread has most affinity in specified partition
477 477 * starting from home lgroup unless specified starting lgroup is preferred
478 478 */
479 479 lpl_t *
480 480 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
481 481 boolean_t prefer_start)
482 482 {
483 483 lgrp_affinity_t *affs;
484 484 lgrp_affinity_t best_aff;
485 485 lpl_t *best_lpl;
486 486 lgrp_id_t finish;
487 487 lgrp_id_t home;
488 488 lgrp_id_t lgrpid;
489 489 lpl_t *lpl;
490 490
491 491 ASSERT(t != NULL);
492 492 ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
493 493 (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
494 494 ASSERT(cpupart != NULL);
495 495
496 496 if (t->t_lgrp_affinity == NULL)
497 497 return (NULL);
498 498
499 499 affs = t->t_lgrp_affinity;
500 500
501 501 /*
502 502 * Thread bound to CPU
503 503 */
504 504 if (t->t_bind_cpu != PBIND_NONE) {
505 505 cpu_t *cp;
506 506
507 507 /*
508 508 * Find which lpl has most affinity among leaf lpl directly
509 509 * containing CPU and its ancestor lpls
510 510 */
511 511 cp = cpu[t->t_bind_cpu];
512 512
513 513 best_lpl = lpl = cp->cpu_lpl;
514 514 best_aff = affs[best_lpl->lpl_lgrpid];
515 515 while (lpl->lpl_parent != NULL) {
516 516 lpl = lpl->lpl_parent;
517 517 lgrpid = lpl->lpl_lgrpid;
518 518 if (affs[lgrpid] > best_aff) {
519 519 best_lpl = lpl;
520 520 best_aff = affs[lgrpid];
521 521 }
522 522 }
523 523 return (best_lpl);
524 524 }
525 525
526 526 /*
527 527 * Start searching from home lgroup unless given starting lgroup is
528 528 * preferred or home lgroup isn't in given pset. Use root lgroup as
529 529 * starting point if both home and starting lgroups aren't in given
530 530 * pset.
531 531 */
532 532 ASSERT(start >= 0 && start <= lgrp_alloc_max);
533 533 home = t->t_lpl->lpl_lgrpid;
534 534 if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
535 535 lgrpid = home;
536 536 else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
537 537 lgrpid = start;
538 538 else
539 539 lgrpid = LGRP_ROOTID;
540 540
541 541 best_lpl = &cpupart->cp_lgrploads[lgrpid];
542 542 best_aff = affs[lgrpid];
543 543 finish = lgrpid;
544 544 do {
545 545 /*
546 546 * Skip any lgroups that don't have CPU resources
547 547 * in this processor set.
548 548 */
549 549 if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
550 550 if (++lgrpid > lgrp_alloc_max)
551 551 lgrpid = 0; /* wrap the search */
552 552 continue;
553 553 }
554 554
555 555 /*
556 556 * Find lgroup with most affinity
557 557 */
558 558 lpl = &cpupart->cp_lgrploads[lgrpid];
559 559 if (affs[lgrpid] > best_aff) {
560 560 best_aff = affs[lgrpid];
561 561 best_lpl = lpl;
562 562 }
563 563
564 564 if (++lgrpid > lgrp_alloc_max)
565 565 lgrpid = 0; /* wrap the search */
566 566
567 567 } while (lgrpid != finish);
568 568
569 569 /*
570 570 * No lgroup (in this pset) with any affinity
571 571 */
572 572 if (best_aff == LGRP_AFF_NONE)
573 573 return (NULL);
574 574
575 575 lgrpid = best_lpl->lpl_lgrpid;
576 576 ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
577 577
578 578 return (best_lpl);
579 579 }
580 580
581 581
582 582 /*
583 583 * Set thread's affinity for given lgroup
584 584 */
585 585 int
586 586 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
587 587 lgrp_affinity_t **aff_buf)
588 588 {
589 589 lgrp_affinity_t *affs;
590 590 lgrp_id_t best;
591 591 lpl_t *best_lpl;
592 592 lgrp_id_t home;
593 593 int retval;
594 594
595 595 ASSERT(t != NULL);
596 596 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
597 597
598 598 retval = 0;
599 599
600 600 thread_lock(t);
601 601
602 602 /*
603 603 * Check to see whether caller has permission to set affinity for
604 604 * thread
605 605 */
606 606 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
607 607 thread_unlock(t);
608 608 return (set_errno(EPERM));
609 609 }
610 610
611 611 if (t->t_lgrp_affinity == NULL) {
612 612 if (aff == LGRP_AFF_NONE) {
613 613 thread_unlock(t);
614 614 return (0);
615 615 }
616 616 ASSERT(aff_buf != NULL && *aff_buf != NULL);
617 617 t->t_lgrp_affinity = *aff_buf;
618 618 *aff_buf = NULL;
619 619 }
620 620
621 621 affs = t->t_lgrp_affinity;
622 622 affs[lgrp] = aff;
623 623
624 624 /*
625 625 * Find lgroup for which thread has most affinity,
626 626 * starting with lgroup for which affinity being set
627 627 */
628 628 best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
629 629
630 630 /*
631 631 * Rehome if found lgroup with more affinity than home or lgroup for
632 632 * which affinity is being set has same affinity as home
633 633 */
634 634 home = t->t_lpl->lpl_lgrpid;
635 635 if (best_lpl != NULL && best_lpl != t->t_lpl) {
636 636 best = best_lpl->lpl_lgrpid;
637 637 if (affs[best] > affs[home] || (affs[best] == affs[home] &&
638 638 best == lgrp))
639 639 lgrp_move_thread(t, best_lpl, 1);
640 640 }
641 641
642 642 thread_unlock(t);
643 643
644 644 return (retval);
645 645 }
646 646
647 647
648 648 /*
649 649 * Set process' affinity for specified lgroup
650 650 */
651 651 int
652 652 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
653 653 lgrp_affinity_t **aff_buf_array)
654 654 {
655 655 lgrp_affinity_t *buf;
656 656 int err = 0;
657 657 int i;
658 658 int retval;
659 659 kthread_t *t;
660 660
661 661 ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
662 662 ASSERT(aff_buf_array != NULL);
663 663
664 664 i = 0;
665 665 t = p->p_tlist;
666 666 if (t != NULL) {
667 667 do {
668 668 /*
669 669 * Set lgroup affinity for thread
670 670 */
671 671 buf = aff_buf_array[i];
672 672 retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
673 673
674 674 if (err == 0 && retval != 0)
675 675 err = retval;
676 676
677 677 /*
678 678 * Advance pointer to next buffer
679 679 */
680 680 if (buf == NULL) {
681 681 ASSERT(i < p->p_lwpcnt);
682 682 aff_buf_array[i] = NULL;
683 683 i++;
684 684 }
685 685
686 686 } while ((t = t->t_forw) != p->p_tlist);
687 687 }
688 688 return (err);
689 689 }
690 690
691 691
692 692 /*
693 693 * Set LWP's or process' affinity for specified lgroup
694 694 *
695 695 * When setting affinities, pidlock, process p_lock, and thread_lock()
696 696 * need to be held in that order to protect target thread's pset, process,
697 697 * process contents, and thread contents. thread_lock() does splhigh(),
 698 698 	 * so it ends up having similar effect as kpreempt_disable(), so it will
699 699 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
700 700 */
701 701 int
702 702 lgrp_affinity_set(lgrp_affinity_args_t *ap)
703 703 {
704 704 lgrp_affinity_t aff;
705 705 lgrp_affinity_t *aff_buf;
706 706 lgrp_affinity_args_t args;
707 707 id_t id;
708 708 idtype_t idtype;
709 709 lgrp_id_t lgrp;
710 710 int nthreads;
711 711 proc_t *p;
712 712 int retval;
713 713
714 714 /*
715 715 * Copyin arguments
716 716 */
717 717 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
718 718 return (set_errno(EFAULT));
719 719
720 720 idtype = args.idtype;
721 721 id = args.id;
722 722 lgrp = args.lgrp;
723 723 aff = args.aff;
724 724
725 725 /*
726 726 * Check for invalid lgroup
727 727 */
728 728 if (lgrp < 0 || lgrp == LGRP_NONE)
729 729 return (set_errno(EINVAL));
730 730
731 731 /*
732 732 * Check for existing lgroup
733 733 */
734 734 if (lgrp > lgrp_alloc_max)
735 735 return (set_errno(ESRCH));
736 736
737 737 /*
738 738 * Check for legal affinity
739 739 */
740 740 if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
741 741 aff != LGRP_AFF_STRONG)
742 742 return (set_errno(EINVAL));
743 743
744 744 /*
745 745 * Must be process or LWP ID
746 746 */
747 747 if (idtype != P_LWPID && idtype != P_PID)
748 748 return (set_errno(EINVAL));
749 749
750 750 /*
751 751 * Set given LWP's or process' affinity for specified lgroup
752 752 */
753 753 switch (idtype) {
754 754
755 755 case P_LWPID:
756 756 /*
757 757 * Allocate memory for thread's lgroup affinities
758 758 * ahead of time w/o holding locks
759 759 */
760 760 aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
761 761 KM_SLEEP);
762 762
763 763 p = curproc;
764 764
765 765 /*
766 766 * Set affinity for thread
767 767 */
768 768 mutex_enter(&p->p_lock);
769 769 if (id == P_MYID) { /* current thread */
770 770 retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
771 771 &aff_buf);
772 772 } else if (p->p_tlist == NULL) {
773 773 retval = set_errno(ESRCH);
774 774 } else { /* other thread */
775 775 int found = 0;
776 776 kthread_t *t;
777 777
778 778 t = p->p_tlist;
779 779 do {
780 780 if (t->t_tid == id) {
781 781 retval = lgrp_affinity_set_thread(t,
782 782 lgrp, aff, &aff_buf);
783 783 found = 1;
784 784 break;
785 785 }
786 786 } while ((t = t->t_forw) != p->p_tlist);
787 787 if (!found)
788 788 retval = set_errno(ESRCH);
789 789 }
790 790 mutex_exit(&p->p_lock);
791 791
792 792 /*
793 793 * Free memory for lgroup affinities,
794 794 * since thread didn't need it
795 795 */
796 796 if (aff_buf)
797 797 kmem_free(aff_buf,
798 798 nlgrpsmax * sizeof (lgrp_affinity_t));
799 799
800 800 break;
801 801
802 802 case P_PID:
803 803
804 804 do {
805 805 lgrp_affinity_t **aff_buf_array;
806 806 int i;
807 807 size_t size;
808 808
809 809 /*
810 810 * Get process
811 811 */
812 812 mutex_enter(&pidlock);
813 813
814 814 if (id == P_MYID)
815 815 p = curproc;
816 816 else
817 817 p = prfind(id);
818 818
819 819 if (p == NULL) {
820 820 mutex_exit(&pidlock);
821 821 return (set_errno(ESRCH));
822 822 }
823 823
824 824 /*
825 825 * Get number of threads in process
826 826 *
827 827 * NOTE: Only care about user processes,
828 828 * so p_lwpcnt should be number of threads.
829 829 */
830 830 mutex_enter(&p->p_lock);
831 831 nthreads = p->p_lwpcnt;
832 832 mutex_exit(&p->p_lock);
833 833
834 834 mutex_exit(&pidlock);
835 835
836 836 if (nthreads < 1)
837 837 return (set_errno(ESRCH));
838 838
839 839 /*
840 840 * Preallocate memory for lgroup affinities for
841 841 * each thread in process now to avoid holding
842 842 * any locks. Allocate an array to hold a buffer
843 843 * for each thread.
844 844 */
845 845 aff_buf_array = kmem_zalloc(nthreads *
846 846 sizeof (lgrp_affinity_t *), KM_SLEEP);
847 847
848 848 size = nlgrpsmax * sizeof (lgrp_affinity_t);
849 849 for (i = 0; i < nthreads; i++)
850 850 aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
851 851
852 852 mutex_enter(&pidlock);
853 853
854 854 /*
855 855 * Get process again since dropped locks to allocate
856 856 * memory (except current process)
857 857 */
858 858 if (id != P_MYID)
859 859 p = prfind(id);
860 860
861 861 /*
862 862 * Process went away after we dropped locks and before
863 863 * reacquiring them, so drop locks, free memory, and
864 864 * return.
865 865 */
866 866 if (p == NULL) {
867 867 mutex_exit(&pidlock);
868 868 for (i = 0; i < nthreads; i++)
869 869 kmem_free(aff_buf_array[i], size);
870 870 kmem_free(aff_buf_array,
871 871 nthreads * sizeof (lgrp_affinity_t *));
872 872 return (set_errno(ESRCH));
873 873 }
874 874
875 875 mutex_enter(&p->p_lock);
876 876
877 877 /*
878 878 * See whether number of threads is same
879 879 * If not, drop locks, free memory, and try again
880 880 */
881 881 if (nthreads != p->p_lwpcnt) {
882 882 mutex_exit(&p->p_lock);
883 883 mutex_exit(&pidlock);
884 884 for (i = 0; i < nthreads; i++)
885 885 kmem_free(aff_buf_array[i], size);
886 886 kmem_free(aff_buf_array,
887 887 nthreads * sizeof (lgrp_affinity_t *));
888 888 continue;
889 889 }
890 890
891 891 /*
892 892 * Set lgroup affinity for threads in process
893 893 */
894 894 retval = lgrp_affinity_set_proc(p, lgrp, aff,
895 895 aff_buf_array);
896 896
897 897 mutex_exit(&p->p_lock);
898 898 mutex_exit(&pidlock);
899 899
900 900 /*
901 901 * Free any leftover memory, since some threads may
902 902 * have already allocated memory and set lgroup
903 903 * affinities before
904 904 */
905 905 for (i = 0; i < nthreads; i++)
906 906 if (aff_buf_array[i] != NULL)
907 907 kmem_free(aff_buf_array[i], size);
908 908 kmem_free(aff_buf_array,
909 909 nthreads * sizeof (lgrp_affinity_t *));
910 910
911 911 break;
912 912
913 913 } while (nthreads != p->p_lwpcnt);
914 914
915 915 break;
916 916
917 917 default:
918 918 retval = set_errno(EINVAL);
919 919 break;
920 920 }
921 921
922 922 return (retval);
923 923 }
924 924
925 925
926 926 /*
927 927 * Return the latest generation number for the lgroup hierarchy
928 928 * with the given view
929 929 */
930 930 lgrp_gen_t
931 931 lgrp_generation(lgrp_view_t view)
932 932 {
933 933 cpupart_t *cpupart;
934 934 uint_t gen;
935 935
936 936 kpreempt_disable();
937 937
938 938 /*
939 939 * Determine generation number for given view
940 940 */
941 941 if (view == LGRP_VIEW_OS)
942 942 /*
943 943 * Return generation number of lgroup hierarchy for OS view
944 944 */
945 945 gen = lgrp_gen;
946 946 else {
947 947 /*
948 948 * For caller's view, use generation numbers for lgroup
949 949 * hierarchy and caller's pset
950 950 * NOTE: Caller needs to check for change in pset ID
951 951 */
952 952 cpupart = curthread->t_cpupart;
953 953 ASSERT(cpupart);
954 954 gen = lgrp_gen + cpupart->cp_gen;
955 955 }
956 956
957 957 kpreempt_enable();
958 958
959 959 return (gen);
960 960 }
961 961
962 962
963 963 lgrp_id_t
964 964 lgrp_home_thread(kthread_t *t)
965 965 {
966 966 lgrp_id_t home;
967 967
968 968 ASSERT(t != NULL);
969 969 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
970 970
971 971 thread_lock(t);
972 972
973 973 /*
974 974 * Check to see whether caller has permission to set affinity for
975 975 * thread
976 976 */
977 977 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
978 978 thread_unlock(t);
979 979 return (set_errno(EPERM));
980 980 }
981 981
982 982 home = lgrp_home_id(t);
983 983
984 984 thread_unlock(t);
985 985 return (home);
986 986 }
987 987
988 988
989 989 /*
990 990 * Get home lgroup of given process or thread
991 991 */
992 992 lgrp_id_t
993 993 lgrp_home_get(idtype_t idtype, id_t id)
994 994 {
995 995 proc_t *p;
996 996 lgrp_id_t retval;
997 997 kthread_t *t;
998 998
999 999 /*
1000 1000 * Get home lgroup of given LWP or process
1001 1001 */
1002 1002 switch (idtype) {
1003 1003
1004 1004 case P_LWPID:
1005 1005 p = curproc;
1006 1006
1007 1007 /*
1008 1008 * Set affinity for thread
1009 1009 */
1010 1010 mutex_enter(&p->p_lock);
1011 1011 if (id == P_MYID) { /* current thread */
1012 1012 retval = lgrp_home_thread(curthread);
1013 1013 } else if (p->p_tlist == NULL) {
1014 1014 retval = set_errno(ESRCH);
1015 1015 } else { /* other thread */
1016 1016 int found = 0;
1017 1017
1018 1018 t = p->p_tlist;
1019 1019 do {
1020 1020 if (t->t_tid == id) {
1021 1021 retval = lgrp_home_thread(t);
1022 1022 found = 1;
1023 1023 break;
1024 1024 }
1025 1025 } while ((t = t->t_forw) != p->p_tlist);
1026 1026 if (!found)
1027 1027 retval = set_errno(ESRCH);
1028 1028 }
1029 1029 mutex_exit(&p->p_lock);
1030 1030 break;
1031 1031
1032 1032 case P_PID:
1033 1033 /*
1034 1034 * Get process
1035 1035 */
1036 1036 mutex_enter(&pidlock);
1037 1037
1038 1038 if (id == P_MYID)
1039 1039 p = curproc;
1040 1040 else
1041 1041 p = prfind(id);
1042 1042
1043 1043 if (p == NULL) {
1044 1044 mutex_exit(&pidlock);
1045 1045 return (set_errno(ESRCH));
1046 1046 }
1047 1047
1048 1048 mutex_enter(&p->p_lock);
1049 1049 t = p->p_tlist;
1050 1050 if (t == NULL)
1051 1051 retval = set_errno(ESRCH);
1052 1052 else
1053 1053 retval = lgrp_home_thread(t);
1054 1054 mutex_exit(&p->p_lock);
1055 1055
1056 1056 mutex_exit(&pidlock);
1057 1057
1058 1058 break;
1059 1059
1060 1060 default:
1061 1061 retval = set_errno(EINVAL);
1062 1062 break;
1063 1063 }
1064 1064
1065 1065 return (retval);
1066 1066 }
1067 1067
1068 1068
1069 1069 /*
1070 1070 * Return latency between "from" and "to" lgroups
1071 1071 *
1072 1072 * This latency number can only be used for relative comparison
1073 1073 * between lgroups on the running system, cannot be used across platforms,
1074 1074 * and may not reflect the actual latency. It is platform and implementation
1075 1075 * specific, so platform gets to decide its value. It would be nice if the
1076 1076 * number was at least proportional to make comparisons more meaningful though.
1077 1077 */
1078 1078 int
1079 1079 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1080 1080 {
1081 1081 lgrp_t *from_lgrp;
1082 1082 int i;
1083 1083 int latency;
1084 1084 int latency_max;
1085 1085 lgrp_t *to_lgrp;
1086 1086
1087 1087 ASSERT(MUTEX_HELD(&cpu_lock));
1088 1088
1089 1089 if (from < 0 || to < 0)
1090 1090 return (set_errno(EINVAL));
1091 1091
1092 1092 if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1093 1093 return (set_errno(ESRCH));
1094 1094
1095 1095 from_lgrp = lgrp_table[from];
1096 1096 to_lgrp = lgrp_table[to];
1097 1097
1098 1098 if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1099 1099 return (set_errno(ESRCH));
1100 1100 }
1101 1101
1102 1102 /*
1103 1103 * Get latency for same lgroup
1104 1104 */
1105 1105 if (from == to) {
1106 1106 latency = from_lgrp->lgrp_latency;
1107 1107 return (latency);
1108 1108 }
1109 1109
1110 1110 /*
1111 1111 * Get latency between leaf lgroups
1112 1112 */
1113 1113 if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1114 1114 return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1115 1115 to_lgrp->lgrp_plathand));
1116 1116
1117 1117 /*
1118 1118 * Determine max latency between resources in two lgroups
1119 1119 */
1120 1120 latency_max = 0;
1121 1121 for (i = 0; i <= lgrp_alloc_max; i++) {
1122 1122 lgrp_t *from_rsrc;
1123 1123 int j;
1124 1124 lgrp_t *to_rsrc;
1125 1125
1126 1126 from_rsrc = lgrp_table[i];
1127 1127 if (!LGRP_EXISTS(from_rsrc) ||
1128 1128 !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1129 1129 continue;
1130 1130
1131 1131 for (j = 0; j <= lgrp_alloc_max; j++) {
1132 1132 to_rsrc = lgrp_table[j];
1133 1133 if (!LGRP_EXISTS(to_rsrc) ||
1134 1134 klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1135 1135 j) == 0)
1136 1136 continue;
1137 1137 latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1138 1138 to_rsrc->lgrp_plathand);
1139 1139 if (latency > latency_max)
1140 1140 latency_max = latency;
1141 1141 }
1142 1142 }
1143 1143 return (latency_max);
1144 1144 }
1145 1145
1146 1146
1147 1147 /*
1148 1148 * Return lgroup interface version number
1149 1149 * 0 - none
1150 1150 * 1 - original
1151 1151 * 2 - lgrp_latency_cookie() and lgrp_resources() added
1152 1152 */
1153 1153 int
1154 1154 lgrp_version(int version)
1155 1155 {
1156 1156 /*
1157 1157 * Return LGRP_VER_NONE when requested version isn't supported
1158 1158 */
1159 1159 if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1160 1160 return (LGRP_VER_NONE);
1161 1161
1162 1162 /*
1163 1163 * Return current version when LGRP_VER_NONE passed in
1164 1164 */
1165 1165 if (version == LGRP_VER_NONE)
1166 1166 return (LGRP_VER_CURRENT);
1167 1167
1168 1168 /*
1169 1169 * Otherwise, return supported version.
1170 1170 */
1171 1171 return (version);
1172 1172 }
1173 1173
1174 1174
1175 1175 /*
 * Snapshot of lgroup hierarchy
1177 1177 *
1178 1178 * One snapshot is kept and is based on the kernel's native data model, so
1179 1179 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1180 1180 * 64-bit kernel. If a 32-bit user wants a snapshot from the 64-bit kernel,
1181 1181 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1182 1182 *
1183 1183 * The format is defined by lgroup snapshot header and the layout of
1184 1184 * the snapshot in memory is as follows:
1185 1185 * 1) lgroup snapshot header
1186 1186 * - specifies format of snapshot
1187 1187 * - defined by lgrp_snapshot_header_t
1188 1188 * 2) lgroup info array
1189 1189 * - contains information about each lgroup
1190 1190 * - one element for each lgroup
1191 1191 * - each element is defined by lgrp_info_t
1192 1192 * 3) lgroup CPU ID array
1193 1193 * - contains list (array) of CPU IDs for each lgroup
1194 1194 * - lgrp_info_t points into array and specifies how many CPUs belong to
1195 1195 * given lgroup
1196 1196 * 4) lgroup parents array
1197 1197 * - contains lgroup bitmask of parents for each lgroup
1198 1198 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1199 1199 * 5) lgroup children array
1200 1200 * - contains lgroup bitmask of children for each lgroup
1201 1201 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1202 1202 * 6) lgroup resources array
1203 1203 * - contains lgroup bitmask of resources for each lgroup
1204 1204 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1205 1205 * 7) lgroup latency table
1206 1206 * - contains latency from each lgroup to each of other lgroups
1207 1207 *
1208 1208 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups
1209 1209 * may be sparsely allocated.
1210 1210 */
1211 1211 lgrp_snapshot_header_t *lgrp_snap = NULL; /* lgroup snapshot */
1212 1212 static kmutex_t lgrp_snap_lock; /* snapshot lock */
1213 1213
1214 1214
1215 1215 /*
1216 1216 * Take a snapshot of lgroup hierarchy and return size of buffer
1217 1217 * needed to hold snapshot
1218 1218 */
static int
lgrp_snapshot(void)
{
	size_t	bitmask_size;	/* bytes per lgroup bitmask */
	size_t	bitmasks_size;	/* total bytes for all bitmasks */
	size_t	bufsize;	/* total snapshot buffer size */
	int	cpu_index;	/* running index into CPU ID array */
	size_t	cpuids_size;
	int	i;
	int	j;
	size_t	info_size;
	size_t	lats_size;
	ulong_t	*lgrp_children;
	processorid_t	*lgrp_cpuids;
	lgrp_info_t	*lgrp_info;
	int	**lgrp_lats;
	ulong_t	*lgrp_parents;
	ulong_t	*lgrp_rsets;
	ulong_t	*lgrpset;
	int	snap_ncpus;
	int	snap_nlgrps;
	int	snap_nlgrpsmax;
	size_t	snap_hdr_size;
#ifdef	_SYSCALL32_IMPL
	model_t	model = DATAMODEL_NATIVE;

	/*
	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
	 * program and need to return size of 32-bit snapshot now.
	 */
	model = get_udatamodel();
	if (model == DATAMODEL_ILP32 && lgrp_snap &&
	    lgrp_snap->ss_gen == lgrp_gen) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		/*
		 * lgroup bitmasks needed for parents, children, and resources
		 * for each lgroup and pset lgroup set
		 *
		 * NOTE(review): this uses the native BT_SIZEOFMAP() rather
		 * than BT_SIZEOFMAP32() as lgrp_snapshot_copy32() does, so
		 * on a 64-bit kernel this size estimate can only
		 * overestimate the 32-bit snapshot size (ulong_t >= uint_t).
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Check whether snapshot is up-to-date
	 * Free it and take another one if not
	 */
	if (lgrp_snap) {
		if (lgrp_snap->ss_gen == lgrp_gen)
			return (lgrp_snap->ss_size);

		kmem_free(lgrp_snap, lgrp_snap->ss_size);
		lgrp_snap = NULL;
	}

	/*
	 * Allocate memory for snapshot
	 * w/o holding cpu_lock while waiting for memory
	 *
	 * The loop samples the configuration under cpu_lock, drops the lock
	 * to allocate, then retries from scratch if the lgroup generation
	 * number changed while the lock was dropped.  On the "break" path
	 * below we exit the loop still holding cpu_lock.
	 */
	while (lgrp_snap == NULL) {
		int old_generation;

		/*
		 * Take snapshot of lgroup generation number
		 * and configuration size dependent information
		 * NOTE: Only count number of online CPUs,
		 * since only online CPUs appear in lgroups.
		 */
		mutex_enter(&cpu_lock);
		old_generation = lgrp_gen;
		snap_ncpus = ncpus_online;
		snap_nlgrps = nlgrps;
		snap_nlgrpsmax = nlgrpsmax;
		mutex_exit(&cpu_lock);

		/*
		 * Calculate size of buffer needed for snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
		    sizeof (void *));
		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
		    sizeof (processorid_t));
		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));
		/*
		 * lgroup bitmasks needed for pset lgroup set and parents,
		 * children, and resource sets for each lgroup
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (int *) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;

		/*
		 * Allocate memory for buffer
		 * (KM_NOSLEEP so we fail with ENOMEM rather than block)
		 */
		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
		if (lgrp_snap == NULL)
			return (set_errno(ENOMEM));

		/*
		 * Check whether generation number has changed
		 */
		mutex_enter(&cpu_lock);
		if (lgrp_gen == old_generation)
			break;	/* hasn't changed, so done. */

		/*
		 * Generation number changed, so free memory and try again.
		 */
		mutex_exit(&cpu_lock);
		kmem_free(lgrp_snap, bufsize);
		lgrp_snap = NULL;
	}

	/*
	 * Fill in lgroup snapshot header
	 * (including pointers to tables of lgroup info, CPU IDs, and parents
	 * and children)
	 *
	 * cpu_lock is held from here until after the fill-in loop below,
	 * keeping the lgroup hierarchy stable while it is copied.
	 */
	lgrp_snap->ss_version = LGRP_VER_CURRENT;

	/*
	 * XXX For now, liblgrp only needs to know whether the hierarchy
	 * XXX only has one level or not
	 */
	if (snap_nlgrps == 1)
		lgrp_snap->ss_levels = 1;
	else
		lgrp_snap->ss_levels = 2;

	lgrp_snap->ss_root = LGRP_ROOTID;

	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap->ss_ncpus = snap_ncpus;
	lgrp_snap->ss_gen = lgrp_gen;
	lgrp_snap->ss_view = LGRP_VIEW_OS;
	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
	lgrp_snap->ss_size = bufsize;
	/*
	 * Magic number is the kernel address of the buffer itself; the copy
	 * routines later replace it with the user buffer address.
	 */
	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;

	/*
	 * Carve the single allocation into its sections, in the layout
	 * documented above: header, info array, CPU IDs, pset bitmask,
	 * parents, children, resource sets, then latency table.
	 */
	lgrp_snap->ss_info = lgrp_info =
	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);

	lgrp_snap->ss_cpuids = lgrp_cpuids =
	    (processorid_t *)((uintptr_t)lgrp_info + info_size);

	lgrp_snap->ss_lgrpset = lgrpset =
	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);

	lgrp_snap->ss_parents = lgrp_parents =
	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);

	lgrp_snap->ss_children = lgrp_children =
	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_rsets = lgrp_rsets =
	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_latencies = lgrp_lats =
	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
	    snap_nlgrpsmax * bitmask_size));

	/*
	 * Fill in lgroup information
	 */
	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		struct cpu	*cp;
		int		cpu_count;
		struct cpu	*head;
		int		k;
		lgrp_t		*lgrp;

		lgrp = lgrp_table[i];
		if (!LGRP_EXISTS(lgrp)) {
			/* mark holes in the sparse lgroup table */
			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
			lgrp_info[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		lgrp_info[i].info_lgrpid = i;
		lgrp_info[i].info_latency = lgrp->lgrp_latency;

		/*
		 * Fill in parents, children, and lgroup resources
		 */
		lgrp_info[i].info_parents =
		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

		if (lgrp->lgrp_parent)
			BT_SET(lgrp_info[i].info_parents,
			    lgrp->lgrp_parent->lgrp_id);

		lgrp_info[i].info_children =
		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

		for (j = 0; j < snap_nlgrpsmax; j++)
			if (klgrpset_ismember(lgrp->lgrp_children, j))
				BT_SET(lgrp_info[i].info_children, j);

		lgrp_info[i].info_rset =
		    (ulong_t *)((uintptr_t)lgrp_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		/* convert each klgrpset resource set into a bitmask */
		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
			ulong_t	*rset;

			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
			    (j * bitmask_size));
			for (k = 0; k < snap_nlgrpsmax; k++)
				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
					BT_SET(rset, k);
		}

		/*
		 * Fill in CPU IDs by walking the lgroup's circular list of
		 * CPUs
		 */
		cpu_count = 0;
		lgrp_info[i].info_cpuids = NULL;
		cp = head = lgrp->lgrp_cpu;
		if (head != NULL) {
			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
			do {
				lgrp_cpuids[cpu_index] = cp->cpu_id;
				cpu_index++;
				cpu_count++;
				cp = cp->cpu_next_lgrp;
			} while (cp != head);
		}
		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
		lgrp_info[i].info_ncpus = cpu_count;

		/*
		 * Fill in memory sizes for lgroups that directly contain
		 * memory
		 */
		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
			lgrp_info[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
			lgrp_info[i].info_mem_install =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
		}

		/*
		 * Fill in latency table and buffer
		 * (row i of the table lives in the buffer that follows the
		 * array of row pointers)
		 */
		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			lgrp_t	*to;

			to = lgrp_table[j];
			if (!LGRP_EXISTS(to))
				continue;
			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
			    to->lgrp_id);
		}
	}
	ASSERT(cpu_index == snap_ncpus);


	mutex_exit(&cpu_lock);

#ifdef	_SYSCALL32_IMPL
	/*
	 * Check to see whether caller is 32-bit program and need to return
	 * size of 32-bit snapshot now that snapshot has been taken/updated.
	 * May not have been able to do this earlier if snapshot was out of
	 * date or didn't exist yet.
	 */
	if (model == DATAMODEL_ILP32) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
		    1) * bitmask_size;


		/*
		 * Size of latency table and buffer
		 */
		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	return (lgrp_snap->ss_size);
}
1567 1567
1568 1568
1569 1569 /*
1570 1570 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1571 1571 * into user instead of kernel address space, and return size of buffer
1572 1572 * needed to hold snapshot
1573 1573 */
static int
lgrp_snapshot_copy(char *buf, size_t bufsize)
{
	size_t	bitmask_size;
	int	cpu_index;
	size_t	cpuids_size;
	int	i;
	size_t	info_size;
	lgrp_info_t	*lgrp_info;
	int	retval;
	size_t	snap_hdr_size;
	int	snap_ncpus;
	int	snap_nlgrpsmax;
	lgrp_snapshot_header_t	*user_snap;
	lgrp_info_t	*user_info;
	lgrp_info_t	*user_info_buffer;
	processorid_t	*user_cpuids;
	ulong_t	*user_lgrpset;
	ulong_t	*user_parents;
	ulong_t	*user_children;
	int	**user_lats;
	int	**user_lats_buffer;
	ulong_t	*user_rsets;

	/* No snapshot has been taken yet, so there is nothing to copy. */
	if (lgrp_snap == NULL)
		return (0);

	/*
	 * Size query only (NULL/empty buffer).
	 * NOTE(review): bufsize is size_t (unsigned), so "<= 0" is
	 * equivalent to "== 0".
	 */
	if (buf == NULL || bufsize <= 0)
		return (lgrp_snap->ss_size);

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < lgrp_snap->ss_size)
		return (set_errno(EAGAIN));

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 * (kpreempt_disable keeps curthread->t_cpupart stable while
	 * it is read)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET(lgrp_snap->ss_lgrpset, i);
		}
	}
	kpreempt_enable();

	/*
	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
	 * into user buffer all at once
	 */
	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
		return (set_errno(EFAULT));

	/*
	 * Round up sizes of lgroup snapshot header and info for alignment
	 * (must mirror the layout computed in lgrp_snapshot())
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
	    sizeof (void *));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate pointers into user buffer for lgroup snapshot header,
	 * info, and CPU IDs
	 */
	user_snap = (lgrp_snapshot_header_t *)buf;
	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
	user_children = (ulong_t *)((uintptr_t)user_parents +
	    (snap_nlgrpsmax * bitmask_size));
	user_rsets = (ulong_t *)((uintptr_t)user_children +
	    (snap_nlgrpsmax * bitmask_size));
	user_lats = (int **)((uintptr_t)user_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));

	/*
	 * Copyout magic number (ie. pointer to beginning of buffer)
	 */
	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Fix up pointers in user buffer to point into user buffer
	 * not kernel snapshot
	 */
	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
	    sizeof (user_cpuids)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
	    sizeof (user_lgrpset)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_parents, &user_snap->ss_parents,
	    sizeof (user_parents)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_children, &user_snap->ss_children,
	    sizeof (user_children)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_rsets, &user_snap->ss_rsets,
	    sizeof (user_rsets)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lats, &user_snap->ss_latencies,
	    sizeof (user_lats)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Make copies of lgroup info and latency table, fix up pointers,
	 * and then copy them into user buffer
	 */
	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
	if (user_info_buffer == NULL)
		return (set_errno(ENOMEM));

	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (user_lats_buffer == NULL) {
		kmem_free(user_info_buffer, info_size);
		return (set_errno(ENOMEM));
	}

	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
	bcopy(lgrp_info, user_info_buffer, info_size);

	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
			continue;

		/*
		 * Update free memory size since it changes frequently
		 * Only do so for lgroups directly containing memory
		 *
		 * NOTE: This must be done before changing the pointers to
		 * point into user space since we need to dereference
		 * lgroup resource set
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i))
			user_info_buffer[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);

		/*
		 * Fix up pointers to parents, children, resources, and
		 * latencies
		 */
		user_info_buffer[i].info_parents =
		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
		user_info_buffer[i].info_children =
		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
		user_info_buffer[i].info_rset =
		    (ulong_t *)((uintptr_t)user_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
		    sizeof (int)));

		/*
		 * Fix up pointer to CPU IDs
		 */
		if (user_info_buffer[i].info_ncpus == 0) {
			user_info_buffer[i].info_cpuids = NULL;
			continue;
		}
		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
		cpu_index += user_info_buffer[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup info and latency table with pointers fixed up to point
	 * into user buffer out to user buffer now
	 */
	retval = lgrp_snap->ss_size;
	if (copyout(user_info_buffer, user_info, info_size) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_info_buffer, info_size);

	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
	    sizeof (int *)) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}
1785 1785
1786 1786
1787 1787 #ifdef _SYSCALL32_IMPL
1788 1788 /*
1789 1789 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
1790 1790 * into user instead of kernel address space, copy 32-bit snapshot into
1791 1791 * given user buffer, and return size of buffer needed to hold snapshot
1792 1792 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t	bitmask_size;
	size32_t	bitmasks_size;
	size32_t	children_size;
	int		cpu_index;
	size32_t	cpuids_size;
	int		i;
	int		j;
	size32_t	info_size;
	size32_t	lats_size;
	lgrp_info_t	*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t	*lgrp_info32;
	processorid_t	*lgrp_cpuids32;
	caddr32_t	*lgrp_lats32;
	int		**lgrp_lats32_kernel;
	uint_t		*lgrp_set32;
	uint_t		*lgrp_parents32;
	uint_t		*lgrp_children32;
	uint_t		*lgrp_rsets32;
	size32_t	parents_size;
	size32_t	rsets_size;
	size32_t	set_size;
	size32_t	snap_hdr_size;
	int		snap_ncpus;
	int		snap_nlgrpsmax;
	size32_t	snap_size;

	/* No snapshot has been taken yet, so there is nothing to copy. */
	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	/* 32-bit bitmasks are arrays of uint_t, not ulong_t */
	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	/*
	 * Size query only (NULL/empty buffer).
	 * NOTE(review): bufsize is size32_t (unsigned), so "<= 0" is
	 * equivalent to "== 0".
	 */
	if (buf == NULL || bufsize <= 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 *
	 * The ss_* "pointer" fields are caddr32_t (32-bit user addresses),
	 * so the arithmetic below is plain byte-offset arithmetic from buf.
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 * (kpreempt_disable keeps curthread->t_cpupart stable while
	 * it is read)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		uint_t	*parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 */
		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		/* translate each native (ulong_t) bitmask bit by bit */
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;

			if (BT_TEST(&lgrp_snap->ss_parents[i], j))
				BT_SET32(parents, j);

			if (BT_TEST(&lgrp_snap->ss_children[i], j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(&snap_rset[k], j))
					BT_SET32(rset, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 * (caddr32_t arithmetic, so these are user-space byte
		 * offsets)
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i)) {
			/* refresh free memory, which changes frequently */
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 */
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
		kmem_free(lgrp_snap32, snap_size);
		kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
		return (set_errno(EFAULT));
	}

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (snap_size);
}
2071 2071 #endif /* _SYSCALL32_IMPL */
2072 2072
2073 2073
2074 2074 int
2075 2075 lgrpsys(int subcode, long ia, void *ap)
2076 2076 {
2077 2077 size_t bufsize;
2078 2078 int latency;
2079 2079
2080 2080 switch (subcode) {
2081 2081
2082 2082 case LGRP_SYS_AFFINITY_GET:
2083 2083 return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2084 2084
2085 2085 case LGRP_SYS_AFFINITY_SET:
2086 2086 return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2087 2087
2088 2088 case LGRP_SYS_GENERATION:
2089 2089 return (lgrp_generation(ia));
2090 2090
2091 2091 case LGRP_SYS_HOME:
2092 2092 return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2093 2093
2094 2094 case LGRP_SYS_LATENCY:
2095 2095 mutex_enter(&cpu_lock);
2096 2096 latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2097 2097 mutex_exit(&cpu_lock);
2098 2098 return (latency);
2099 2099
2100 2100 case LGRP_SYS_MEMINFO:
2101 2101 return (meminfo(ia, (struct meminfo *)ap));
2102 2102
2103 2103 case LGRP_SYS_VERSION:
2104 2104 return (lgrp_version(ia));
2105 2105
2106 2106 case LGRP_SYS_SNAPSHOT:
2107 2107 mutex_enter(&lgrp_snap_lock);
2108 2108 bufsize = lgrp_snapshot();
2109 2109 if (ap && ia > 0) {
2110 2110 if (get_udatamodel() == DATAMODEL_NATIVE)
2111 2111 bufsize = lgrp_snapshot_copy(ap, ia);
2112 2112 #ifdef _SYSCALL32_IMPL
2113 2113 else
2114 2114 bufsize = lgrp_snapshot_copy32(
2115 2115 (caddr32_t)(uintptr_t)ap, ia);
2116 2116 #endif /* _SYSCALL32_IMPL */
2117 2117 }
2118 2118 mutex_exit(&lgrp_snap_lock);
2119 2119 return (bufsize);
2120 2120
2121 2121 default:
2122 2122 break;
2123 2123
2124 2124 }
2125 2125
2126 2126 return (set_errno(EINVAL));
2127 2127 }
↓ open down ↓ |
1828 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX