6138 don't abuse atomic_cas_*
--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/t_lock.h>
33 33 #include <sys/thread.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/x_call.h>
36 36 #include <sys/xc_levels.h>
37 37 #include <sys/cpu.h>
38 38 #include <sys/psw.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/systm.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/machsystm.h>
44 44 #include <sys/mutex_impl.h>
45 45 #include <sys/stack.h>
46 46 #include <sys/promif.h>
47 47 #include <sys/x86_archext.h>
48 48
49 49 /*
50 50 * Implementation for cross-processor calls via interprocessor interrupts
51 51 *
52 52 * This implementation uses a message passing architecture to allow multiple
53 53 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
54 54 * instruction, aka atomic_cas_ptr(), to implement simple efficient work
55 55 * queues for message passing between CPUs with almost no need for regular
56 56 * locking. See xc_extract() and xc_insert() below.
57 57 *
58 58 * The general idea is that initiating a cross call means putting a message
59 59 * on the target CPU's (or CPUs') work queue. Any synchronization is handled by passing
60 60 * the message back and forth between initiator and target(s).
61 61 *
62 62 * Every CPU has xc_work_cnt, which indicates it has messages to process.
63 63 * This value is incremented as message traffic is initiated and decremented
64 64 * with every message that finishes all processing.
65 65 *
66 66 * The code needs no mfence or other membar_*() calls. The uses of
67 - * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
67 + * atomic_cas_ptr(), atomic_inc_32_nv() and atomic_dec_32() for the message
68 68 * passing are implemented with LOCK prefix instructions which are
69 69 * equivalent to mfence.
70 70 *
71 71 * One interesting aspect of this implementation is that it allows 2 or more
72 72 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
73 73 * The cross call processing by the CPUs will happen in any order with only
74 74 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
75 75 * from cross calls before all slaves have invoked the function.
76 76 *
77 77 * The reason for this asynchronous approach is to allow for fast global
78 78 * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
79 79 * on a different Virtual Address at the same time, the old code required
80 80 * N squared IPIs. With this method, depending on timing, it could happen
81 81 * with just N IPIs.
82 82 */
83 83
84 84 /*
85 85 * The default is to not enable collecting counts of IPI information, since
86 86 * the updating of shared cachelines could cause excess bus traffic.
87 87 */
88 88 uint_t xc_collect_enable = 0;
89 89 uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */
90 90 uint64_t xc_multi_cnt = 0; /* # times we piggy backed on another IPI */
91 91
92 92 /*
93 93 * Values for message states. Here are the normal transitions. A transition
94 94 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
95 95 * the messages are passed back and forth.
96 96 *
97 97 * FREE => ASYNC -> DONE => FREE
98 98 * FREE => CALL -> DONE => FREE
99 99 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
100 100 *
101 101 * The interesting one above is ASYNC. You might ask, why not go directly
102 102 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
103 103 * the master's xc_free list if a master can generate ASYNC messages faster
104 104 * then the slave can process them. That could be handled with more complicated
105 105 * handling. However since nothing important uses ASYNC, I've not bothered.
106 106 */
107 107 #define XC_MSG_FREE (0) /* msg in xc_free queue */
108 108 #define XC_MSG_ASYNC (1) /* msg in slave xc_msgbox */
109 109 #define XC_MSG_CALL (2) /* msg in slave xc_msgbox */
110 110 #define XC_MSG_SYNC (3) /* msg in slave xc_msgbox */
111 111 #define XC_MSG_WAITING (4) /* msg in master xc_msgbox or xc_waiters */
112 112 #define XC_MSG_RELEASED (5) /* msg in slave xc_msgbox */
113 113 #define XC_MSG_DONE (6) /* msg in master xc_msgbox */
114 114
115 115 /*
116 116 * We allow for one high priority message at a time to happen in the system.
117 117 * This is used for panic, kmdb, etc., so no locking is done.
118 118 */
119 119 static volatile cpuset_t xc_priority_set_store;
120 120 static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
121 121 static xc_data_t xc_priority_data;
122 122
123 123 /*
124 124 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
125 125 * operations don't accept volatile bit vectors - which is a bit silly.
126 126 */
127 127 #define XC_BT_SET(vector, b) BT_ATOMIC_SET((ulong_t *)(vector), (b))
128 128 #define XC_BT_CLEAR(vector, b) BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
129 129
130 130 /*
131 131 * Decrement a CPU's work count
132 132 */
133 133 static void
134 134 xc_decrement(struct machcpu *mcpu)
135 135 {
136 136 atomic_dec_32(&mcpu->xc_work_cnt);
137 137 }
138 138
139 139 /*
140 140 * Increment a CPU's work count and return the old value
141 141 */
142 142 static int
143 143 xc_increment(struct machcpu *mcpu)
144 144 {
145 - int old;
146 - do {
147 - old = mcpu->xc_work_cnt;
148 - } while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
149 - return (old);
145 + return (atomic_inc_32_nv(&mcpu->xc_work_cnt) - 1);
150 146 }
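
The hunk above is the crux of the fix: the deleted loop open-codes a fetch-and-increment by retrying atomic_cas_32() until its compare-and-swap wins, while atomic_inc_32_nv() performs the same update in a single LOCK-prefixed instruction and returns the new value, so subtracting 1 recovers the old value the callers expect. A minimal standalone sketch of the equivalence (the helper names are hypothetical, nothing here is from the kernel source):

	#include <sys/types.h>
	#include <sys/atomic.h>	/* atomic_cas_32(), atomic_inc_32_nv() */

	/* Old form: retry a compare-and-swap until our +1 is the update applied. */
	static uint32_t
	increment_cas(volatile uint32_t *cnt)
	{
		uint32_t old;

		do {
			old = *cnt;
		} while (atomic_cas_32(cnt, old, old + 1) != old);
		return (old);
	}

	/* New form: one atomic fetch-and-add; "_nv" means the new value is returned. */
	static uint32_t
	increment_inc(volatile uint32_t *cnt)
	{
		return (atomic_inc_32_nv(cnt) - 1);
	}

Both versions return the pre-increment count, and both use LOCK-prefixed instructions with the mfence-equivalent ordering the block comment above relies on; the difference is that the CAS loop can retry indefinitely under contention, whereas the fetch-and-add always completes in one attempt.
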
151 147
152 148 /*
153 149 * Put a message into a queue. The insertion is atomic no matter
154 150 * how many different inserts/extracts to the same queue happen.
155 151 */
156 152 static void
157 153 xc_insert(void *queue, xc_msg_t *msg)
158 154 {
159 155 xc_msg_t *old_head;
160 156
161 157 /*
162 158 * FREE messages should only ever be getting inserted into
163 159 * the xc_master CPU's xc_free queue.
164 160 */
165 161 ASSERT(msg->xc_command != XC_MSG_FREE ||
166 162 cpu[msg->xc_master] == NULL || /* possible only during init */
167 163 queue == &cpu[msg->xc_master]->cpu_m.xc_free);
168 164
169 165 do {
170 166 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
171 167 msg->xc_next = old_head;
172 168 } while (atomic_cas_ptr(queue, old_head, msg) != old_head);
173 169 }
174 170
175 171 /*
176 172 * Extract a message from a queue. The extraction is atomic only
177 173 * when just one thread does extractions from the queue.
178 174 * If the queue is empty, NULL is returned.
179 175 */
180 176 static xc_msg_t *
181 177 xc_extract(xc_msg_t **queue)
182 178 {
183 179 xc_msg_t *old_head;
184 180
185 181 do {
186 182 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
187 183 if (old_head == NULL)
188 184 return (old_head);
189 185 } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
190 186 old_head);
191 187 old_head->xc_next = NULL;
192 188 return (old_head);
193 189 }
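
xc_insert() and xc_extract(), by contrast, are legitimate atomic_cas_ptr() users: a lock-free push or pop must publish a new head only if the head is still the value that was read, which is exactly the conditional update compare-and-swap provides and a plain atomic add cannot. The single-extractor restriction noted in the comment above is the classic ABA problem; a hypothetical trace (not from the source) of two concurrent pops corrupting a queue that starts as A -> B -> C:

	pop 1: reads old_head = A, computes new head = A->xc_next = B
	pop 1: stalls just before its atomic_cas_ptr()
	pop 2: pops A, pops B, then pushes A back (queue is now A -> C)
	pop 1: resumes; atomic_cas_ptr(queue, A, B) succeeds because the head
	       is A again, but B was already removed, so the queue head now
	       points at a node that is no longer logically on the list

In this file each queue has a single extractor (a slave drains its own xc_msgbox; a master pops its own xc_free and its local xc_waiters list), so the hazard does not arise.
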
194 190
195 191 /*
196 192 * Initialize the machcpu fields used for cross calls
197 193 */
198 194 static uint_t xc_initialized = 0;
199 195
200 196 void
201 197 xc_init_cpu(struct cpu *cpup)
202 198 {
203 199 xc_msg_t *msg;
204 200 int c;
205 201
206 202 /*
207 203 * Allocate message buffers for the new CPU.
208 204 */
209 205 for (c = 0; c < max_ncpus; ++c) {
210 206 if (plat_dr_support_cpu()) {
211 207 /*
212 208 * Allocate a message buffer for every CPU possible
213 209 * in system, including our own, and add them to our xc
214 210 * message queue.
215 211 */
216 212 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
217 213 msg->xc_command = XC_MSG_FREE;
218 214 msg->xc_master = cpup->cpu_id;
219 215 xc_insert(&cpup->cpu_m.xc_free, msg);
220 216 } else if (cpu[c] != NULL && cpu[c] != cpup) {
221 217 /*
222 218 * Add a new message buffer to each existing CPU's free
223 219 * list, as well as one for my list for each of them.
224 220 * Note: cpu0 is statically inserted into cpu[] array,
225 221 * so need to check cpu[c] isn't cpup itself to avoid
226 222 * allocating extra message buffers for cpu0.
227 223 */
228 224 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
229 225 msg->xc_command = XC_MSG_FREE;
230 226 msg->xc_master = c;
231 227 xc_insert(&cpu[c]->cpu_m.xc_free, msg);
232 228
233 229 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
234 230 msg->xc_command = XC_MSG_FREE;
235 231 msg->xc_master = cpup->cpu_id;
236 232 xc_insert(&cpup->cpu_m.xc_free, msg);
237 233 }
238 234 }
239 235
240 236 if (!plat_dr_support_cpu()) {
241 237 /*
242 238 * Add one for self messages if CPU hotplug is disabled.
243 239 */
244 240 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
245 241 msg->xc_command = XC_MSG_FREE;
246 242 msg->xc_master = cpup->cpu_id;
247 243 xc_insert(&cpup->cpu_m.xc_free, msg);
248 244 }
249 245
250 246 if (!xc_initialized)
251 247 xc_initialized = 1;
252 248 }
253 249
254 250 void
255 251 xc_fini_cpu(struct cpu *cpup)
256 252 {
257 253 xc_msg_t *msg;
258 254
259 255 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
260 256 ASSERT(cpup->cpu_m.xc_msgbox == NULL);
261 257 ASSERT(cpup->cpu_m.xc_work_cnt == 0);
262 258
263 259 while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
264 260 kmem_free(msg, sizeof (*msg));
265 261 }
266 262 }
267 263
268 264 #define XC_FLUSH_MAX_WAITS 1000
269 265
270 266 /* Flush inflight message buffers. */
271 267 int
272 268 xc_flush_cpu(struct cpu *cpup)
273 269 {
274 270 int i;
275 271
276 272 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
277 273
278 274 /*
279 275 * Pause all working CPUs, which ensures that there's no CPU in
280 276 * function xc_common().
281 277 * This is used to work around a race condition window in xc_common()
282 278 * between checking CPU_READY flag and increasing working item count.
283 279 * between checking the CPU_READY flag and increasing the working item count.
284 280 pause_cpus(cpup, NULL);
285 281 start_cpus();
286 282
287 283 for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
288 284 if (cpup->cpu_m.xc_work_cnt == 0) {
289 285 break;
290 286 }
291 287 DELAY(1);
292 288 }
293 289 for (; i < XC_FLUSH_MAX_WAITS; i++) {
294 290 if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
295 291 break;
296 292 }
297 293 DELAY(1);
298 294 }
299 295
300 296 return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
301 297 }
302 298
303 299 /*
304 300 * X-call message processing routine. Note that this is used by both
305 301 * senders and recipients of messages.
306 302 *
307 303 * We're protected against changing CPUs by either being in a high-priority
308 304 * interrupt, having preemption disabled or by having a raised SPL.
309 305 */
310 306 /*ARGSUSED*/
311 307 uint_t
312 308 xc_serv(caddr_t arg1, caddr_t arg2)
313 309 {
314 310 struct machcpu *mcpup = &(CPU->cpu_m);
315 311 xc_msg_t *msg;
316 312 xc_data_t *data;
317 313 xc_msg_t *xc_waiters = NULL;
318 314 uint32_t num_waiting = 0;
319 315 xc_func_t func;
320 316 xc_arg_t a1;
321 317 xc_arg_t a2;
322 318 xc_arg_t a3;
323 319 uint_t rc = DDI_INTR_UNCLAIMED;
324 320
325 321 while (mcpup->xc_work_cnt != 0) {
326 322 rc = DDI_INTR_CLAIMED;
327 323
328 324 /*
329 325 * We may have to wait for a message to arrive.
330 326 */
331 327 for (msg = NULL; msg == NULL;
332 328 msg = xc_extract(&mcpup->xc_msgbox)) {
333 329
334 330 /*
335 331 * Always check for and handle a priority message.
336 332 */
337 333 if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
338 334 func = xc_priority_data.xc_func;
339 335 a1 = xc_priority_data.xc_a1;
340 336 a2 = xc_priority_data.xc_a2;
341 337 a3 = xc_priority_data.xc_a3;
342 338 XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
343 339 xc_decrement(mcpup);
344 340 func(a1, a2, a3);
345 341 if (mcpup->xc_work_cnt == 0)
346 342 return (rc);
347 343 }
348 344
349 345 /*
350 346 * wait for a message to arrive
351 347 */
352 348 SMT_PAUSE();
353 349 }
354 350
355 351
356 352 /*
357 353 * process the message
358 354 */
359 355 switch (msg->xc_command) {
360 356
361 357 /*
362 358 * ASYNC gives back the message immediately, then we do the
363 359 * function and return with no more waiting.
364 360 */
365 361 case XC_MSG_ASYNC:
366 362 data = &cpu[msg->xc_master]->cpu_m.xc_data;
367 363 func = data->xc_func;
368 364 a1 = data->xc_a1;
369 365 a2 = data->xc_a2;
370 366 a3 = data->xc_a3;
371 367 msg->xc_command = XC_MSG_DONE;
372 368 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
373 369 if (func != NULL)
374 370 (void) (*func)(a1, a2, a3);
375 371 xc_decrement(mcpup);
376 372 break;
377 373
378 374 /*
379 375 * SYNC messages do the call, then send it back to the master
380 376 * in WAITING mode
381 377 */
382 378 case XC_MSG_SYNC:
383 379 data = &cpu[msg->xc_master]->cpu_m.xc_data;
384 380 if (data->xc_func != NULL)
385 381 (void) (*data->xc_func)(data->xc_a1,
386 382 data->xc_a2, data->xc_a3);
387 383 msg->xc_command = XC_MSG_WAITING;
388 384 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
389 385 break;
390 386
391 387 /*
392 388 * WAITING messages are collected by the master until all
393 389 * have arrived. Once all arrive, we release them back to
394 390 * the slaves
395 391 */
396 392 case XC_MSG_WAITING:
397 393 xc_insert(&xc_waiters, msg);
398 394 if (++num_waiting < mcpup->xc_wait_cnt)
399 395 break;
400 396 while ((msg = xc_extract(&xc_waiters)) != NULL) {
401 397 msg->xc_command = XC_MSG_RELEASED;
402 398 xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
403 399 msg);
404 400 --num_waiting;
405 401 }
406 402 if (num_waiting != 0)
407 403 panic("wrong number waiting");
408 404 mcpup->xc_wait_cnt = 0;
409 405 break;
410 406
411 407 /*
412 408 * CALL messages do the function and then, like RELEASED,
413 409 * send the message back to the master as DONE.
414 410 */
415 411 case XC_MSG_CALL:
416 412 data = &cpu[msg->xc_master]->cpu_m.xc_data;
417 413 if (data->xc_func != NULL)
418 414 (void) (*data->xc_func)(data->xc_a1,
419 415 data->xc_a2, data->xc_a3);
420 416 /*FALLTHROUGH*/
421 417 case XC_MSG_RELEASED:
422 418 msg->xc_command = XC_MSG_DONE;
423 419 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
424 420 xc_decrement(mcpup);
425 421 break;
426 422
427 423 /*
428 424 * DONE means a slave has completely finished up.
429 425 * Once we collect all the DONE messages, we'll exit
430 426 * processing too.
431 427 */
432 428 case XC_MSG_DONE:
433 429 msg->xc_command = XC_MSG_FREE;
434 430 xc_insert(&mcpup->xc_free, msg);
435 431 xc_decrement(mcpup);
436 432 break;
437 433
438 434 case XC_MSG_FREE:
439 435 panic("free message 0x%p in msgbox", (void *)msg);
440 436 break;
441 437
442 438 default:
443 439 panic("bad message 0x%p in msgbox", (void *)msg);
444 440 break;
445 441 }
446 442 }
447 443 return (rc);
448 444 }
449 445
450 446 /*
451 447 * Initiate cross call processing.
452 448 */
453 449 static void
454 450 xc_common(
455 451 xc_func_t func,
456 452 xc_arg_t arg1,
457 453 xc_arg_t arg2,
458 454 xc_arg_t arg3,
459 455 ulong_t *set,
460 456 uint_t command)
461 457 {
462 458 int c;
463 459 struct cpu *cpup;
464 460 xc_msg_t *msg;
465 461 xc_data_t *data;
466 462 int cnt;
467 463 int save_spl;
468 464
469 465 if (!xc_initialized) {
470 466 if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
471 467 func != NULL)
472 468 (void) (*func)(arg1, arg2, arg3);
473 469 return;
474 470 }
475 471
476 472 save_spl = splr(ipltospl(XC_HI_PIL));
477 473
478 474 /*
479 475 * fill in cross call data
480 476 */
481 477 data = &CPU->cpu_m.xc_data;
482 478 data->xc_func = func;
483 479 data->xc_a1 = arg1;
484 480 data->xc_a2 = arg2;
485 481 data->xc_a3 = arg3;
486 482
487 483 /*
488 484 * Post messages to all CPUs involved that are CPU_READY
489 485 */
490 486 CPU->cpu_m.xc_wait_cnt = 0;
491 487 for (c = 0; c < max_ncpus; ++c) {
492 488 if (!BT_TEST(set, c))
493 489 continue;
494 490 cpup = cpu[c];
495 491 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
496 492 continue;
497 493
498 494 /*
499 495 * Fill out a new message.
500 496 */
501 497 msg = xc_extract(&CPU->cpu_m.xc_free);
502 498 if (msg == NULL)
503 499 panic("Ran out of free xc_msg_t's");
504 500 msg->xc_command = command;
505 501 if (msg->xc_master != CPU->cpu_id)
506 502 panic("msg %p has wrong xc_master", (void *)msg);
507 503 msg->xc_slave = c;
508 504
509 505 /*
510 506 * Increment my work count for all messages that I'll
511 507 * transition from DONE to FREE.
512 508 * Also remember how many XC_MSG_WAITINGs to look for
513 509 */
514 510 (void) xc_increment(&CPU->cpu_m);
515 511 if (command == XC_MSG_SYNC)
516 512 ++CPU->cpu_m.xc_wait_cnt;
517 513
518 514 /*
519 515 * Increment the target CPU work count then insert the message
520 516 * in the target msgbox. If I post the first bit of work
521 517 * for the target to do, send an IPI to the target CPU.
522 518 */
523 519 cnt = xc_increment(&cpup->cpu_m);
524 520 xc_insert(&cpup->cpu_m.xc_msgbox, msg);
525 521 if (cpup != CPU) {
526 522 if (cnt == 0) {
527 523 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
528 524 send_dirint(c, XC_HI_PIL);
529 525 if (xc_collect_enable)
530 526 ++xc_total_cnt;
531 527 } else if (xc_collect_enable) {
532 528 ++xc_multi_cnt;
533 529 }
534 530 }
535 531 }
536 532
537 533 /*
538 534 * Now drop into the message handler until all work is done
539 535 */
540 536 (void) xc_serv(NULL, NULL);
541 537 splx(save_spl);
542 538 }
543 539
544 540 /*
545 541 * Push out a priority cross call.
546 542 */
547 543 static void
548 544 xc_priority_common(
549 545 xc_func_t func,
550 546 xc_arg_t arg1,
551 547 xc_arg_t arg2,
552 548 xc_arg_t arg3,
553 549 ulong_t *set)
554 550 {
555 551 int i;
556 552 int c;
557 553 struct cpu *cpup;
558 554
559 555 /*
560 556 * Wait briefly for any previous xc_priority to have finished.
561 557 */
562 558 for (c = 0; c < max_ncpus; ++c) {
563 559 cpup = cpu[c];
564 560 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
565 561 continue;
566 562
567 563 /*
568 564 * The value of 40000 here is from old kernel code. It
569 565 * really should be changed to some time based value, since
570 566 * under a hypervisor, there's no guarantee a remote CPU
571 567 * is even scheduled.
572 568 */
573 569 for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
574 570 SMT_PAUSE();
575 571
576 572 /*
577 573 * Some CPU did not respond to a previous priority request. It's
578 574 * probably deadlocked with interrupts blocked or some such
579 575 * problem. We'll just erase the previous request - which was
580 576 * most likely a kmdb_enter that has already expired - and plow
581 577 * ahead.
582 578 */
583 579 if (BT_TEST(xc_priority_set, c)) {
584 580 XC_BT_CLEAR(xc_priority_set, c);
585 581 if (cpup->cpu_m.xc_work_cnt > 0)
586 582 xc_decrement(&cpup->cpu_m);
587 583 }
588 584 }
589 585
590 586 /*
591 587 * fill in cross call data
592 588 */
593 589 xc_priority_data.xc_func = func;
594 590 xc_priority_data.xc_a1 = arg1;
595 591 xc_priority_data.xc_a2 = arg2;
596 592 xc_priority_data.xc_a3 = arg3;
597 593
598 594 /*
599 595 * Post messages to all CPUs involved that are CPU_READY
600 596 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
601 597 */
602 598 for (c = 0; c < max_ncpus; ++c) {
603 599 if (!BT_TEST(set, c))
604 600 continue;
605 601 cpup = cpu[c];
606 602 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
607 603 cpup == CPU)
608 604 continue;
609 605 (void) xc_increment(&cpup->cpu_m);
610 606 XC_BT_SET(xc_priority_set, c);
611 607 send_dirint(c, XC_HI_PIL);
612 608 for (i = 0; i < 10; ++i) {
613 609 (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
614 610 cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
615 611 }
616 612 }
617 613 }
618 614
619 615 /*
620 616 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
621 617 * This should only be used for extraordinary operations, like panic(), which
622 618 * need to work, in some fashion, in a not completely functional system.
623 619 * All other uses that want minimal waiting should use xc_call_nowait().
624 620 */
625 621 void
626 622 xc_priority(
627 623 xc_arg_t arg1,
628 624 xc_arg_t arg2,
629 625 xc_arg_t arg3,
630 626 ulong_t *set,
631 627 xc_func_t func)
632 628 {
633 629 extern int IGNORE_KERNEL_PREEMPTION;
634 630 int save_spl = splr(ipltospl(XC_HI_PIL));
635 631 int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
636 632
637 633 IGNORE_KERNEL_PREEMPTION = 1;
638 634 xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
639 635 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
640 636 splx(save_spl);
641 637 }
642 638
643 639 /*
644 640 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
645 641 */
646 642 void
647 643 kdi_xc_others(int this_cpu, void (*func)(void))
648 644 {
649 645 extern int IGNORE_KERNEL_PREEMPTION;
650 646 int save_kernel_preemption;
651 647 cpuset_t set;
652 648
653 649 if (!xc_initialized)
654 650 return;
655 651
656 652 save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
657 653 IGNORE_KERNEL_PREEMPTION = 1;
658 654 CPUSET_ALL_BUT(set, this_cpu);
659 655 xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
660 656 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
661 657 }
662 658
663 659
664 660
665 661 /*
666 662 * Invoke function on specified processors. Remotes may continue after
667 663 * service with no waiting. xc_call_nowait() may return immediately too.
668 664 */
669 665 void
670 666 xc_call_nowait(
671 667 xc_arg_t arg1,
672 668 xc_arg_t arg2,
673 669 xc_arg_t arg3,
674 670 ulong_t *set,
675 671 xc_func_t func)
676 672 {
677 673 xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
678 674 }
679 675
680 676 /*
681 677 * Invoke function on specified processors. Remotes may continue after
682 678 * service with no waiting. xc_call() returns only after remotes have finished.
683 679 */
684 680 void
685 681 xc_call(
686 682 xc_arg_t arg1,
687 683 xc_arg_t arg2,
688 684 xc_arg_t arg3,
689 685 ulong_t *set,
690 686 xc_func_t func)
691 687 {
692 688 xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
693 689 }
694 690
695 691 /*
696 692 * Invoke function on specified processors. Remotes wait until all have
697 693 * finished. xc_sync() also waits until all remotes have finished.
698 694 */
699 695 void
700 696 xc_sync(
701 697 xc_arg_t arg1,
702 698 xc_arg_t arg2,
703 699 xc_arg_t arg3,
704 700 ulong_t *set,
705 701 xc_func_t func)
706 702 {
707 703 xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
708 704 }
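
For context on the three entry points above: they differ only in the message command handed to xc_common(), and therefore in how much handshaking happens before each side proceeds. A hypothetical caller (my_handler, example() and the argument values are made up for illustration) might look like:

	#include <sys/cpuvar.h>	/* cpuset_t, CPUSET_ALL(), CPUSET2BV() */
	#include <sys/x_call.h>	/* xc_call(), xc_sync(), xc_call_nowait() */

	static int
	my_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
	{
		/* Runs on each targeted CPU at XC_HI_PIL. */
		return (0);
	}

	void
	example(void)
	{
		cpuset_t set;

		CPUSET_ALL(set);	/* target every CPU, self included */

		/* ASYNC: returns once the messages are posted; remotes run later. */
		xc_call_nowait(1, 2, 3, CPUSET2BV(set), my_handler);

		/* CALL: returns only after every targeted CPU has run the handler. */
		xc_call(1, 2, 3, CPUSET2BV(set), my_handler);

		/* SYNC: additionally, the remotes all wait for one another to finish. */
		xc_sync(1, 2, 3, CPUSET2BV(set), my_handler);
	}
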