XXXX pass in cpu_pause_func via pause_cpus
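The only change to x_call.c in this webrev is in xc_flush_cpu(): pause_cpus() now takes a second argument, the pause handler that callers previously installed through the global cpu_pause_func hook, and this caller passes NULL to keep the default behavior. As a rough sketch of how a caller that does want a handler might use the new interface (the prototype lives in the cpuvar.h/cpu.c half of the change, so the void *(*)(void *) handler type and the names below are assumptions, not part of this diff):

    /*
     * Hypothetical sketch only.  The handler signature assumes the new
     * prototype is roughly void pause_cpus(cpu_t *, void *(*)(void *));
     * verify against the cpuvar.h change before copying.
     */
    static void *
    my_pause_handler(void *arg)
    {
            /* per-CPU work; assumed to run on each paused CPU, like the */
            /* old global cpu_pause_func hook did */
            return (NULL);
    }

    static void
    quiesce_example(void)
    {
            mutex_enter(&cpu_lock);         /* pause_cpus() requires cpu_lock */
            pause_cpus(NULL, my_pause_handler);
            /* all other CPUs are now parked in their pause threads */
            start_cpus();
            mutex_exit(&cpu_lock);
    }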
--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/t_lock.h>
33 33 #include <sys/thread.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/x_call.h>
36 36 #include <sys/xc_levels.h>
37 37 #include <sys/cpu.h>
38 38 #include <sys/psw.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/systm.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/machsystm.h>
44 44 #include <sys/mutex_impl.h>
45 45 #include <sys/stack.h>
46 46 #include <sys/promif.h>
47 47 #include <sys/x86_archext.h>
48 48
49 49 /*
50 50 * Implementation for cross-processor calls via interprocessor interrupts
51 51 *
52 52 * This implementation uses a message passing architecture to allow multiple
53 53 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
54 54 * instruction, aka casptr(), to implement simple efficient work queues for
55 55 * message passing between CPUs with almost no need for regular locking.
56 56 * See xc_extract() and xc_insert() below.
57 57 *
58 58 * The general idea is that initiating a cross call means putting a message
59 59 * on each target CPU's work queue. Any synchronization is handled by passing
60 60 * the message back and forth between initiator and target(s).
61 61 *
62 62 * Every CPU has xc_work_cnt, which indicates it has messages to process.
63 63 * This value is incremented as message traffic is initiated and decremented
64 64 * with every message that finishes all processing.
65 65 *
66 66 * The code needs no mfence or other membar_*() calls. The uses of
67 67 * casptr(), cas32() and atomic_dec_32() for the message passing are
68 68 * implemented with LOCK prefix instructions which are equivalent to mfence.
69 69 *
70 70 * One interesting aspect of this implementation is that it allows 2 or more
71 71 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
72 72 * The cross call processing by the CPUs will happen in any order with only
73 73 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
74 74 * from cross calls before all slaves have invoked the function.
75 75 *
76 76 * The reason for this asynchronous approach is to allow for fast global
77 77 * TLB shootdowns. If all N CPUs try to do a global TLB invalidation
78 78 * on a different virtual address at the same time, the old code required
79 79 * N squared IPIs. With this method, depending on timing, the same work
80 80 * can be done with just N IPIs.
81 81 */
82 82
83 83 /*
84 84 * The default is to not enable collecting counts of IPI information, since
85 85 * the updating of shared cachelines could cause excess bus traffic.
86 86 */
87 87 uint_t xc_collect_enable = 0;
88 88 uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */
89 89 uint64_t xc_multi_cnt = 0; /* # times we piggybacked on another IPI */
90 90
91 91 /*
92 92 * Values for message states. Here are the normal transitions. A transition
93 93 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
94 94 * the messages are passed back and forth.
95 95 *
96 96 * FREE => ASYNC -> DONE => FREE
97 97 * FREE => CALL -> DONE => FREE
98 98 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
99 99 *
100 100 * The interesting one above is ASYNC. You might ask, why not go directly
101 101 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
102 102 * the master's xc_free list if a master can generate ASYNC messages faster
103 103 * than the slave can process them. That could be handled with more complicated
104 104 * code. However, since nothing important uses ASYNC, I've not bothered.
105 105 */
106 106 #define XC_MSG_FREE (0) /* msg in xc_free queue */
107 107 #define XC_MSG_ASYNC (1) /* msg in slave xc_msgbox */
108 108 #define XC_MSG_CALL (2) /* msg in slave xc_msgbox */
109 109 #define XC_MSG_SYNC (3) /* msg in slave xc_msgbox */
110 110 #define XC_MSG_WAITING (4) /* msg in master xc_msgbox or xc_waiters */
111 111 #define XC_MSG_RELEASED (5) /* msg in slave xc_msgbox */
112 112 #define XC_MSG_DONE (6) /* msg in master xc_msgbox */
113 113
114 114 /*
115 115 * We allow for one high priority message at a time to happen in the system.
116 116 * This is used for panic, kmdb, etc., so no locking is done.
117 117 */
118 118 static volatile cpuset_t xc_priority_set_store;
119 119 static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
120 120 static xc_data_t xc_priority_data;
121 121
122 122 /*
123 123 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
124 124 * operations don't accept volatile bit vectors - which is a bit silly.
125 125 */
126 126 #define XC_BT_SET(vector, b) BT_ATOMIC_SET((ulong_t *)(vector), (b))
127 127 #define XC_BT_CLEAR(vector, b) BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
128 128
129 129 /*
130 130 * Decrement a CPU's work count
131 131 */
132 132 static void
133 133 xc_decrement(struct machcpu *mcpu)
134 134 {
135 135 atomic_dec_32(&mcpu->xc_work_cnt);
136 136 }
137 137
138 138 /*
139 139 * Increment a CPU's work count and return the old value
140 140 */
141 141 static int
142 142 xc_increment(struct machcpu *mcpu)
143 143 {
144 144 int old;
145 145 do {
146 146 old = mcpu->xc_work_cnt;
147 147 } while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
148 148 return (old);
149 149 }
150 150
151 151 /*
152 152 * Put a message into a queue. The insertion is atomic no matter
153 153 * how many different inserts/extracts to the same queue happen.
154 154 */
155 155 static void
156 156 xc_insert(void *queue, xc_msg_t *msg)
157 157 {
158 158 xc_msg_t *old_head;
159 159
160 160 /*
161 161 * FREE messages should only ever be getting inserted into
162 162 * the xc_master CPU's xc_free queue.
163 163 */
164 164 ASSERT(msg->xc_command != XC_MSG_FREE ||
165 165 cpu[msg->xc_master] == NULL || /* possible only during init */
166 166 queue == &cpu[msg->xc_master]->cpu_m.xc_free);
167 167
168 168 do {
169 169 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
170 170 msg->xc_next = old_head;
171 171 } while (casptr(queue, old_head, msg) != old_head);
172 172 }
173 173
174 174 /*
175 175 * Extract a message from a queue. The extraction is atomic only
176 176 * when just one thread does extractions from the queue.
177 177 * If the queue is empty, NULL is returned.
178 178 */
179 179 static xc_msg_t *
180 180 xc_extract(xc_msg_t **queue)
181 181 {
182 182 xc_msg_t *old_head;
183 183
184 184 do {
185 185 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
186 186 if (old_head == NULL)
187 187 return (old_head);
188 188 } while (casptr(queue, old_head, old_head->xc_next) != old_head);
189 189 old_head->xc_next = NULL;
190 190 return (old_head);
191 191 }
192 192
193 193 /*
194 194 * Initialize the machcpu fields used for cross calls
195 195 */
196 196 static uint_t xc_initialized = 0;
197 197
198 198 void
199 199 xc_init_cpu(struct cpu *cpup)
200 200 {
201 201 xc_msg_t *msg;
202 202 int c;
203 203
204 204 /*
205 205 * Allocate message buffers for the new CPU.
206 206 */
207 207 for (c = 0; c < max_ncpus; ++c) {
208 208 if (plat_dr_support_cpu()) {
209 209 /*
210 210 * Allocate a message buffer for every CPU possible
211 211 * in the system, including our own, and add them to our xc
212 212 * message queue.
213 213 */
214 214 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
215 215 msg->xc_command = XC_MSG_FREE;
216 216 msg->xc_master = cpup->cpu_id;
217 217 xc_insert(&cpup->cpu_m.xc_free, msg);
218 218 } else if (cpu[c] != NULL && cpu[c] != cpup) {
219 219 /*
220 220 * Add a new message buffer to each existing CPU's free
221 221 * list, as well as one for my list for each of them.
222 222 * Note: cpu0 is statically inserted into cpu[] array,
223 223 * so need to check cpu[c] isn't cpup itself to avoid
224 224 * allocating extra message buffers for cpu0.
225 225 */
226 226 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
227 227 msg->xc_command = XC_MSG_FREE;
228 228 msg->xc_master = c;
229 229 xc_insert(&cpu[c]->cpu_m.xc_free, msg);
230 230
231 231 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
232 232 msg->xc_command = XC_MSG_FREE;
233 233 msg->xc_master = cpup->cpu_id;
234 234 xc_insert(&cpup->cpu_m.xc_free, msg);
235 235 }
236 236 }
237 237
238 238 if (!plat_dr_support_cpu()) {
239 239 /*
240 240 * Add one for self messages if CPU hotplug is disabled.
241 241 */
242 242 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
243 243 msg->xc_command = XC_MSG_FREE;
244 244 msg->xc_master = cpup->cpu_id;
245 245 xc_insert(&cpup->cpu_m.xc_free, msg);
246 246 }
247 247
248 248 if (!xc_initialized)
249 249 xc_initialized = 1;
250 250 }
251 251
252 252 void
253 253 xc_fini_cpu(struct cpu *cpup)
254 254 {
255 255 xc_msg_t *msg;
256 256
257 257 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
258 258 ASSERT(cpup->cpu_m.xc_msgbox == NULL);
259 259 ASSERT(cpup->cpu_m.xc_work_cnt == 0);
260 260
261 261 while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
262 262 kmem_free(msg, sizeof (*msg));
263 263 }
264 264 }
265 265
266 266 #define XC_FLUSH_MAX_WAITS 1000
267 267
268 268 /* Flush inflight message buffers. */
269 269 int
270 270 xc_flush_cpu(struct cpu *cpup)
271 271 {
272 272 int i;
273 273
274 274 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
275 275
276 276 /*
277 277 * Pause all working CPUs, which ensures that there's no CPU in
278 278 * function xc_common().
279 279 * This works around a race window in xc_common() between checking the
280 280 * CPU_READY flag and incrementing the work count.
281 281 */
282 - pause_cpus(cpup);
282 + pause_cpus(cpup, NULL);
283 283 start_cpus();
284 284
285 285 for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
286 286 if (cpup->cpu_m.xc_work_cnt == 0) {
287 287 break;
288 288 }
289 289 DELAY(1);
290 290 }
291 291 for (; i < XC_FLUSH_MAX_WAITS; i++) {
292 292 if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
293 293 break;
294 294 }
295 295 DELAY(1);
296 296 }
297 297
298 298 return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
299 299 }
300 300
301 301 /*
302 302 * X-call message processing routine. Note that this is used by both
303 303 * senders and recipients of messages.
304 304 *
305 305 * We're protected against changing CPUs by either being in a high-priority
306 306 * interrupt, having preemption disabled or by having a raised SPL.
307 307 */
308 308 /*ARGSUSED*/
309 309 uint_t
310 310 xc_serv(caddr_t arg1, caddr_t arg2)
311 311 {
312 312 struct machcpu *mcpup = &(CPU->cpu_m);
313 313 xc_msg_t *msg;
314 314 xc_data_t *data;
315 315 xc_msg_t *xc_waiters = NULL;
316 316 uint32_t num_waiting = 0;
317 317 xc_func_t func;
318 318 xc_arg_t a1;
319 319 xc_arg_t a2;
320 320 xc_arg_t a3;
321 321 uint_t rc = DDI_INTR_UNCLAIMED;
322 322
323 323 while (mcpup->xc_work_cnt != 0) {
324 324 rc = DDI_INTR_CLAIMED;
325 325
326 326 /*
327 327 * We may have to wait for a message to arrive.
328 328 */
329 329 for (msg = NULL; msg == NULL;
330 330 msg = xc_extract(&mcpup->xc_msgbox)) {
331 331
332 332 /*
333 333 * Always check for and handle a priority message.
334 334 */
335 335 if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
336 336 func = xc_priority_data.xc_func;
337 337 a1 = xc_priority_data.xc_a1;
338 338 a2 = xc_priority_data.xc_a2;
339 339 a3 = xc_priority_data.xc_a3;
340 340 XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
341 341 xc_decrement(mcpup);
342 342 func(a1, a2, a3);
343 343 if (mcpup->xc_work_cnt == 0)
344 344 return (rc);
345 345 }
346 346
347 347 /*
348 348 * wait for a message to arrive
349 349 */
350 350 SMT_PAUSE();
351 351 }
352 352
353 353
354 354 /*
355 355 * process the message
356 356 */
357 357 switch (msg->xc_command) {
358 358
359 359 /*
360 360 * ASYNC gives back the message immediately, then we do the
361 361 * function and return with no more waiting.
362 362 */
363 363 case XC_MSG_ASYNC:
364 364 data = &cpu[msg->xc_master]->cpu_m.xc_data;
365 365 func = data->xc_func;
366 366 a1 = data->xc_a1;
367 367 a2 = data->xc_a2;
368 368 a3 = data->xc_a3;
369 369 msg->xc_command = XC_MSG_DONE;
370 370 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
371 371 if (func != NULL)
372 372 (void) (*func)(a1, a2, a3);
373 373 xc_decrement(mcpup);
374 374 break;
375 375
376 376 /*
377 377 * SYNC messages do the call, then send it back to the master
378 378 * in WAITING mode
379 379 */
380 380 case XC_MSG_SYNC:
381 381 data = &cpu[msg->xc_master]->cpu_m.xc_data;
382 382 if (data->xc_func != NULL)
383 383 (void) (*data->xc_func)(data->xc_a1,
384 384 data->xc_a2, data->xc_a3);
385 385 msg->xc_command = XC_MSG_WAITING;
386 386 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
387 387 break;
388 388
389 389 /*
390 390 * WAITING messages are collected by the master until all
391 391 * have arrived. Once all arrive, we release them back to
392 392 * the slaves
393 393 */
394 394 case XC_MSG_WAITING:
395 395 xc_insert(&xc_waiters, msg);
396 396 if (++num_waiting < mcpup->xc_wait_cnt)
397 397 break;
398 398 while ((msg = xc_extract(&xc_waiters)) != NULL) {
399 399 msg->xc_command = XC_MSG_RELEASED;
400 400 xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
401 401 msg);
402 402 --num_waiting;
403 403 }
404 404 if (num_waiting != 0)
405 405 panic("wrong number waiting");
406 406 mcpup->xc_wait_cnt = 0;
407 407 break;
408 408
409 409 /*
410 410 * CALL messages do the function and then, like RELEASED,
411 411 * send the message back to the master as DONE.
412 412 */
413 413 case XC_MSG_CALL:
414 414 data = &cpu[msg->xc_master]->cpu_m.xc_data;
415 415 if (data->xc_func != NULL)
416 416 (void) (*data->xc_func)(data->xc_a1,
417 417 data->xc_a2, data->xc_a3);
418 418 /*FALLTHROUGH*/
419 419 case XC_MSG_RELEASED:
420 420 msg->xc_command = XC_MSG_DONE;
421 421 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
422 422 xc_decrement(mcpup);
423 423 break;
424 424
425 425 /*
426 426 * DONE means a slave has completely finished up.
427 427 * Once we collect all the DONE messages, we'll exit
428 428 * processing too.
429 429 */
430 430 case XC_MSG_DONE:
431 431 msg->xc_command = XC_MSG_FREE;
432 432 xc_insert(&mcpup->xc_free, msg);
433 433 xc_decrement(mcpup);
434 434 break;
435 435
436 436 case XC_MSG_FREE:
437 437 panic("free message 0x%p in msgbox", (void *)msg);
438 438 break;
439 439
440 440 default:
441 441 panic("bad message 0x%p in msgbox", (void *)msg);
442 442 break;
443 443 }
444 444 }
445 445 return (rc);
446 446 }
447 447
448 448 /*
449 449 * Initiate cross call processing.
450 450 */
451 451 static void
452 452 xc_common(
453 453 xc_func_t func,
454 454 xc_arg_t arg1,
455 455 xc_arg_t arg2,
456 456 xc_arg_t arg3,
457 457 ulong_t *set,
458 458 uint_t command)
459 459 {
460 460 int c;
461 461 struct cpu *cpup;
462 462 xc_msg_t *msg;
463 463 xc_data_t *data;
464 464 int cnt;
465 465 int save_spl;
466 466
467 467 if (!xc_initialized) {
468 468 if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
469 469 func != NULL)
470 470 (void) (*func)(arg1, arg2, arg3);
471 471 return;
472 472 }
473 473
474 474 save_spl = splr(ipltospl(XC_HI_PIL));
475 475
476 476 /*
477 477 * fill in cross call data
478 478 */
479 479 data = &CPU->cpu_m.xc_data;
480 480 data->xc_func = func;
481 481 data->xc_a1 = arg1;
482 482 data->xc_a2 = arg2;
483 483 data->xc_a3 = arg3;
484 484
485 485 /*
486 486 * Post messages to all CPUs involved that are CPU_READY
487 487 */
488 488 CPU->cpu_m.xc_wait_cnt = 0;
489 489 for (c = 0; c < max_ncpus; ++c) {
490 490 if (!BT_TEST(set, c))
491 491 continue;
492 492 cpup = cpu[c];
493 493 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
494 494 continue;
495 495
496 496 /*
497 497 * Fill out a new message.
498 498 */
499 499 msg = xc_extract(&CPU->cpu_m.xc_free);
500 500 if (msg == NULL)
501 501 panic("Ran out of free xc_msg_t's");
502 502 msg->xc_command = command;
503 503 if (msg->xc_master != CPU->cpu_id)
504 504 panic("msg %p has wrong xc_master", (void *)msg);
505 505 msg->xc_slave = c;
506 506
507 507 /*
508 508 * Increment my work count for all messages that I'll
509 509 * transition from DONE to FREE.
510 510 * Also remember how many XC_MSG_WAITINGs to look for
511 511 */
512 512 (void) xc_increment(&CPU->cpu_m);
513 513 if (command == XC_MSG_SYNC)
514 514 ++CPU->cpu_m.xc_wait_cnt;
515 515
516 516 /*
517 517 * Increment the target CPU work count then insert the message
518 518 * in the target msgbox. If I post the first bit of work
519 519 * for the target to do, send an IPI to the target CPU.
520 520 */
521 521 cnt = xc_increment(&cpup->cpu_m);
522 522 xc_insert(&cpup->cpu_m.xc_msgbox, msg);
523 523 if (cpup != CPU) {
524 524 if (cnt == 0) {
525 525 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
526 526 send_dirint(c, XC_HI_PIL);
527 527 if (xc_collect_enable)
528 528 ++xc_total_cnt;
529 529 } else if (xc_collect_enable) {
530 530 ++xc_multi_cnt;
531 531 }
532 532 }
533 533 }
534 534
535 535 /*
536 536 * Now drop into the message handler until all work is done
537 537 */
538 538 (void) xc_serv(NULL, NULL);
539 539 splx(save_spl);
540 540 }
541 541
542 542 /*
543 543 * Push out a priority cross call.
544 544 */
545 545 static void
546 546 xc_priority_common(
547 547 xc_func_t func,
548 548 xc_arg_t arg1,
549 549 xc_arg_t arg2,
550 550 xc_arg_t arg3,
551 551 ulong_t *set)
552 552 {
553 553 int i;
554 554 int c;
555 555 struct cpu *cpup;
556 556
557 557 /*
558 558 * Wait briefly for any previous xc_priority to have finished.
559 559 */
560 560 for (c = 0; c < max_ncpus; ++c) {
561 561 cpup = cpu[c];
562 562 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
563 563 continue;
564 564
565 565 /*
566 566 * The value of 40000 here is from old kernel code. It
567 567 * really should be changed to some time based value, since
568 568 * under a hypervisor, there's no guarantee a remote CPU
569 569 * is even scheduled.
570 570 */
571 571 for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
572 572 SMT_PAUSE();
573 573
574 574 /*
575 575 * Some CPU did not respond to a previous priority request. It's
576 576 * probably deadlocked with interrupts blocked or some such
577 577 * problem. We'll just erase the previous request - which was
578 578 * most likely a kmdb_enter that has already expired - and plow
579 579 * ahead.
580 580 */
581 581 if (BT_TEST(xc_priority_set, c)) {
582 582 XC_BT_CLEAR(xc_priority_set, c);
583 583 if (cpup->cpu_m.xc_work_cnt > 0)
584 584 xc_decrement(&cpup->cpu_m);
585 585 }
586 586 }
587 587
588 588 /*
589 589 * fill in cross call data
590 590 */
591 591 xc_priority_data.xc_func = func;
592 592 xc_priority_data.xc_a1 = arg1;
593 593 xc_priority_data.xc_a2 = arg2;
594 594 xc_priority_data.xc_a3 = arg3;
595 595
596 596 /*
597 597 * Post messages to all CPUs involved that are CPU_READY
598 598 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
599 599 */
600 600 for (c = 0; c < max_ncpus; ++c) {
601 601 if (!BT_TEST(set, c))
602 602 continue;
603 603 cpup = cpu[c];
604 604 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
605 605 cpup == CPU)
606 606 continue;
607 607 (void) xc_increment(&cpup->cpu_m);
608 608 XC_BT_SET(xc_priority_set, c);
609 609 send_dirint(c, XC_HI_PIL);
610 610 for (i = 0; i < 10; ++i) {
611 611 (void) casptr(&cpup->cpu_m.xc_msgbox,
612 612 cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
613 613 }
614 614 }
615 615 }
616 616
617 617 /*
618 618 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
619 619 * This should only be used for extraordinary operations, like panic(), which
620 620 * need to work, in some fashion, in a not completely functional system.
621 621 * All other uses that want minimal waiting should use xc_call_nowait().
622 622 */
623 623 void
624 624 xc_priority(
625 625 xc_arg_t arg1,
626 626 xc_arg_t arg2,
627 627 xc_arg_t arg3,
628 628 ulong_t *set,
629 629 xc_func_t func)
630 630 {
631 631 extern int IGNORE_KERNEL_PREEMPTION;
632 632 int save_spl = splr(ipltospl(XC_HI_PIL));
633 633 int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
634 634
635 635 IGNORE_KERNEL_PREEMPTION = 1;
636 636 xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
637 637 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
638 638 splx(save_spl);
639 639 }
640 640
641 641 /*
642 642 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
643 643 */
644 644 void
645 645 kdi_xc_others(int this_cpu, void (*func)(void))
646 646 {
647 647 extern int IGNORE_KERNEL_PREEMPTION;
648 648 int save_kernel_preemption;
649 649 cpuset_t set;
650 650
651 651 if (!xc_initialized)
652 652 return;
653 653
654 654 save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
655 655 IGNORE_KERNEL_PREEMPTION = 1;
656 656 CPUSET_ALL_BUT(set, this_cpu);
657 657 xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
658 658 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
659 659 }
660 660
661 661
662 662
663 663 /*
664 664 * Invoke function on specified processors. Remotes may continue after
665 665 * service with no waiting. xc_call_nowait() may return immediately too.
666 666 */
667 667 void
668 668 xc_call_nowait(
669 669 xc_arg_t arg1,
670 670 xc_arg_t arg2,
671 671 xc_arg_t arg3,
672 672 ulong_t *set,
673 673 xc_func_t func)
674 674 {
675 675 xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
676 676 }
677 677
678 678 /*
679 679 * Invoke function on specified processors. Remotes may continue after
680 680 * service with no waiting. xc_call() returns only after remotes have finished.
681 681 */
682 682 void
683 683 xc_call(
684 684 xc_arg_t arg1,
685 685 xc_arg_t arg2,
686 686 xc_arg_t arg3,
687 687 ulong_t *set,
688 688 xc_func_t func)
689 689 {
690 690 xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
691 691 }
692 692
693 693 /*
694 694 * Invoke function on specified processors. Remotes wait until all have
695 695 * finished. xc_sync() also waits until all remotes have finished.
696 696 */
697 697 void
698 698 xc_sync(
699 699 xc_arg_t arg1,
700 700 xc_arg_t arg2,
701 701 xc_arg_t arg3,
702 702 ulong_t *set,
703 703 xc_func_t func)
704 704 {
705 705 xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
706 706 }
706 706 }
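For context, the entry points this file exports (xc_call_nowait(), xc_call(), xc_sync()) are driven much like the kdi_xc_others() wrapper above: build a cpuset_t, convert it with CPUSET2BV(), and pass an xc_func_t handler. A minimal, hypothetical caller might look like the sketch below; the handler and function names are illustrative, and the int return type for the handler is assumed from the (void) casts on the invocations in this file.

    /* Illustrative cross-call handler; not part of this change. */
    static int
    my_xcall_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
    {
            /* runs at XC_HI_PIL on every CPU in the set */
            return (0);
    }

    static void
    run_everywhere(xc_arg_t a1)
    {
            cpuset_t set;

            kpreempt_disable();             /* stay on one CPU while initiating */
            CPUSET_ALL(set);                /* all CPUs, including this one */
            xc_call(a1, 0, 0, CPUSET2BV(set), my_xcall_handler);
            kpreempt_enable();
    }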