6138 don't abuse atomic_cas_*
--- old/usr/src/uts/i86pc/os/x_call.c
+++ new/usr/src/uts/i86pc/os/x_call.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/t_lock.h>
33 33 #include <sys/thread.h>
34 34 #include <sys/cpuvar.h>
35 35 #include <sys/x_call.h>
36 36 #include <sys/xc_levels.h>
37 37 #include <sys/cpu.h>
38 38 #include <sys/psw.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/debug.h>
41 41 #include <sys/systm.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/machsystm.h>
44 44 #include <sys/mutex_impl.h>
45 45 #include <sys/stack.h>
46 46 #include <sys/promif.h>
47 47 #include <sys/x86_archext.h>
48 48
49 49 /*
50 50 * Implementation for cross-processor calls via interprocessor interrupts
51 51 *
52 52 * This implementation uses a message passing architecture to allow multiple
53 53 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
54 54 * instruction, aka atomic_cas_ptr(), to implement simple efficient work
55 55 * queues for message passing between CPUs with almost no need for regular
56 56 * locking. See xc_extract() and xc_insert() below.
57 57 *
58 58 * The general idea is that initiating a cross call means putting a message
59 59 * on the target CPU's (or CPUs') work queue. Any synchronization is handled by passing
60 60 * the message back and forth between initiator and target(s).
61 61 *
62 62 * Every CPU has xc_work_cnt, which indicates it has messages to process.
63 63 * This value is incremented as message traffic is initiated and decremented
64 64 * with every message that finishes all processing.
65 65 *
66 66 * The code needs no mfence or other membar_*() calls. The uses of
67 - * atomic_cas_ptr(), atomic_cas_32() and atomic_dec_32() for the message
67 + * atomic_cas_ptr(), atomic_inc_32_nv() and atomic_dec_32() for the message
68 68 * passing are implemented with LOCK prefix instructions which are
69 69 * equivalent to mfence.
70 70 *
71 71 * One interesting aspect of this implementation is that it allows 2 or more
72 72 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
73 73 * The cross call processing by the CPUs will happen in any order with only
74 74 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
75 75 * from cross calls before all slaves have invoked the function.
76 76 *
77 77 * The reason for this asynchronous approach is to allow for fast global
78 78 * TLB shootdowns. If all CPUs, say N, tried to do a global TLB invalidation
79 79 * on a different Virtual Address at the same time, the old code required
80 80 * N squared IPIs. With this method, depending on timing, it could happen
81 81 * with just N IPIs.
82 82 */
83 83
84 84 /*
85 85 * The default is to not enable collecting counts of IPI information, since
86 86 * the updating of shared cachelines could cause excess bus traffic.
87 87 */
88 88 uint_t xc_collect_enable = 0;
89 89 uint64_t xc_total_cnt = 0; /* total #IPIs sent for cross calls */
90 90 uint64_t xc_multi_cnt = 0; /* # times we piggy backed on another IPI */
91 91
92 92 /*
93 93 * Values for message states. Here are the normal transitions. A transition
94 94 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
95 95 * the messages are passed back and forth.
96 96 *
97 97 * FREE => ASYNC -> DONE => FREE
98 98 * FREE => CALL -> DONE => FREE
99 99 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
100 100 *
101 101 * The interesting one above is ASYNC. You might ask, why not go directly
102 102 * to FREE, instead of DONE. If it did that, it might be possible to exhaust
103 103 * the master's xc_free list if a master can generate ASYNC messages faster
104 104 * then the slave can process them. That could be handled with more complicated
105 105 * handling. However since nothing important uses ASYNC, I've not bothered.
106 106 */
107 107 #define XC_MSG_FREE (0) /* msg in xc_free queue */
108 108 #define XC_MSG_ASYNC (1) /* msg in slave xc_msgbox */
109 109 #define XC_MSG_CALL (2) /* msg in slave xc_msgbox */
110 110 #define XC_MSG_SYNC (3) /* msg in slave xc_msgbox */
111 111 #define XC_MSG_WAITING (4) /* msg in master xc_msgbox or xc_waiters */
112 112 #define XC_MSG_RELEASED (5) /* msg in slave xc_msgbox */
113 113 #define XC_MSG_DONE (6) /* msg in master xc_msgbox */
114 114
115 115 /*
116 116 * We allow for one high priority message at a time to happen in the system.
117 117 * This is used for panic, kmdb, etc., so no locking is done.
118 118 */
119 119 static volatile cpuset_t xc_priority_set_store;
120 120 static volatile ulong_t *xc_priority_set = CPUSET2BV(xc_priority_set_store);
121 121 static xc_data_t xc_priority_data;
122 122
123 123 /*
124 124 * Wrappers to avoid C compiler warnings due to volatile. The atomic bit
125 125 * operations don't accept volatile bit vectors - which is a bit silly.
126 126 */
127 127 #define XC_BT_SET(vector, b) BT_ATOMIC_SET((ulong_t *)(vector), (b))
128 128 #define XC_BT_CLEAR(vector, b) BT_ATOMIC_CLEAR((ulong_t *)(vector), (b))
129 129
130 130 /*
131 131 * Decrement a CPU's work count
132 132 */
133 133 static void
134 134 xc_decrement(struct machcpu *mcpu)
135 135 {
136 136 atomic_dec_32(&mcpu->xc_work_cnt);
137 137 }
138 138
139 139 /*
140 140 * Increment a CPU's work count and return the old value
141 141 */
142 142 static int
143 143 xc_increment(struct machcpu *mcpu)
144 144 {
145 - int old;
146 - do {
147 - old = mcpu->xc_work_cnt;
148 - } while (atomic_cas_32(&mcpu->xc_work_cnt, old, old + 1) != old);
149 - return (old);
145 + return (atomic_inc_32_nv(&mcpu->xc_work_cnt) - 1);
150 146 }
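
The hunk above is the crux of the fix: the deleted loop open-codes a fetch-and-increment by retrying atomic_cas_32() until its compare-and-swap wins, while atomic_inc_32_nv() performs the same update in a single LOCK-prefixed instruction and returns the new value, so subtracting 1 recovers the old value the callers expect. A minimal standalone sketch of the equivalence (the helper names are hypothetical, nothing here is from the kernel source):

	#include <sys/types.h>
	#include <sys/atomic.h>	/* atomic_cas_32(), atomic_inc_32_nv() */

	/* Old form: retry a compare-and-swap until our +1 is the update applied. */
	static uint32_t
	increment_cas(volatile uint32_t *cnt)
	{
		uint32_t old;

		do {
			old = *cnt;
		} while (atomic_cas_32(cnt, old, old + 1) != old);
		return (old);
	}

	/* New form: one atomic fetch-and-add; "_nv" means the new value is returned. */
	static uint32_t
	increment_inc(volatile uint32_t *cnt)
	{
		return (atomic_inc_32_nv(cnt) - 1);
	}

Both versions return the pre-increment count, and both use LOCK-prefixed instructions with the mfence-equivalent ordering the block comment above relies on; the difference is that the CAS loop can retry indefinitely under contention, whereas the fetch-and-add always completes in one attempt.
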
151 147
152 148 /*
153 149 * Put a message into a queue. The insertion is atomic no matter
154 150 * how many different inserts/extracts to the same queue happen.
155 151 */
156 152 static void
157 153 xc_insert(void *queue, xc_msg_t *msg)
158 154 {
159 155 xc_msg_t *old_head;
160 156
161 157 /*
162 158 * FREE messages should only ever be getting inserted into
163 159 * the xc_master CPU's xc_free queue.
164 160 */
165 161 ASSERT(msg->xc_command != XC_MSG_FREE ||
166 162 cpu[msg->xc_master] == NULL || /* possible only during init */
167 163 queue == &cpu[msg->xc_master]->cpu_m.xc_free);
168 164
169 165 do {
170 166 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
171 167 msg->xc_next = old_head;
172 168 } while (atomic_cas_ptr(queue, old_head, msg) != old_head);
173 169 }
174 170
175 171 /*
176 172 * Extract a message from a queue. The extraction is atomic only
177 173 * when just one thread does extractions from the queue.
178 174 * If the queue is empty, NULL is returned.
179 175 */
180 176 static xc_msg_t *
181 177 xc_extract(xc_msg_t **queue)
182 178 {
183 179 xc_msg_t *old_head;
184 180
185 181 do {
186 182 old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
187 183 if (old_head == NULL)
188 184 return (old_head);
189 185 } while (atomic_cas_ptr(queue, old_head, old_head->xc_next) !=
190 186 old_head);
191 187 old_head->xc_next = NULL;
192 188 return (old_head);
193 189 }
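
xc_insert() and xc_extract(), by contrast, are legitimate atomic_cas_ptr() users: a lock-free push or pop must publish a new head only if the head is still the value that was read, which is exactly the conditional update compare-and-swap provides and a plain atomic add cannot. The single-extractor restriction noted in the comment above is the classic ABA problem; a hypothetical trace (not from the source) of two concurrent pops corrupting a queue that starts as A -> B -> C:

	pop 1: reads old_head = A, computes new head = A->xc_next = B
	pop 1: stalls just before its atomic_cas_ptr()
	pop 2: pops A, pops B, then pushes A back (queue is now A -> C)
	pop 1: resumes; atomic_cas_ptr(queue, A, B) succeeds because the head
	       is A again, but B was already removed, so the queue head now
	       points at a node that is no longer logically on the list

In this file each queue has a single extractor (a slave drains its own xc_msgbox; a master pops its own xc_free and its local xc_waiters list), so the hazard does not arise.
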
194 190
195 191 /*
196 192 * Initialize the machcpu fields used for cross calls
197 193 */
198 194 static uint_t xc_initialized = 0;
199 195
200 196 void
201 197 xc_init_cpu(struct cpu *cpup)
202 198 {
203 199 xc_msg_t *msg;
204 200 int c;
205 201
206 202 /*
207 203 * Allocate message buffers for the new CPU.
208 204 */
209 205 for (c = 0; c < max_ncpus; ++c) {
210 206 if (plat_dr_support_cpu()) {
211 207 /*
212 208 * Allocate a message buffer for every CPU possible
213 209 * in system, including our own, and add them to our xc
214 210 * message queue.
215 211 */
216 212 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
217 213 msg->xc_command = XC_MSG_FREE;
218 214 msg->xc_master = cpup->cpu_id;
219 215 xc_insert(&cpup->cpu_m.xc_free, msg);
220 216 } else if (cpu[c] != NULL && cpu[c] != cpup) {
221 217 /*
222 218 * Add a new message buffer to each existing CPU's free
223 219 * list, as well as one for my list for each of them.
224 220 * Note: cpu0 is statically inserted into cpu[] array,
225 221 * so need to check cpu[c] isn't cpup itself to avoid
226 222 * allocating extra message buffers for cpu0.
227 223 */
228 224 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
229 225 msg->xc_command = XC_MSG_FREE;
230 226 msg->xc_master = c;
231 227 xc_insert(&cpu[c]->cpu_m.xc_free, msg);
232 228
233 229 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
234 230 msg->xc_command = XC_MSG_FREE;
235 231 msg->xc_master = cpup->cpu_id;
236 232 xc_insert(&cpup->cpu_m.xc_free, msg);
237 233 }
238 234 }
239 235
240 236 if (!plat_dr_support_cpu()) {
241 237 /*
242 238 * Add one for self messages if CPU hotplug is disabled.
243 239 */
244 240 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
245 241 msg->xc_command = XC_MSG_FREE;
246 242 msg->xc_master = cpup->cpu_id;
247 243 xc_insert(&cpup->cpu_m.xc_free, msg);
248 244 }
249 245
250 246 if (!xc_initialized)
251 247 xc_initialized = 1;
252 248 }
253 249
254 250 void
255 251 xc_fini_cpu(struct cpu *cpup)
256 252 {
257 253 xc_msg_t *msg;
258 254
259 255 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
260 256 ASSERT(cpup->cpu_m.xc_msgbox == NULL);
261 257 ASSERT(cpup->cpu_m.xc_work_cnt == 0);
262 258
263 259 while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
264 260 kmem_free(msg, sizeof (*msg));
265 261 }
266 262 }
267 263
268 264 #define XC_FLUSH_MAX_WAITS 1000
269 265
270 266 /* Flush inflight message buffers. */
271 267 int
272 268 xc_flush_cpu(struct cpu *cpup)
273 269 {
274 270 int i;
275 271
276 272 ASSERT((cpup->cpu_flags & CPU_READY) == 0);
277 273
278 274 /*
279 275 * Pause all working CPUs, which ensures that there's no CPU in
280 276 * function xc_common().
281 277 * This is used to work around a race condition window in xc_common()
282 278 * between checking CPU_READY flag and increasing working item count.
283 279 * between checking the CPU_READY flag and increasing the working item count.
284 280 pause_cpus(cpup, NULL);
285 281 start_cpus();
286 282
287 283 for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
288 284 if (cpup->cpu_m.xc_work_cnt == 0) {
289 285 break;
290 286 }
291 287 DELAY(1);
292 288 }
293 289 for (; i < XC_FLUSH_MAX_WAITS; i++) {
294 290 if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
295 291 break;
296 292 }
297 293 DELAY(1);
298 294 }
299 295
300 296 return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
301 297 }
302 298
303 299 /*
304 300 * X-call message processing routine. Note that this is used by both
305 301 * senders and recipients of messages.
306 302 *
307 303 * We're protected against changing CPUs by either being in a high-priority
308 304 * interrupt, having preemption disabled or by having a raised SPL.
309 305 */
310 306 /*ARGSUSED*/
311 307 uint_t
312 308 xc_serv(caddr_t arg1, caddr_t arg2)
313 309 {
314 310 struct machcpu *mcpup = &(CPU->cpu_m);
315 311 xc_msg_t *msg;
316 312 xc_data_t *data;
317 313 xc_msg_t *xc_waiters = NULL;
318 314 uint32_t num_waiting = 0;
319 315 xc_func_t func;
320 316 xc_arg_t a1;
321 317 xc_arg_t a2;
322 318 xc_arg_t a3;
323 319 uint_t rc = DDI_INTR_UNCLAIMED;
324 320
325 321 while (mcpup->xc_work_cnt != 0) {
326 322 rc = DDI_INTR_CLAIMED;
327 323
328 324 /*
329 325 * We may have to wait for a message to arrive.
330 326 */
331 327 for (msg = NULL; msg == NULL;
332 328 msg = xc_extract(&mcpup->xc_msgbox)) {
333 329
334 330 /*
335 331 * Always check for and handle a priority message.
336 332 */
337 333 if (BT_TEST(xc_priority_set, CPU->cpu_id)) {
338 334 func = xc_priority_data.xc_func;
339 335 a1 = xc_priority_data.xc_a1;
340 336 a2 = xc_priority_data.xc_a2;
341 337 a3 = xc_priority_data.xc_a3;
342 338 XC_BT_CLEAR(xc_priority_set, CPU->cpu_id);
343 339 xc_decrement(mcpup);
344 340 func(a1, a2, a3);
345 341 if (mcpup->xc_work_cnt == 0)
346 342 return (rc);
347 343 }
348 344
349 345 /*
350 346 * wait for a message to arrive
351 347 */
352 348 SMT_PAUSE();
353 349 }
354 350
355 351
356 352 /*
357 353 * process the message
358 354 */
359 355 switch (msg->xc_command) {
360 356
361 357 /*
362 358 * ASYNC gives back the message immediately, then we do the
363 359 * function and return with no more waiting.
364 360 */
365 361 case XC_MSG_ASYNC:
366 362 data = &cpu[msg->xc_master]->cpu_m.xc_data;
367 363 func = data->xc_func;
368 364 a1 = data->xc_a1;
369 365 a2 = data->xc_a2;
370 366 a3 = data->xc_a3;
371 367 msg->xc_command = XC_MSG_DONE;
372 368 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
373 369 if (func != NULL)
374 370 (void) (*func)(a1, a2, a3);
375 371 xc_decrement(mcpup);
376 372 break;
377 373
378 374 /*
379 375 * SYNC messages do the call, then send it back to the master
380 376 * in WAITING mode
381 377 */
382 378 case XC_MSG_SYNC:
383 379 data = &cpu[msg->xc_master]->cpu_m.xc_data;
384 380 if (data->xc_func != NULL)
385 381 (void) (*data->xc_func)(data->xc_a1,
386 382 data->xc_a2, data->xc_a3);
387 383 msg->xc_command = XC_MSG_WAITING;
388 384 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
389 385 break;
390 386
391 387 /*
392 388 * WAITING messages are collected by the master until all
393 389 * have arrived. Once all arrive, we release them back to
394 390 * the slaves
395 391 */
396 392 case XC_MSG_WAITING:
397 393 xc_insert(&xc_waiters, msg);
398 394 if (++num_waiting < mcpup->xc_wait_cnt)
399 395 break;
400 396 while ((msg = xc_extract(&xc_waiters)) != NULL) {
401 397 msg->xc_command = XC_MSG_RELEASED;
402 398 xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
403 399 msg);
404 400 --num_waiting;
405 401 }
406 402 if (num_waiting != 0)
407 403 panic("wrong number waiting");
408 404 mcpup->xc_wait_cnt = 0;
409 405 break;
410 406
411 407 /*
412 408 * CALL messages do the function and then, like RELEASED,
413 409 * send the message back to the master as DONE.
414 410 */
415 411 case XC_MSG_CALL:
416 412 data = &cpu[msg->xc_master]->cpu_m.xc_data;
417 413 if (data->xc_func != NULL)
418 414 (void) (*data->xc_func)(data->xc_a1,
419 415 data->xc_a2, data->xc_a3);
420 416 /*FALLTHROUGH*/
421 417 case XC_MSG_RELEASED:
422 418 msg->xc_command = XC_MSG_DONE;
423 419 xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
424 420 xc_decrement(mcpup);
425 421 break;
426 422
427 423 /*
428 424 * DONE means a slave has completely finished up.
429 425 * Once we collect all the DONE messages, we'll exit
430 426 * processing too.
431 427 */
432 428 case XC_MSG_DONE:
433 429 msg->xc_command = XC_MSG_FREE;
434 430 xc_insert(&mcpup->xc_free, msg);
435 431 xc_decrement(mcpup);
436 432 break;
437 433
438 434 case XC_MSG_FREE:
439 435 panic("free message 0x%p in msgbox", (void *)msg);
440 436 break;
441 437
442 438 default:
443 439 panic("bad message 0x%p in msgbox", (void *)msg);
444 440 break;
445 441 }
446 442 }
447 443 return (rc);
448 444 }
449 445
450 446 /*
451 447 * Initiate cross call processing.
452 448 */
453 449 static void
454 450 xc_common(
455 451 xc_func_t func,
456 452 xc_arg_t arg1,
457 453 xc_arg_t arg2,
458 454 xc_arg_t arg3,
459 455 ulong_t *set,
460 456 uint_t command)
461 457 {
462 458 int c;
463 459 struct cpu *cpup;
464 460 xc_msg_t *msg;
465 461 xc_data_t *data;
466 462 int cnt;
467 463 int save_spl;
468 464
469 465 if (!xc_initialized) {
470 466 if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
471 467 func != NULL)
472 468 (void) (*func)(arg1, arg2, arg3);
473 469 return;
474 470 }
475 471
476 472 save_spl = splr(ipltospl(XC_HI_PIL));
477 473
478 474 /*
479 475 * fill in cross call data
480 476 */
481 477 data = &CPU->cpu_m.xc_data;
482 478 data->xc_func = func;
483 479 data->xc_a1 = arg1;
484 480 data->xc_a2 = arg2;
485 481 data->xc_a3 = arg3;
486 482
487 483 /*
488 484 * Post messages to all CPUs involved that are CPU_READY
489 485 */
490 486 CPU->cpu_m.xc_wait_cnt = 0;
491 487 for (c = 0; c < max_ncpus; ++c) {
492 488 if (!BT_TEST(set, c))
493 489 continue;
494 490 cpup = cpu[c];
495 491 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
496 492 continue;
497 493
498 494 /*
499 495 * Fill out a new message.
500 496 */
501 497 msg = xc_extract(&CPU->cpu_m.xc_free);
502 498 if (msg == NULL)
503 499 panic("Ran out of free xc_msg_t's");
504 500 msg->xc_command = command;
505 501 if (msg->xc_master != CPU->cpu_id)
506 502 panic("msg %p has wrong xc_master", (void *)msg);
507 503 msg->xc_slave = c;
508 504
509 505 /*
510 506 * Increment my work count for all messages that I'll
511 507 * transition from DONE to FREE.
512 508 * Also remember how many XC_MSG_WAITINGs to look for
513 509 */
514 510 (void) xc_increment(&CPU->cpu_m);
515 511 if (command == XC_MSG_SYNC)
516 512 ++CPU->cpu_m.xc_wait_cnt;
517 513
518 514 /*
519 515 * Increment the target CPU work count then insert the message
520 516 * in the target msgbox. If I post the first bit of work
521 517 * for the target to do, send an IPI to the target CPU.
522 518 */
523 519 cnt = xc_increment(&cpup->cpu_m);
524 520 xc_insert(&cpup->cpu_m.xc_msgbox, msg);
525 521 if (cpup != CPU) {
526 522 if (cnt == 0) {
527 523 CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
528 524 send_dirint(c, XC_HI_PIL);
529 525 if (xc_collect_enable)
530 526 ++xc_total_cnt;
531 527 } else if (xc_collect_enable) {
532 528 ++xc_multi_cnt;
533 529 }
534 530 }
535 531 }
536 532
537 533 /*
538 534 * Now drop into the message handler until all work is done
539 535 */
540 536 (void) xc_serv(NULL, NULL);
541 537 splx(save_spl);
542 538 }
543 539
544 540 /*
545 541 * Push out a priority cross call.
546 542 */
547 543 static void
548 544 xc_priority_common(
549 545 xc_func_t func,
550 546 xc_arg_t arg1,
551 547 xc_arg_t arg2,
552 548 xc_arg_t arg3,
553 549 ulong_t *set)
554 550 {
555 551 int i;
556 552 int c;
557 553 struct cpu *cpup;
558 554
559 555 /*
560 556 * Wait briefly for any previous xc_priority to have finished.
561 557 */
562 558 for (c = 0; c < max_ncpus; ++c) {
563 559 cpup = cpu[c];
564 560 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
565 561 continue;
566 562
567 563 /*
568 564 * The value of 40000 here is from old kernel code. It
569 565 * really should be changed to some time based value, since
570 566 * under a hypervisor, there's no guarantee a remote CPU
571 567 * is even scheduled.
572 568 */
573 569 for (i = 0; BT_TEST(xc_priority_set, c) && i < 40000; ++i)
574 570 SMT_PAUSE();
575 571
576 572 /*
577 573 * Some CPU did not respond to a previous priority request. It's
578 574 * probably deadlocked with interrupts blocked or some such
579 575 * problem. We'll just erase the previous request - which was
580 576 * most likely a kmdb_enter that has already expired - and plow
581 577 * ahead.
582 578 */
583 579 if (BT_TEST(xc_priority_set, c)) {
584 580 XC_BT_CLEAR(xc_priority_set, c);
585 581 if (cpup->cpu_m.xc_work_cnt > 0)
586 582 xc_decrement(&cpup->cpu_m);
587 583 }
588 584 }
589 585
590 586 /*
591 587 * fill in cross call data
592 588 */
593 589 xc_priority_data.xc_func = func;
594 590 xc_priority_data.xc_a1 = arg1;
595 591 xc_priority_data.xc_a2 = arg2;
596 592 xc_priority_data.xc_a3 = arg3;
597 593
598 594 /*
599 595 * Post messages to all CPUs involved that are CPU_READY
600 596 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
601 597 */
602 598 for (c = 0; c < max_ncpus; ++c) {
603 599 if (!BT_TEST(set, c))
604 600 continue;
605 601 cpup = cpu[c];
606 602 if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
607 603 cpup == CPU)
608 604 continue;
609 605 (void) xc_increment(&cpup->cpu_m);
610 606 XC_BT_SET(xc_priority_set, c);
611 607 send_dirint(c, XC_HI_PIL);
612 608 for (i = 0; i < 10; ++i) {
613 609 (void) atomic_cas_ptr(&cpup->cpu_m.xc_msgbox,
614 610 cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
615 611 }
616 612 }
617 613 }
618 614
619 615 /*
620 616 * Do cross call to all other CPUs with absolutely no waiting or handshaking.
621 617 * This should only be used for extraordinary operations, like panic(), which
622 618 * need to work, in some fashion, in a not completely functional system.
623 619 * All other uses that want minimal waiting should use xc_call_nowait().
624 620 */
625 621 void
626 622 xc_priority(
627 623 xc_arg_t arg1,
628 624 xc_arg_t arg2,
629 625 xc_arg_t arg3,
630 626 ulong_t *set,
631 627 xc_func_t func)
632 628 {
633 629 extern int IGNORE_KERNEL_PREEMPTION;
634 630 int save_spl = splr(ipltospl(XC_HI_PIL));
635 631 int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
636 632
637 633 IGNORE_KERNEL_PREEMPTION = 1;
638 634 xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
639 635 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
640 636 splx(save_spl);
641 637 }
642 638
643 639 /*
644 640 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
645 641 */
646 642 void
647 643 kdi_xc_others(int this_cpu, void (*func)(void))
648 644 {
649 645 extern int IGNORE_KERNEL_PREEMPTION;
650 646 int save_kernel_preemption;
651 647 cpuset_t set;
652 648
653 649 if (!xc_initialized)
654 650 return;
655 651
656 652 save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
657 653 IGNORE_KERNEL_PREEMPTION = 1;
658 654 CPUSET_ALL_BUT(set, this_cpu);
659 655 xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
660 656 IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
661 657 }
662 658
663 659
664 660
665 661 /*
666 662 * Invoke function on specified processors. Remotes may continue after
667 663 * service with no waiting. xc_call_nowait() may return immediately too.
668 664 */
669 665 void
670 666 xc_call_nowait(
671 667 xc_arg_t arg1,
672 668 xc_arg_t arg2,
673 669 xc_arg_t arg3,
674 670 ulong_t *set,
675 671 xc_func_t func)
676 672 {
677 673 xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
678 674 }
679 675
680 676 /*
681 677 * Invoke function on specified processors. Remotes may continue after
682 678 * service with no waiting. xc_call() returns only after remotes have finished.
683 679 */
684 680 void
685 681 xc_call(
686 682 xc_arg_t arg1,
687 683 xc_arg_t arg2,
688 684 xc_arg_t arg3,
689 685 ulong_t *set,
690 686 xc_func_t func)
691 687 {
692 688 xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
693 689 }
694 690
695 691 /*
696 692 * Invoke function on specified processors. Remotes wait until all have
697 693 * finished. xc_sync() also waits until all remotes have finished.
698 694 */
699 695 void
700 696 xc_sync(
701 697 xc_arg_t arg1,
702 698 xc_arg_t arg2,
703 699 xc_arg_t arg3,
704 700 ulong_t *set,
705 701 xc_func_t func)
706 702 {
707 703 xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
708 704 }
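
For context on the three entry points above: they differ only in the message command handed to xc_common(), and therefore in how much handshaking happens before each side proceeds. A hypothetical caller (my_handler, example() and the argument values are made up for illustration) might look like:

	#include <sys/cpuvar.h>	/* cpuset_t, CPUSET_ALL(), CPUSET2BV() */
	#include <sys/x_call.h>	/* xc_call(), xc_sync(), xc_call_nowait() */

	static int
	my_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
	{
		/* Runs on each targeted CPU at XC_HI_PIL. */
		return (0);
	}

	void
	example(void)
	{
		cpuset_t set;

		CPUSET_ALL(set);	/* target every CPU, self included */

		/* ASYNC: returns once the messages are posted; remotes run later. */
		xc_call_nowait(1, 2, 3, CPUSET2BV(set), my_handler);

		/* CALL: returns only after every targeted CPU has run the handler. */
		xc_call(1, 2, 3, CPUSET2BV(set), my_handler);

		/* SYNC: additionally, the remotes all wait for one another to finish. */
		xc_sync(1, 2, 3, CPUSET2BV(set), my_handler);
	}
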