5045 use atomic_{inc,dec}_* instead of atomic_add_*
--- old/usr/src/uts/common/os/errorq.c
+++ new/usr/src/uts/common/os/errorq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Kernel Error Queues
28 28 *
29 29 * A common problem when handling hardware error traps and interrupts is that
30 30 * these errors frequently must be handled at high interrupt level, where
31 31 * reliably producing error messages and safely examining and manipulating
32 32 * other kernel state may not be possible. The kernel error queue primitive is
33 33 * a common set of routines that allow a subsystem to maintain a queue of
34 34 * errors that can be processed by an explicit call from a safe context or by a
35 35 * soft interrupt that fires at a specific lower interrupt level. The queue
36 36 * management code also ensures that if the system panics, all in-transit
37 37 * errors are logged prior to reset. Each queue has an associated kstat for
38 38 * observing the number of errors dispatched and logged, and mdb(1) debugging
39 39 * support is provided for live and post-mortem observability.
40 40 *
41 41 * Memory Allocation
42 42 *
43 43 * All of the queue data structures are allocated in advance as part of
44 44 * the errorq_create() call. No additional memory allocations are
45 45 * performed as part of errorq_dispatch(), errorq_reserve(),
46 46 * errorq_commit() or errorq_drain(). This design
47 47 * facilitates reliable error queue processing even when the system is low
48 48 * on memory, and ensures that errorq_dispatch() can be called from any
49 49 * context. When the queue is created, the maximum queue length is
50 50 * specified as a parameter to errorq_create() and errorq_nvcreate(). This
51 51 * length should represent a reasonable upper bound on the number of
52 52 * simultaneous errors. If errorq_dispatch() or errorq_reserve() is
53 53 * invoked and no free queue elements are available, the error is
54 54 * dropped and will not be logged. Typically, the queue will only be
55 55 * exhausted by an error storm, and in this case
56 56 * the earlier errors provide the most important data for analysis.
57 57 * When a new error is dispatched, the error data is copied into the
58 58 * preallocated queue element so that the caller's buffer can be reused.
59 59 *
60 60 * When a new error is reserved, an element is moved from the free pool
61 61 * and returned to the caller. The element buffer data, eqe_data, may be
62 62 * managed by the caller and dispatched to the errorq by calling
63 63 * errorq_commit(). This is useful for additions to errorqs
64 64 * created with errorq_nvcreate() to handle name-value pair (nvpair) data.
65 65 * See below for a discussion of nvlist errorqs.
66 66 *
67 67 * Queue Drain Callback
68 68 *
69 69 * When the error queue is drained, the caller's queue drain callback is
70 70 * invoked with a pointer to the saved error data. This function may be
71 71 * called from passive kernel context or soft interrupt context at or
72 72 * below LOCK_LEVEL, or as part of panic(). As such, the callback should
73 73 * basically only be calling cmn_err (but NOT with the CE_PANIC flag).
74 74 * The callback must not call panic(), attempt to allocate memory, or wait
75 75 * on a condition variable. The callback may not call errorq_destroy()
76 76 * or errorq_drain() on the same error queue that called it.
77 77 *
78 78 * The queue drain callback will always be called for each pending error
79 79 * in the order in which errors were enqueued (oldest to newest). The
80 80 * queue drain callback is guaranteed to provide at *least* once semantics
81 81 * for all errors that are successfully dispatched (i.e. for which
82 82 * errorq_dispatch() has successfully completed). If an unrelated panic
83 83 * occurs while the queue drain callback is running on a vital queue, the
84 84 * panic subsystem will continue the queue drain and the callback may be
85 85 * invoked again for the same error. Therefore, the callback should
86 86 * restrict itself to logging messages and taking other actions that are
87 87 * not destructive if repeated.
88 88 *
89 89 * Name-Value Pair Error Queues
90 90 *
91 91 * During error handling, it may be more convenient to store error
92 92 * queue element data as a fixed buffer of name-value pairs. The
93 93 * nvpair library allows construction and destruction of nvlists
94 94 * in pre-allocated memory buffers.
95 95 *
96 96 * Error queues created via errorq_nvcreate() store queue element
97 97 * data as fixed buffer nvlists (ereports). errorq_reserve()
98 98 * allocates an errorq element from eqp->eq_bitmap and returns a valid
99 99 * pointer to an errorq_elem_t (queue element) and a pre-allocated
100 100 * fixed buffer nvlist. errorq_elem_nvl() is used to gain access
101 101 * to the nvlist to add name-value ereport members prior to
102 102 * dispatching the error queue element in errorq_commit().
103 103 *
104 104 * Once dispatched, the drain function will return the element to
105 105 * eqp->eq_bitmap and reset the associated nv_alloc structure.
106 106 * errorq_cancel() may be called to cancel an element reservation
107 107 * that was never dispatched (committed). This is useful in
108 108 * cases where a programming error prevents a queue element from being
109 109 * dispatched.
110 110 *
111 111 * Queue Management
112 112 *
113 113 * The queue element structures and error data buffers are allocated in
114 114 * two contiguous chunks as part of errorq_create() or errorq_nvcreate().
115 115 * Each queue element structure contains a next pointer,
116 116 * a previous pointer, and a pointer to the corresponding error data
117 117 * buffer. The data buffer for a nvlist errorq is a shared buffer
118 118 * for the allocation of name-value pair lists. The elements are kept on
119 119 * one of four lists:
120 120 *
121 121 * Unused elements are kept in the free pool, managed by eqp->eq_bitmap.
122 122 * The eqe_prev and eqe_next pointers are not used while in the free pool
123 123 * and will be set to NULL.
124 124 *
125 125 * Pending errors are kept on the pending list, a singly-linked list
126 126 * pointed to by eqp->eq_pend, and linked together using eqe_prev. This
127 127 * list is maintained in order from newest error to oldest. The eqe_next
128 128 * pointer is not used by the pending list and will be set to NULL.
129 129 *
130 130 * The processing list is a doubly-linked list pointed to by eqp->eq_phead
131 131 * (the oldest element) and eqp->eq_ptail (the newest element). The
132 132 * eqe_next pointer is used to traverse from eq_phead to eq_ptail, and the
133 133 * eqe_prev pointer is used to traverse from eq_ptail to eq_phead. Once a
134 134 * queue drain operation begins, the current pending list is moved to the
135 135 * processing list in a two-phase commit fashion (eq_ptail being cleared
136 136 * at the beginning but eq_phead only at the end), allowing the panic code
137 137 * to always locate and process all pending errors in the event that a
138 138 * panic occurs in the middle of queue processing.
139 139 *
140 140 * A fourth list is maintained for nvlist errorqs. The dump list,
141 141 * eq_dump is used to link all errorq elements that should be stored
142 142 * in a crash dump file in the event of a system panic. During
143 143 * errorq_panic(), the list is created and subsequently traversed
144 144 * in errorq_dump() during the final phases of a crash dump.
145 145 *
146 146 * Platform Considerations
147 147 *
148 148 * In order to simplify their implementation, error queues make use of the
149 149 * C wrappers for compare-and-swap. If the platform itself does not
150 150 * support compare-and-swap in hardware and the kernel emulation routines
151 151 * are used instead, then the context in which errorq_dispatch() can be
152 152 * safely invoked is further constrained by the implementation of the
153 153 * compare-and-swap emulation. Specifically, if errorq_dispatch() is
154 154 * called from a code path that can be executed above ATOMIC_LEVEL on such
155 155 * a platform, the dispatch code could potentially deadlock unless the
156 156 * corresponding error interrupt is blocked or disabled prior to calling
157 157 * errorq_dispatch(). Error queues should therefore be deployed with
158 158 * caution on these platforms.
159 159 *
160 160 * Interfaces
161 161 *
162 162 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
163 163 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
164 164 *
165 165 * Create a new error queue with the specified name, callback, and
166 166 * properties. A pointer to the new error queue is returned upon success,
167 167 * or NULL is returned to indicate that the queue could not be created.
168 168 * This function must be called from passive kernel context with no locks
169 169 * held that can prevent a sleeping memory allocation from occurring.
170 170 * errorq_create() will return failure if the queue kstats cannot be
171 171 * created, or if a soft interrupt handler cannot be registered.
172 172 *
173 173 * The queue 'name' is a string that is recorded for live and post-mortem
174 174 * examination by a debugger. The queue callback 'func' will be invoked
175 175 * for each error drained from the queue, and will receive the 'private'
176 176 * pointer as its first argument. The callback must obey the rules for
177 177 * callbacks described above. The queue will have maximum length 'qlen'
178 178 * and each element will be able to record up to 'eltsize' bytes of data.
179 179 * The queue's soft interrupt (see errorq_dispatch(), below) will fire
180 180 * at 'ipl', which should not exceed LOCK_LEVEL. The queue 'flags' may
181 181 * include the following flag:
182 182 *
183 183 * ERRORQ_VITAL - This queue contains information that is considered
184 184 * vital to problem diagnosis. Error queues that are marked vital will
185 185 * be automatically drained by the panic subsystem prior to printing
186 186 * the panic messages to the console.
187 187 *
188 188 * void errorq_destroy(errorq);
189 189 *
190 190 * Destroy the specified error queue. The queue is drained of any
191 191 * pending elements and these are logged before errorq_destroy returns.
192 192 * Once errorq_destroy() begins draining the queue, any simultaneous
193 193 * calls to dispatch errors will result in the errors being dropped.
194 194 * The caller must invoke a higher-level abstraction (e.g. disabling
195 195 * an error interrupt) to ensure that error handling code does not
196 196 * attempt to dispatch errors to the queue while it is being freed.
197 197 *
198 198 * void errorq_dispatch(errorq, data, len, flag);
199 199 *
200 200 * Attempt to enqueue the specified error data. If a free queue element
201 201 * is available, the data is copied into a free element and placed on a
202 202 * pending list. If no free queue element is available, the error is
203 203 * dropped. The data length (len) is specified in bytes and should not
204 204 * exceed the queue's maximum element size. If the data length is less
205 205 * than the maximum element size, the remainder of the queue element is
206 206 * filled with zeroes. The flag parameter should be one of:
207 207 *
208 208 * ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
209 209 * IPL to asynchronously drain the queue on behalf of the caller.
210 210 *
211 211 * ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
212 212 * The caller is presumed to be calling errorq_drain() or panic() in
213 213 * the near future in order to drain the queue and log the error.
214 214 *
215 215 * The errorq_dispatch() function may be called from any context, subject
216 216 * to the Platform Considerations described above.
217 217 *
218 218 * void errorq_drain(errorq);
219 219 *
220 220 * Drain the error queue of all pending errors. The queue's callback
221 221 * function is invoked for each error in order from oldest to newest.
222 222 * This function may be used at or below LOCK_LEVEL or from panic context.
223 223 *
224 224 * errorq_elem_t *errorq_reserve(errorq);
225 225 *
226 226 * Reserve an error queue element for later processing and dispatching.
227 227 * The element is returned to the caller who may add error-specific data
228 228 * to the element. The element is returned to the free pool when either
229 229 * errorq_commit() is called and the element asynchronously processed
230 230 * or immediately when errorq_cancel() is called.
231 231 *
232 232 * void errorq_commit(errorq, errorq_elem, flag);
233 233 *
234 234 * Commit an errorq element (eqep) for dispatching, see
235 235 * errorq_dispatch().
236 236 *
237 237 * void errorq_cancel(errorq, errorq_elem);
238 238 *
239 239 * Cancel a pending errorq element reservation. The errorq element is
240 240 * returned to the free pool upon cancelation.
241 241 */
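To make the interface contract concrete, here is a minimal sketch of a hypothetical client (names prefixed my_ are illustrative, the errorq_func_t callback signature is assumed to match its declaration in <sys/errorq.h>, and addr/synd stand in for values captured at trap time):

    #include <sys/errorq.h>
    #include <sys/cmn_err.h>

    typedef struct my_err {
            uint64_t me_addr;       /* faulting address */
            uint32_t me_synd;       /* syndrome bits */
    } my_err_t;

    static errorq_t *my_errq;

    /* Drain callback: log-only and idempotent, per the rules above. */
    /*ARGSUSED*/
    static void
    my_drain(void *private, const void *data, const errorq_elem_t *eep)
    {
            const my_err_t *mep = data;

            cmn_err(CE_WARN, "mydrv: error at 0x%llx, syndrome 0x%x",
                (u_longlong_t)mep->me_addr, mep->me_synd);
    }

    /* From passive context, e.g. attach(): up to 16 in-transit errors. */
    my_errq = errorq_create("my_error_queue", my_drain, NULL, 16,
        sizeof (my_err_t), 2 /* softint ipl; must not exceed LOCK_LEVEL */,
        ERRORQ_VITAL);

    /* At trap time, from any context: copy the record out and go. */
    my_err_t me = { .me_addr = addr, .me_synd = synd };
    errorq_dispatch(my_errq, &me, sizeof (me), ERRORQ_ASYNC);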
242 242
243 243 #include <sys/errorq_impl.h>
244 244 #include <sys/sysmacros.h>
245 245 #include <sys/machlock.h>
246 246 #include <sys/cmn_err.h>
247 247 #include <sys/atomic.h>
248 248 #include <sys/systm.h>
249 249 #include <sys/kmem.h>
250 250 #include <sys/conf.h>
251 251 #include <sys/ddi.h>
252 252 #include <sys/sunddi.h>
253 253 #include <sys/bootconf.h>
254 254 #include <sys/spl.h>
255 255 #include <sys/dumphdr.h>
256 256 #include <sys/compress.h>
257 257 #include <sys/time.h>
258 258 #include <sys/panic.h>
259 259 #include <sys/bitmap.h>
260 260 #include <sys/fm/protocol.h>
261 261 #include <sys/fm/util.h>
262 262
263 263 static struct errorq_kstat errorq_kstat_template = {
264 264 { "dispatched", KSTAT_DATA_UINT64 },
265 265 { "dropped", KSTAT_DATA_UINT64 },
266 266 { "logged", KSTAT_DATA_UINT64 },
267 267 { "reserved", KSTAT_DATA_UINT64 },
268 268 { "reserve_fail", KSTAT_DATA_UINT64 },
269 269 { "committed", KSTAT_DATA_UINT64 },
270 270 { "commit_fail", KSTAT_DATA_UINT64 },
271 271 { "cancelled", KSTAT_DATA_UINT64 }
272 272 };
273 273
274 274 static uint64_t errorq_lost = 0;
275 275 static errorq_t *errorq_list = NULL;
276 276 static kmutex_t errorq_lock;
277 277 static uint64_t errorq_vitalmin = 5;
278 278
279 279 static uint_t
280 280 errorq_intr(caddr_t eqp)
281 281 {
282 282 errorq_drain((errorq_t *)eqp);
283 283 return (DDI_INTR_CLAIMED);
284 284 }
285 285
286 286 /*
287 287 * Create a new error queue with the specified properties and add a software
288 288 * interrupt handler and kstat for it. This function must be called from
289 289 * passive kernel context with no locks held that can prevent a sleeping
290 290 * memory allocation from occurring. This function will return NULL if the
291 291 * softint or kstat for this queue cannot be created.
292 292 */
293 293 errorq_t *
294 294 errorq_create(const char *name, errorq_func_t func, void *private,
295 295 ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
296 296 {
297 297 errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
298 298 ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
299 299 dev_info_t *dip = ddi_root_node();
300 300
301 301 errorq_elem_t *eep;
302 302 ddi_softintr_t id = NULL;
303 303 caddr_t data;
304 304
305 305 ASSERT(qlen != 0 && size != 0);
306 306 ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);
307 307
308 308 /*
309 309 * If a queue is created very early in boot before device tree services
310 310 * are available, the queue softint handler cannot be created. We
311 311 * manually drain these queues and create their softint handlers when
312 312 * it is safe to do so as part of errorq_init(), below.
313 313 */
314 314 if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
315 315 &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
316 316 cmn_err(CE_WARN, "errorq_create: failed to register "
317 317 "IPL %u softint for queue %s", ipl, name);
318 318 kmem_free(eqp, sizeof (errorq_t));
319 319 return (NULL);
320 320 }
321 321
322 322 if ((eqp->eq_ksp = kstat_create("unix", 0, name, "errorq",
323 323 KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
324 324 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
325 325 cmn_err(CE_WARN, "errorq_create: failed to create kstat "
326 326 "for queue %s", name);
327 327 if (id != NULL)
328 328 ddi_remove_softintr(id);
329 329 kmem_free(eqp, sizeof (errorq_t));
330 330 return (NULL);
331 331 }
332 332
333 333 bcopy(&errorq_kstat_template, &eqp->eq_kstat,
334 334 sizeof (struct errorq_kstat));
335 335 eqp->eq_ksp->ks_data = &eqp->eq_kstat;
336 336 eqp->eq_ksp->ks_private = eqp;
337 337 kstat_install(eqp->eq_ksp);
338 338
339 339 (void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
340 340 eqp->eq_name[ERRORQ_NAMELEN] = '\0';
341 341 eqp->eq_func = func;
342 342 eqp->eq_private = private;
343 343 eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
344 344 eqp->eq_qlen = qlen;
345 345 eqp->eq_size = size;
346 346 eqp->eq_ipl = ipl;
347 347 eqp->eq_flags = flags | ERRORQ_ACTIVE;
348 348 eqp->eq_id = id;
349 349 mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
350 350 eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
351 351 eqp->eq_phead = NULL;
352 352 eqp->eq_ptail = NULL;
353 353 eqp->eq_pend = NULL;
354 354 eqp->eq_dump = NULL;
355 355 eqp->eq_bitmap = kmem_zalloc(BT_SIZEOFMAP(qlen), KM_SLEEP);
356 356 eqp->eq_rotor = 0;
357 357
358 358 /*
358 358 * Iterate over the array of errorq_elem_t structures and set each
359 359 * element's data pointer.
361 361 */
362 362 for (eep = eqp->eq_elems, data = eqp->eq_data; qlen > 1; qlen--) {
363 363 eep->eqe_next = NULL;
364 364 eep->eqe_dump = NULL;
365 365 eep->eqe_prev = NULL;
366 366 eep->eqe_data = data;
367 367 data += size;
368 368 eep++;
369 369 }
370 370 eep->eqe_next = NULL;
371 371 eep->eqe_prev = NULL;
372 372 eep->eqe_data = data;
373 373 eep->eqe_dump = NULL;
374 374
375 375 /*
376 376 * Once the errorq is initialized, add it to the global list of queues,
377 377 * and then return a pointer to the new queue to the caller.
378 378 */
379 379 mutex_enter(&errorq_lock);
380 380 eqp->eq_next = errorq_list;
381 381 errorq_list = eqp;
382 382 mutex_exit(&errorq_lock);
383 383
384 384 return (eqp);
385 385 }
386 386
387 387 /*
388 388 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
389 389 * flag and initialize each element to have the start of its data region used
390 390 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
391 391 */
392 392 errorq_t *
393 393 errorq_nvcreate(const char *name, errorq_func_t func, void *private,
394 394 ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
395 395 {
396 396 errorq_t *eqp;
397 397 errorq_elem_t *eep;
398 398
399 399 eqp = errorq_create(name, func, private, qlen,
400 400 size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);
401 401
402 402 if (eqp == NULL)
403 403 return (NULL);
404 404
405 405 mutex_enter(&eqp->eq_lock);
406 406
407 407 for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
408 408 errorq_nvelem_t *eqnp = eep->eqe_data;
409 409 eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
410 410 eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
411 411 }
412 412
413 413 mutex_exit(&eqp->eq_lock);
414 414 return (eqp);
415 415 }
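For a queue created this way, the intended flow is reserve, fill the element's nvlist, then commit (or cancel on failure). A sketch, assuming nv_eqp was returned by errorq_nvcreate() and ena is a caller-supplied value (the class string below is purely illustrative):

    errorq_elem_t *eqep;
    nvlist_t *nvl;

    if ((eqep = errorq_reserve(nv_eqp)) == NULL)
            return;                 /* queue exhausted; error is dropped */

    /* Fixed-buffer nvlist, created for us by errorq_reserve(). */
    nvl = errorq_elem_nvl(nv_eqp, eqep);

    if (nvlist_add_string(nvl, FM_CLASS, "ereport.example.hypothetical") != 0 ||
        nvlist_add_uint64(nvl, "ena", ena) != 0) {
            errorq_cancel(nv_eqp, eqep);    /* back to the free pool */
            return;
    }

    errorq_commit(nv_eqp, eqep, ERRORQ_ASYNC);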
416 416
417 417 /*
418 418 * To destroy an error queue, we mark it as disabled and then explicitly drain
419 419 * all pending errors. Once the drain is complete, we can remove the queue
420 420 * from the global list of queues examined by errorq_panic(), and then free
421 421 * the various queue data structures. The caller must use some higher-level
422 422 * abstraction (e.g. disabling an error interrupt) to ensure that no one will
423 423 * attempt to enqueue new errors while we are freeing this queue.
424 424 */
425 425 void
426 426 errorq_destroy(errorq_t *eqp)
427 427 {
428 428 errorq_t *p, **pp;
429 429 errorq_elem_t *eep;
430 430 ulong_t i;
431 431
432 432 ASSERT(eqp != NULL);
433 433 eqp->eq_flags &= ~ERRORQ_ACTIVE;
434 434 errorq_drain(eqp);
435 435
436 436 mutex_enter(&errorq_lock);
437 437 pp = &errorq_list;
438 438
439 439 for (p = errorq_list; p != NULL; p = p->eq_next) {
440 440 if (p == eqp) {
441 441 *pp = p->eq_next;
442 442 break;
443 443 }
444 444 pp = &p->eq_next;
445 445 }
446 446
447 447 mutex_exit(&errorq_lock);
448 448 ASSERT(p != NULL);
449 449
450 450 if (eqp->eq_flags & ERRORQ_NVLIST) {
451 451 for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
452 452 errorq_nvelem_t *eqnp = eep->eqe_data;
453 453 fm_nva_xdestroy(eqnp->eqn_nva);
454 454 }
455 455 }
456 456
457 457 mutex_destroy(&eqp->eq_lock);
458 458 kstat_delete(eqp->eq_ksp);
459 459
460 460 if (eqp->eq_id != NULL)
461 461 ddi_remove_softintr(eqp->eq_id);
462 462
463 463 kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
464 464 kmem_free(eqp->eq_bitmap, BT_SIZEOFMAP(eqp->eq_qlen));
465 465 kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);
466 466
467 467 kmem_free(eqp, sizeof (errorq_t));
468 468 }
469 469
470 470 /*
471 471 * Private version of bt_availbit() which makes a best-effort attempt
472 472 * to allocate in round-robin fashion in order to facilitate post-mortem
473 473 * diagnosis.
474 474 */
475 475 static index_t
476 476 errorq_availbit(ulong_t *bitmap, size_t nbits, index_t curindex)
477 477 {
478 478 ulong_t bit, maxbit, bx;
479 479 index_t rval, nextindex = curindex + 1;
480 480 index_t nextword = nextindex >> BT_ULSHIFT;
481 481 ulong_t nextbitindex = nextindex & BT_ULMASK;
482 482 index_t maxindex = nbits - 1;
483 483 index_t maxword = maxindex >> BT_ULSHIFT;
484 484 ulong_t maxbitindex = maxindex & BT_ULMASK;
485 485
486 486 /*
487 487 * First check if there are still some bits remaining in the current
488 488 * word, and see if any of those are available. We need to do this by
489 489 * hand as the bt_availbit() function always starts at the beginning
490 490 * of a word.
491 491 */
492 492 if (nextindex <= maxindex && nextbitindex != 0) {
493 493 maxbit = (nextword == maxword) ? maxbitindex : BT_ULMASK;
494 494 for (bx = 0, bit = 1; bx <= maxbit; bx++, bit <<= 1)
495 495 if (bx >= nextbitindex && !(bitmap[nextword] & bit))
496 496 return ((nextword << BT_ULSHIFT) + bx);
497 497 nextword++;
498 498 }
499 499 /*
500 500 * Now check if there are any words remaining before the end of the
501 501 * bitmap. Use bt_availbit() to find any free bits.
502 502 */
503 503 if (nextword <= maxword)
504 504 if ((rval = bt_availbit(&bitmap[nextword],
505 505 nbits - (nextword << BT_ULSHIFT))) != -1)
506 506 return ((nextword << BT_ULSHIFT) + rval);
507 507 /*
508 508 * Finally loop back to the start and look for any free bits starting
509 509 * from the beginning of the bitmap to the current rotor position.
510 510 */
511 511 return (bt_availbit(bitmap, nextindex));
512 512 }
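A worked example of the resulting search order, assuming 64-bit words (BT_ULSHIFT == 6): with nbits == 200 and curindex == 70, the hand-rolled first phase scans bits 71..127 within word 1, bt_availbit() then covers bits 128..199 in the remaining words, and the final call wraps around to bits 0..70, including the rotor position itself.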
513 513
514 514 /*
515 515 * Dispatch a new error into the queue for later processing. The specified
516 516 * data buffer is copied into a preallocated queue element. If 'len' is
517 517 * smaller than the queue element size, the remainder of the queue element is
518 518 * filled with zeroes. This function may be called from any context subject
519 519 * to the Platform Considerations described above.
520 520 */
521 521 void
522 522 errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
523 523 {
524 524 errorq_elem_t *eep, *old;
525 525
526 526 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
527 - atomic_add_64(&errorq_lost, 1);
527 + atomic_inc_64(&errorq_lost);
528 528 return; /* drop error if queue is uninitialized or disabled */
529 529 }
530 530
531 531 for (;;) {
532 532 int i, rval;
533 533
534 534 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
535 535 eqp->eq_rotor)) == -1) {
536 - atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
536 + atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
537 537 return;
538 538 }
539 539 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
540 540 if (rval == 0) {
541 541 eqp->eq_rotor = i;
542 542 eep = &eqp->eq_elems[i];
543 543 break;
544 544 }
545 545 }
546 546
547 547 ASSERT(len <= eqp->eq_size);
548 548 bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));
549 549
550 550 if (len < eqp->eq_size)
551 551 bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);
552 552
553 553 for (;;) {
554 554 old = eqp->eq_pend;
555 555 eep->eqe_prev = old;
556 556 membar_producer();
557 557
558 558 if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
559 559 break;
560 560 }
561 561
562 - atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);
562 + atomic_inc_64(&eqp->eq_kstat.eqk_dispatched.value.ui64);
563 563
564 564 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
565 565 ddi_trigger_softintr(eqp->eq_id);
566 566 }
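The pending-list insertion above is a lock-free LIFO prepend: link the new element to the current head, publish that link, then compare-and-swap the head. A minimal user-land model of the same pattern, sketched with C11 atomics standing in for the kernel's membar_producer() and atomic_cas_ptr():

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct elem {
            struct elem *prev;      /* link to the previously pushed element */
    } elem_t;

    static _Atomic(elem_t *) pend;  /* the pending list head, newest first */

    /* Prepend eep; safe against concurrent pushers without any locks. */
    static void
    push(elem_t *eep)
    {
            elem_t *old = atomic_load_explicit(&pend, memory_order_relaxed);

            do {
                    eep->prev = old;        /* set the link before the swap */
            } while (!atomic_compare_exchange_weak_explicit(&pend, &old,
                eep, memory_order_release, memory_order_relaxed));
    }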
567 567
568 568 /*
569 569 * Drain the specified error queue by calling eq_func() for each pending error.
570 570 * This function must be called at or below LOCK_LEVEL or from panic context.
571 571 * In order to synchronize with other attempts to drain the queue, we acquire
572 572 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
573 573 * we must use compare-and-swap to move the pending list to the processing
574 574 * list and to return elements to the free pool in order to synchronize
575 575 * with producers, who do not acquire any locks and only use atomic set/clear.
576 576 *
577 577 * An additional constraint on this function is that if the system panics
578 578 * while this function is running, the panic code must be able to detect and
579 579 * handle all intermediate states and correctly dequeue all errors. The
580 580 * errorq_panic() function below will be used for detecting and handling
581 581 * these intermediate states. The comments in errorq_drain() below explain
582 582 * how we make sure each intermediate state is distinct and consistent.
583 583 */
584 584 void
585 585 errorq_drain(errorq_t *eqp)
586 586 {
587 587 errorq_elem_t *eep, *dep;
588 588
589 589 ASSERT(eqp != NULL);
590 590 mutex_enter(&eqp->eq_lock);
591 591
592 592 /*
593 593 * If there are one or more pending errors, set eq_ptail to point to
594 594 * the first element on the pending list and then attempt to compare-
595 595 * and-swap NULL to the pending list. We use membar_producer() to
596 596 * make sure that eq_ptail will be visible to errorq_panic() below
597 597 * before the pending list is NULLed out. This section is labeled
598 598 * case (1) for errorq_panic, below. If eq_ptail is not yet set (1A)
599 599 * eq_pend has all the pending errors. If atomic_cas_ptr fails or
600 600 * has not been called yet (1B), eq_pend still has all the pending
601 601 * errors. If atomic_cas_ptr succeeds (1C), eq_ptail has all the
602 602 * pending errors.
603 603 */
604 604 while ((eep = eqp->eq_pend) != NULL) {
605 605 eqp->eq_ptail = eep;
606 606 membar_producer();
607 607
608 608 if (atomic_cas_ptr(&eqp->eq_pend, eep, NULL) == eep)
609 609 break;
610 610 }
611 611
612 612 /*
613 613 * If no errors were pending, assert that eq_ptail is set to NULL,
614 614 * drop the consumer lock, and return without doing anything.
615 615 */
616 616 if (eep == NULL) {
617 617 ASSERT(eqp->eq_ptail == NULL);
618 618 mutex_exit(&eqp->eq_lock);
619 619 return;
620 620 }
621 621
622 622 /*
623 623 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
624 624 * oldest error, setting the eqe_next pointer so that we can iterate
625 625 * over the errors from oldest to newest. We use membar_producer()
626 626 * to make sure that these stores are visible before we set eq_phead.
627 627 * If we panic before, during, or just after this loop (case 2),
628 628 * errorq_panic() will simply redo this work, as described below.
629 629 */
630 630 for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
631 631 eep->eqe_prev->eqe_next = eep;
632 632 membar_producer();
633 633
634 634 /*
635 635 * Now set eq_phead to the head of the processing list (the oldest
636 636 * error) and issue another membar_producer() to make sure that
637 637 * eq_phead is seen as non-NULL before we clear eq_ptail. If we panic
638 638 * after eq_phead is set (case 3), we will detect and log these errors
639 639 * in errorq_panic(), as described below.
640 640 */
641 641 eqp->eq_phead = eep;
642 642 membar_producer();
643 643
644 644 eqp->eq_ptail = NULL;
645 645 membar_producer();
646 646
647 647 /*
648 648 * If we enter from errorq_panic_drain(), we may already have
649 649 * errorq elements on the dump list. Find the tail of
650 650 * the list ready for append.
651 651 */
652 652 if (panicstr && (dep = eqp->eq_dump) != NULL) {
653 653 while (dep->eqe_dump != NULL)
654 654 dep = dep->eqe_dump;
655 655 }
656 656
657 657 /*
658 658 * Now iterate over the processing list from oldest (eq_phead) to
659 659 * newest and log each error. Once an error is logged, we use
660 660 * atomic clear to return it to the free pool. If we panic before,
661 661 * during, or after calling eq_func() (case 4), the error will still be
662 662 * found on eq_phead and will be logged in errorq_panic below.
663 663 */
664 664
665 665 while ((eep = eqp->eq_phead) != NULL) {
666 666 eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
667 667 eqp->eq_kstat.eqk_logged.value.ui64++;
668 668
669 669 eqp->eq_phead = eep->eqe_next;
670 670 membar_producer();
671 671
672 672 eep->eqe_next = NULL;
673 673
674 674 /*
675 675 * On panic, we add the element to the dump list for each
676 676 * nvlist errorq. Elements are stored oldest to newest.
677 677 * Then continue, so we don't free and subsequently overwrite
678 678 * any elements which we've put on the dump queue.
679 679 */
680 680 if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
681 681 if (eqp->eq_dump == NULL)
682 682 dep = eqp->eq_dump = eep;
683 683 else
684 684 dep = dep->eqe_dump = eep;
685 685 membar_producer();
686 686 continue;
687 687 }
688 688
689 689 eep->eqe_prev = NULL;
690 690 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
691 691 }
692 692
693 693 mutex_exit(&eqp->eq_lock);
694 694 }
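The prev-to-next reversal in the middle of errorq_drain() is the heart of the hand-off from pending list to processing list. A user-land model of just that pass, assuming tail is the newest element captured from the pending list:

    typedef struct elem {
            struct elem *prev;      /* toward older elements */
            struct elem *next;      /* filled in here, toward newer ones */
    } elem_t;

    /* Build next pointers from tail (newest) back to the oldest element. */
    static elem_t *
    make_processing_list(elem_t *tail)
    {
            elem_t *eep = tail;

            for (eep->next = NULL; eep->prev != NULL; eep = eep->prev)
                    eep->prev->next = eep;

            return (eep);           /* the oldest element, i.e. eq_phead */
    }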
695 695
696 696 /*
697 697 * Now that device tree services are available, set up the soft interrupt
698 698 * handlers for any queues that were created early in boot. We then
699 699 * manually drain these queues to report any pending early errors.
700 700 */
701 701 void
702 702 errorq_init(void)
703 703 {
704 704 dev_info_t *dip = ddi_root_node();
705 705 ddi_softintr_t id;
706 706 errorq_t *eqp;
707 707
708 708 ASSERT(modrootloaded != 0);
709 709 ASSERT(dip != NULL);
710 710
711 711 mutex_enter(&errorq_lock);
712 712
713 713 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
714 714 ddi_iblock_cookie_t ibc =
715 715 (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);
716 716
717 717 if (eqp->eq_id != NULL)
718 718 continue; /* softint already initialized */
719 719
720 720 if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
721 721 errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
722 722 panic("errorq_init: failed to register IPL %u softint "
723 723 "for queue %s", eqp->eq_ipl, eqp->eq_name);
724 724 }
725 725
726 726 eqp->eq_id = id;
727 727 errorq_drain(eqp);
728 728 }
729 729
730 730 mutex_exit(&errorq_lock);
731 731 }
732 732
733 733 /*
734 734 * This function is designed to be called from panic context only, and
735 735 * therefore does not need to acquire errorq_lock when iterating over
736 736 * errorq_list. This function must be called no more than once for each
737 737 * 'what' value (if you change this then review the manipulation of 'dep').
738 738 */
739 739 static uint64_t
740 740 errorq_panic_drain(uint_t what)
741 741 {
742 742 errorq_elem_t *eep, *nep, *dep;
743 743 errorq_t *eqp;
744 744 uint64_t loggedtmp;
745 745 uint64_t logged = 0;
746 746
747 747 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
748 748 if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
749 749 continue; /* do not drain this queue on this pass */
750 750
751 751 loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;
752 752
753 753 /*
754 754 * In case (1B) above, eq_ptail may be set but the
755 755 * atomic_cas_ptr may not have been executed yet or may have
756 756 * failed. Either way, we must log errors in chronological
757 757 * order. So we search the pending list for the error
758 758 * pointed to by eq_ptail. If it is found, we know that all
759 759 * subsequent errors are also still on the pending list, so
760 760 * just NULL out eq_ptail and let errorq_drain(), below,
761 761 * take care of the logging.
762 762 */
763 763 for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
764 764 if (eep == eqp->eq_ptail) {
765 765 ASSERT(eqp->eq_phead == NULL);
766 766 eqp->eq_ptail = NULL;
767 767 break;
768 768 }
769 769 }
770 770
771 771 /*
772 772 * In cases (1C) and (2) above, eq_ptail will be set to the
773 773 * newest error on the processing list but eq_phead will still
774 774 * be NULL. We set the eqe_next pointers so we can iterate
775 775 * over the processing list in order from oldest error to the
776 776 * newest error. We then set eq_phead to point to the oldest
777 777 * error and fall into the for-loop below.
778 778 */
779 779 if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
780 780 for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
781 781 eep = eep->eqe_prev)
782 782 eep->eqe_prev->eqe_next = eep;
783 783
784 784 eqp->eq_phead = eep;
785 785 eqp->eq_ptail = NULL;
786 786 }
787 787
788 788 /*
789 789 * In cases (3) and (4) above (or after case (1C/2) handling),
790 790 * eq_phead will be set to the oldest error on the processing
791 791 * list. We log each error and return it to the free pool.
792 792 *
793 793 * Unlike errorq_drain(), we don't need to worry about updating
794 794 * eq_phead because errorq_panic() will be called at most once.
795 795 * However, we must use atomic_cas_ptr to update the
796 796 * freelist in case errors are still being enqueued during
797 797 * panic.
798 798 */
799 799 for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
800 800 eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
801 801 eqp->eq_kstat.eqk_logged.value.ui64++;
802 802
803 803 nep = eep->eqe_next;
804 804 eep->eqe_next = NULL;
805 805
806 806 /*
807 807 * On panic, we add the element to the dump list for
808 808 * each nvlist errorq, stored oldest to newest. Then
809 809 * continue, so we don't free and subsequently overwrite
810 810 * any elements which we've put on the dump queue.
811 811 */
812 812 if (eqp->eq_flags & ERRORQ_NVLIST) {
813 813 if (eqp->eq_dump == NULL)
814 814 dep = eqp->eq_dump = eep;
815 815 else
816 816 dep = dep->eqe_dump = eep;
817 817 membar_producer();
818 818 continue;
819 819 }
820 820
821 821 eep->eqe_prev = NULL;
822 822 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
823 823 }
824 824
825 825 /*
826 826 * Now go ahead and drain any other errors on the pending list.
827 827 * This call transparently handles case (1A) above, as well as
828 828 * any other errors that were dispatched after errorq_drain()
829 829 * completed its first compare-and-swap.
830 830 */
831 831 errorq_drain(eqp);
832 832
833 833 logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
834 834 }
835 835 return (logged);
836 836 }
837 837
838 838 /*
839 839 * Drain all error queues - called only from panic context. Some drain
840 840 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
841 841 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
842 842 * must be drained last. Drain ERRORQ_VITAL queues before nonvital queues
843 843 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
844 844 * do not drain the nonvital queues if there are many vital errors.
845 845 */
846 846 void
847 847 errorq_panic(void)
848 848 {
849 849 ASSERT(panicstr != NULL);
850 850
851 851 if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
852 852 (void) errorq_panic_drain(0);
853 853 (void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
854 854 (void) errorq_panic_drain(ERRORQ_NVLIST);
855 855 }
856 856
857 857 /*
858 858 * Reserve an error queue element for later processing and dispatching. The
859 859 * element is returned to the caller who may add error-specific data to
860 860 * the element. The element is returned to the free pool when either
861 861 * errorq_commit() is called and the element asynchronously processed
862 862 * or immediately when errorq_cancel() is called.
863 863 */
864 864 errorq_elem_t *
865 865 errorq_reserve(errorq_t *eqp)
866 866 {
867 867 errorq_elem_t *eqep;
868 868
869 869 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
870 - atomic_add_64(&errorq_lost, 1);
870 + atomic_inc_64(&errorq_lost);
871 871 return (NULL);
872 872 }
873 873
874 874 for (;;) {
875 875 int i, rval;
876 876
877 877 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
878 878 eqp->eq_rotor)) == -1) {
879 - atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
879 + atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
880 880 return (NULL);
881 881 }
882 882 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
883 883 if (rval == 0) {
884 884 eqp->eq_rotor = i;
885 885 eqep = &eqp->eq_elems[i];
886 886 break;
887 887 }
888 888 }
889 889
890 890 if (eqp->eq_flags & ERRORQ_NVLIST) {
891 891 errorq_nvelem_t *eqnp = eqep->eqe_data;
892 892 nv_alloc_reset(eqnp->eqn_nva);
893 893 eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
894 894 }
895 895
896 - atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
896 + atomic_inc_64(&eqp->eq_kstat.eqk_reserved.value.ui64);
897 897 return (eqep);
898 898 }
899 899
900 900 /*
901 901 * Commit an errorq element (eqep) for dispatching.
902 902 * This function may be called from any context subject
903 903 * to the Platform Considerations described above.
904 904 */
905 905 void
906 906 errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
907 907 {
908 908 errorq_elem_t *old;
909 909
910 910 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
911 - atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
911 + atomic_inc_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64);
912 912 return;
913 913 }
914 914
915 915 for (;;) {
916 916 old = eqp->eq_pend;
917 917 eqep->eqe_prev = old;
918 918 membar_producer();
919 919
920 920 if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
921 921 break;
922 922 }
923 923
924 - atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);
924 + atomic_inc_64(&eqp->eq_kstat.eqk_committed.value.ui64);
925 925
926 926 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
927 927 ddi_trigger_softintr(eqp->eq_id);
928 928 }
929 929
930 930 /*
931 931 * Cancel an errorq element reservation by returning the specified element
932 932 * to the free pool. Duplicate or invalid frees are not supported.
933 933 */
934 934 void
935 935 errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
936 936 {
937 937 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
938 938 return;
939 939
940 940 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);
941 941
942 - atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
942 + atomic_inc_64(&eqp->eq_kstat.eqk_cancelled.value.ui64);
943 943 }
944 944
945 945 /*
946 946 * Write elements on the dump list of each nvlist errorq to the dump device.
947 947 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
948 948 */
949 949 void
950 950 errorq_dump(void)
951 951 {
952 952 errorq_elem_t *eep;
953 953 errorq_t *eqp;
954 954
955 955 if (ereport_dumpbuf == NULL)
956 956 return; /* reboot or panic before errorq is even set up */
957 957
958 958 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
959 959 if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
960 960 !(eqp->eq_flags & ERRORQ_ACTIVE))
961 961 continue; /* do not dump this queue on panic */
962 962
963 963 for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
964 964 errorq_nvelem_t *eqnp = eep->eqe_data;
965 965 size_t len = 0;
966 966 erpt_dump_t ed;
967 967 int err;
968 968
969 969 (void) nvlist_size(eqnp->eqn_nvl,
970 970 &len, NV_ENCODE_NATIVE);
971 971
972 972 if (len > ereport_dumplen || len == 0) {
973 973 cmn_err(CE_WARN, "%s: unable to save error "
974 974 "report %p due to size %lu\n",
975 975 eqp->eq_name, (void *)eep, len);
976 976 continue;
977 977 }
978 978
979 979 if ((err = nvlist_pack(eqnp->eqn_nvl,
980 980 (char **)&ereport_dumpbuf, &ereport_dumplen,
981 981 NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
982 982 cmn_err(CE_WARN, "%s: unable to save error "
983 983 "report %p due to pack error %d\n",
984 984 eqp->eq_name, (void *)eep, err);
985 985 continue;
986 986 }
987 987
988 988 ed.ed_magic = ERPT_MAGIC;
989 989 ed.ed_chksum = checksum32(ereport_dumpbuf, len);
990 990 ed.ed_size = (uint32_t)len;
991 991 ed.ed_pad = 0;
992 992 ed.ed_hrt_nsec = 0;
993 993 ed.ed_hrt_base = panic_hrtime;
994 994 ed.ed_tod_base.sec = panic_hrestime.tv_sec;
995 995 ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;
996 996
997 997 dumpvp_write(&ed, sizeof (ed));
998 998 dumpvp_write(ereport_dumpbuf, len);
999 999 }
1000 1000 }
1001 1001 }
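Each record written above is an erpt_dump_t header followed by ed_size bytes of NV_ENCODE_NATIVE-packed nvlist. A hedged sketch of how a post-reboot consumer might walk such records (fmd(1M)'s actual extraction code lives elsewhere; read_exact() is a hypothetical helper that reads exactly the requested number of bytes from fd):

    erpt_dump_t ed;
    nvlist_t *nvl;
    char *buf;

    while (read_exact(fd, &ed, sizeof (ed)) == 0 &&
        ed.ed_magic == ERPT_MAGIC) {
            buf = malloc(ed.ed_size);
            if (read_exact(fd, buf, ed.ed_size) == 0 &&
                checksum32(buf, ed.ed_size) == ed.ed_chksum &&
                nvlist_unpack(buf, ed.ed_size, &nvl, 0) == 0) {
                    /* replay or print the recovered ereport */
                    nvlist_free(nvl);
            }
            free(buf);
    }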
1002 1002
1003 1003 nvlist_t *
1004 1004 errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
1005 1005 {
1006 1006 errorq_nvelem_t *eqnp = eqep->eqe_data;
1007 1007
1008 1008 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);
1009 1009
1010 1010 return (eqnp->eqn_nvl);
1011 1011 }
1012 1012
1013 1013 nv_alloc_t *
1014 1014 errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
1015 1015 {
1016 1016 errorq_nvelem_t *eqnp = eqep->eqe_data;
1017 1017
1018 1018 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);
1019 1019
1020 1020 return (eqnp->eqn_nva);
1021 1021 }
1022 1022
1023 1023 /*
1024 1024 * Reserve a new element and duplicate the data of the original into it.
1025 1025 */
1026 1026 void *
1027 1027 errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
1028 1028 {
1029 1029 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
1030 1030 ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));
1031 1031
1032 1032 if ((*neqep = errorq_reserve(eqp)) == NULL)
1033 1033 return (NULL);
1034 1034
1035 1035 bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
1036 1036 return ((*neqep)->eqe_data);
1037 1037 }
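A plausible use of errorq_elem_dup() is a drain callback that wants to defer an error rather than consume it: duplicate the element into a fresh reservation, then commit the copy. A sketch (my_err_t and MY_DEFERRED are hypothetical):

    errorq_elem_t *neqep;
    my_err_t *new_data;

    if ((new_data = errorq_elem_dup(eqp, eqep, &neqep)) != NULL) {
            new_data->me_synd |= MY_DEFERRED;   /* annotate the copy */
            errorq_commit(eqp, neqep, ERRORQ_ASYNC);
    }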