5045 use atomic_{inc,dec}_* instead of atomic_add_*
--- old/usr/src/uts/common/os/errorq.c
+++ new/usr/src/uts/common/os/errorq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Kernel Error Queues
28 28 *
29 29 * A common problem when handling hardware error traps and interrupts is that
30 30 * these errors frequently must be handled at high interrupt level, where
31 31 * reliably producing error messages and safely examining and manipulating
32 32 * other kernel state may not be possible. The kernel error queue primitive is
33 33 * a common set of routines that allow a subsystem to maintain a queue of
34 34 * errors that can be processed by an explicit call from a safe context or by a
35 35 * soft interrupt that fires at a specific lower interrupt level. The queue
36 36 * management code also ensures that if the system panics, all in-transit
37 37 * errors are logged prior to reset. Each queue has an associated kstat for
38 38 * observing the number of errors dispatched and logged, and mdb(1) debugging
39 39 * support is provided for live and post-mortem observability.
40 40 *
41 41 * Memory Allocation
42 42 *
43 43 * All of the queue data structures are allocated in advance as part of
44 44 * the errorq_create() call. No additional memory allocations are
45 45 * performed as part of errorq_dispatch(), errorq_reserve(),
46 46 * errorq_commit() or errorq_drain(). This design
47 47 * facilitates reliable error queue processing even when the system is low
48 48 * on memory, and ensures that errorq_dispatch() can be called from any
49 49 * context. When the queue is created, the maximum queue length is
50 50 * specified as a parameter to errorq_create() and errorq_nvcreate(). This
51 51 * length should represent a reasonable upper bound on the number of
52 52 * simultaneous errors. If errorq_dispatch() or errorq_reserve() is
53 53 * invoked and no free queue elements are available, the error is
54 54 * dropped and will not be logged. Typically, the queue will only be
55 55 * exhausted by an error storm, and in this case
56 56 * the earlier errors provide the most important data for analysis.
57 57 * When a new error is dispatched, the error data is copied into the
58 58 * preallocated queue element so that the caller's buffer can be reused.
59 59 *
60 60 * When a new error is reserved, an element is moved from the free pool
61 61 * and returned to the caller. The element buffer data, eqe_data, may be
62 62 * managed by the caller and dispatched to the errorq by calling
63 63 * errorq_commit(). This is useful for additions to errorqs
64 64 * created with errorq_nvcreate() to handle name-value pair (nvpair) data.
65 65 * See below for a discussion of nvlist errorqs.
66 66 *
67 67 * Queue Drain Callback
68 68 *
69 69 * When the error queue is drained, the caller's queue drain callback is
70 70 * invoked with a pointer to the saved error data. This function may be
71 71 * called from passive kernel context or soft interrupt context at or
72 72 * below LOCK_LEVEL, or as part of panic(). As such, the callback should
73 73 * basically only be calling cmn_err (but NOT with the CE_PANIC flag).
74 74 * The callback must not call panic(), attempt to allocate memory, or wait
75 75 * on a condition variable. The callback may not call errorq_destroy()
76 76 * or errorq_drain() on the same error queue that called it.
77 77 *
78 78 * The queue drain callback will always be called for each pending error
79 79 * in the order in which errors were enqueued (oldest to newest). The
80 80 * queue drain callback is guaranteed to provide at *least* once semantics
81 81 * for all errors that are successfully dispatched (i.e. for which
82 82 * errorq_dispatch() has successfully completed). If an unrelated panic
83 83 * occurs while the queue drain callback is running on a vital queue, the
84 84 * panic subsystem will continue the queue drain and the callback may be
85 85 * invoked again for the same error. Therefore, the callback should
86 86 * restrict itself to logging messages and taking other actions that are
87 87 * not destructive if repeated.
88 88 *
89 89 * Name-Value Pair Error Queues
90 90 *
91 91 * During error handling, it may be more convenient to store error
92 92 * queue element data as a fixed buffer of name-value pairs. The
93 93 * nvpair library allows construction and destruction of nvlists
94 94 * in pre-allocated memory buffers.
95 95 *
96 96 * Error queues created via errorq_nvcreate() store queue element
97 97 * data as fixed buffer nvlists (ereports). errorq_reserve()
98 98 * allocates an errorq element from eqp->eq_bitmap and returns a valid
99 99 * pointer to an errorq_elem_t (queue element) and a pre-allocated
100 100 * fixed buffer nvlist. errorq_elem_nvl() is used to gain access
101 101 * to the nvlist to add name-value ereport members prior to
102 102 * dispatching the error queue element in errorq_commit().
103 103 *
104 104 * Once dispatched, the drain function will return the element to
105 105 * eqp->eq_bitmap and reset the associated nv_alloc structure.
106 106 * errorq_cancel() may be called to cancel an element reservation
107 107 * that was never dispatched (committed). This is useful in
108 108 * cases where a programming error prevents a queue element from being
109 109 * dispatched.
110 110 *
111 111 * Queue Management
112 112 *
113 113 * The queue element structures and error data buffers are allocated in
114 114 * two contiguous chunks as part of errorq_create() or errorq_nvcreate().
115 115 * Each queue element structure contains a next pointer,
116 116 * a previous pointer, and a pointer to the corresponding error data
117 117 * buffer. The data buffer for a nvlist errorq is a shared buffer
118 118 * for the allocation of name-value pair lists. The elements are kept on
119 119 * one of four lists:
120 120 *
121 121 * Unused elements are kept in the free pool, managed by eqp->eq_bitmap.
122 122 * The eqe_prev and eqe_next pointers are not used while in the free pool
123 123 * and will be set to NULL.
124 124 *
125 125 * Pending errors are kept on the pending list, a singly-linked list
126 126 * pointed to by eqp->eq_pend, and linked together using eqe_prev. This
127 127 * list is maintained in order from newest error to oldest. The eqe_next
128 128 * pointer is not used by the pending list and will be set to NULL.
129 129 *
130 130 * The processing list is a doubly-linked list pointed to by eqp->eq_phead
131 131 * (the oldest element) and eqp->eq_ptail (the newest element). The
132 132 * eqe_next pointer is used to traverse from eq_phead to eq_ptail, and the
133 133 * eqe_prev pointer is used to traverse from eq_ptail to eq_phead. Once a
134 134 * queue drain operation begins, the current pending list is moved to the
135 135 * processing list in a two-phase commit fashion (eq_ptail being cleared
136 136 * at the beginning but eq_phead only at the end), allowing the panic code
137 137 * to always locate and process all pending errors in the event that a
138 138 * panic occurs in the middle of queue processing.
139 139 *
140 140 * A fourth list is maintained for nvlist errorqs. The dump list,
141 141 * eq_dump is used to link all errorq elements that should be stored
142 142 * in a crash dump file in the event of a system panic. During
143 143 * errorq_panic(), the list is created and subsequently traversed
144 144 * in errorq_dump() during the final phases of a crash dump.
145 145 *
146 146 * Platform Considerations
147 147 *
148 148 * In order to simplify their implementation, error queues make use of the
149 149 * C wrappers for compare-and-swap. If the platform itself does not
150 150 * support compare-and-swap in hardware and the kernel emulation routines
151 151 * are used instead, then the context in which errorq_dispatch() can be
152 152 * safely invoked is further constrained by the implementation of the
153 153 * compare-and-swap emulation. Specifically, if errorq_dispatch() is
154 154 * called from a code path that can be executed above ATOMIC_LEVEL on such
155 155 * a platform, the dispatch code could potentially deadlock unless the
156 156 * corresponding error interrupt is blocked or disabled prior to calling
157 157 * errorq_dispatch(). Error queues should therefore be deployed with
158 158 * caution on these platforms.
159 159 *
160 160 * Interfaces
161 161 *
162 162 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
163 163 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
164 164 *
165 165 * Create a new error queue with the specified name, callback, and
166 166 * properties. A pointer to the new error queue is returned upon success,
167 167 * or NULL is returned to indicate that the queue could not be created.
168 168 * This function must be called from passive kernel context with no locks
169 169 * held that can prevent a sleeping memory allocation from occurring.
170 170 * errorq_create() will return failure if the queue kstats cannot be
171 171 * created, or if a soft interrupt handler cannot be registered.
172 172 *
173 173 * The queue 'name' is a string that is recorded for live and post-mortem
174 174 * examination by a debugger. The queue callback 'func' will be invoked
175 175 * for each error drained from the queue, and will receive the 'private'
176 176 * pointer as its first argument. The callback must obey the rules for
177 177 * callbacks described above. The queue will have maximum length 'qlen'
178 178 * and each element will be able to record up to 'eltsize' bytes of data.
179 179 * The queue's soft interrupt (see errorq_dispatch(), below) will fire
180 180 * at 'ipl', which should not exceed LOCK_LEVEL. The queue 'flags' may
181 181 * include the following flag:
182 182 *
183 183 * ERRORQ_VITAL - This queue contains information that is considered
184 184 * vital to problem diagnosis. Error queues that are marked vital will
185 185 * be automatically drained by the panic subsystem prior to printing
186 186 * the panic messages to the console.
187 187 *
188 188 * void errorq_destroy(errorq);
189 189 *
190 190 * Destroy the specified error queue. The queue is drained of any
191 191 * pending elements and these are logged before errorq_destroy returns.
192 192 * Once errorq_destroy() begins draining the queue, any simultaneous
193 193 * calls to dispatch errors will result in the errors being dropped.
194 194 * The caller must invoke a higher-level abstraction (e.g. disabling
195 195 * an error interrupt) to ensure that error handling code does not
196 196 * attempt to dispatch errors to the queue while it is being freed.
197 197 *
198 198 * void errorq_dispatch(errorq, data, len, flag);
199 199 *
200 200 * Attempt to enqueue the specified error data. If a free queue element
201 201 * is available, the data is copied into a free element and placed on a
202 202 * pending list. If no free queue element is available, the error is
203 203 * dropped. The data length (len) is specified in bytes and should not
204 204 * exceed the queue's maximum element size. If the data length is less
205 205 * than the maximum element size, the remainder of the queue element is
206 206 * filled with zeroes. The flag parameter should be one of:
207 207 *
208 208 * ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
209 209 * IPL to asynchronously drain the queue on behalf of the caller.
210 210 *
211 211 * ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
212 212 * The caller is presumed to be calling errorq_drain() or panic() in
213 213 * the near future in order to drain the queue and log the error.
214 214 *
215 215 * The errorq_dispatch() function may be called from any context, subject
216 216 * to the Platform Considerations described above.
217 217 *
218 218 * void errorq_drain(errorq);
219 219 *
220 220 * Drain the error queue of all pending errors. The queue's callback
221 221 * function is invoked for each error in order from oldest to newest.
222 222 * This function may be used at or below LOCK_LEVEL or from panic context.
223 223 *
224 224 * errorq_elem_t *errorq_reserve(errorq);
225 225 *
226 226 * Reserve an error queue element for later processing and dispatching.
227 227 * The element is returned to the caller who may add error-specific data
228 228 * to the element. The element is returned to the free pool when either
229 229 * errorq_commit() is called and the element asynchronously processed
230 230 * or immediately when errorq_cancel() is called.
231 231 *
232 232 * void errorq_commit(errorq, errorq_elem, flag);
233 233 *
234 234 * Commit an errorq element (eqep) for dispatching, see
235 235 * errorq_dispatch().
236 236 *
237 237 * void errorq_cancel(errorq, errorq_elem);
238 238 *
239 239 * Cancel a pending errorq element reservation. The errorq element is
240 240 * returned to the free pool upon cancelation.
241 241 */
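To make the interface contract concrete, here is a minimal sketch of a hypothetical client (names prefixed my_ are illustrative, the errorq_func_t callback signature is assumed to match its declaration in <sys/errorq.h>, and addr/synd stand in for values captured at trap time):

    #include <sys/errorq.h>
    #include <sys/cmn_err.h>

    typedef struct my_err {
            uint64_t me_addr;       /* faulting address */
            uint32_t me_synd;       /* syndrome bits */
    } my_err_t;

    static errorq_t *my_errq;

    /* Drain callback: log-only and idempotent, per the rules above. */
    /*ARGSUSED*/
    static void
    my_drain(void *private, const void *data, const errorq_elem_t *eep)
    {
            const my_err_t *mep = data;

            cmn_err(CE_WARN, "mydrv: error at 0x%llx, syndrome 0x%x",
                (u_longlong_t)mep->me_addr, mep->me_synd);
    }

    /* From passive context, e.g. attach(): up to 16 in-transit errors. */
    my_errq = errorq_create("my_error_queue", my_drain, NULL, 16,
        sizeof (my_err_t), 2 /* softint ipl; must not exceed LOCK_LEVEL */,
        ERRORQ_VITAL);

    /* At trap time, from any context: copy the record out and go. */
    my_err_t me = { .me_addr = addr, .me_synd = synd };
    errorq_dispatch(my_errq, &me, sizeof (me), ERRORQ_ASYNC);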
242 242
243 243 #include <sys/errorq_impl.h>
244 244 #include <sys/sysmacros.h>
245 245 #include <sys/machlock.h>
246 246 #include <sys/cmn_err.h>
247 247 #include <sys/atomic.h>
248 248 #include <sys/systm.h>
249 249 #include <sys/kmem.h>
250 250 #include <sys/conf.h>
251 251 #include <sys/ddi.h>
252 252 #include <sys/sunddi.h>
253 253 #include <sys/bootconf.h>
254 254 #include <sys/spl.h>
255 255 #include <sys/dumphdr.h>
256 256 #include <sys/compress.h>
257 257 #include <sys/time.h>
258 258 #include <sys/panic.h>
259 259 #include <sys/bitmap.h>
260 260 #include <sys/fm/protocol.h>
261 261 #include <sys/fm/util.h>
262 262
263 263 static struct errorq_kstat errorq_kstat_template = {
264 264 { "dispatched", KSTAT_DATA_UINT64 },
265 265 { "dropped", KSTAT_DATA_UINT64 },
266 266 { "logged", KSTAT_DATA_UINT64 },
267 267 { "reserved", KSTAT_DATA_UINT64 },
268 268 { "reserve_fail", KSTAT_DATA_UINT64 },
269 269 { "committed", KSTAT_DATA_UINT64 },
270 270 { "commit_fail", KSTAT_DATA_UINT64 },
271 271 { "cancelled", KSTAT_DATA_UINT64 }
272 272 };
273 273
274 274 static uint64_t errorq_lost = 0;
275 275 static errorq_t *errorq_list = NULL;
276 276 static kmutex_t errorq_lock;
277 277 static uint64_t errorq_vitalmin = 5;
278 278
279 279 static uint_t
280 280 errorq_intr(caddr_t eqp)
281 281 {
282 282 errorq_drain((errorq_t *)eqp);
283 283 return (DDI_INTR_CLAIMED);
284 284 }
285 285
286 286 /*
287 287 * Create a new error queue with the specified properties and add a software
288 288 * interrupt handler and kstat for it. This function must be called from
289 289 * passive kernel context with no locks held that can prevent a sleeping
290 290 * memory allocation from occurring. This function will return NULL if the
291 291 * softint or kstat for this queue cannot be created.
292 292 */
293 293 errorq_t *
294 294 errorq_create(const char *name, errorq_func_t func, void *private,
295 295 ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
296 296 {
297 297 errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
298 298 ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
299 299 dev_info_t *dip = ddi_root_node();
300 300
301 301 errorq_elem_t *eep;
302 302 ddi_softintr_t id = NULL;
303 303 caddr_t data;
304 304
305 305 ASSERT(qlen != 0 && size != 0);
306 306 ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);
307 307
308 308 /*
309 309 * If a queue is created very early in boot before device tree services
310 310 * are available, the queue softint handler cannot be created. We
311 311 * manually drain these queues and create their softint handlers when
312 312 * it is safe to do so as part of errorq_init(), below.
313 313 */
314 314 if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
315 315 &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
316 316 cmn_err(CE_WARN, "errorq_create: failed to register "
317 317 "IPL %u softint for queue %s", ipl, name);
318 318 kmem_free(eqp, sizeof (errorq_t));
319 319 return (NULL);
320 320 }
321 321
322 322 if ((eqp->eq_ksp = kstat_create("unix", 0, name, "errorq",
323 323 KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
324 324 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
325 325 cmn_err(CE_WARN, "errorq_create: failed to create kstat "
326 326 "for queue %s", name);
327 327 if (id != NULL)
328 328 ddi_remove_softintr(id);
329 329 kmem_free(eqp, sizeof (errorq_t));
330 330 return (NULL);
331 331 }
332 332
333 333 bcopy(&errorq_kstat_template, &eqp->eq_kstat,
334 334 sizeof (struct errorq_kstat));
335 335 eqp->eq_ksp->ks_data = &eqp->eq_kstat;
336 336 eqp->eq_ksp->ks_private = eqp;
337 337 kstat_install(eqp->eq_ksp);
338 338
339 339 (void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
340 340 eqp->eq_name[ERRORQ_NAMELEN] = '\0';
341 341 eqp->eq_func = func;
342 342 eqp->eq_private = private;
343 343 eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
344 344 eqp->eq_qlen = qlen;
345 345 eqp->eq_size = size;
346 346 eqp->eq_ipl = ipl;
347 347 eqp->eq_flags = flags | ERRORQ_ACTIVE;
348 348 eqp->eq_id = id;
349 349 mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
350 350 eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
351 351 eqp->eq_phead = NULL;
352 352 eqp->eq_ptail = NULL;
353 353 eqp->eq_pend = NULL;
354 354 eqp->eq_dump = NULL;
355 355 eqp->eq_bitmap = kmem_zalloc(BT_SIZEOFMAP(qlen), KM_SLEEP);
356 356 eqp->eq_rotor = 0;
357 357
358 358 /*
358 358 * Iterate over the array of errorq_elem_t structures and set each
359 359 * element's data pointer.
361 361 */
362 362 for (eep = eqp->eq_elems, data = eqp->eq_data; qlen > 1; qlen--) {
363 363 eep->eqe_next = NULL;
364 364 eep->eqe_dump = NULL;
365 365 eep->eqe_prev = NULL;
366 366 eep->eqe_data = data;
367 367 data += size;
368 368 eep++;
369 369 }
370 370 eep->eqe_next = NULL;
371 371 eep->eqe_prev = NULL;
372 372 eep->eqe_data = data;
373 373 eep->eqe_dump = NULL;
374 374
375 375 /*
376 376 * Once the errorq is initialized, add it to the global list of queues,
377 377 * and then return a pointer to the new queue to the caller.
378 378 */
379 379 mutex_enter(&errorq_lock);
380 380 eqp->eq_next = errorq_list;
381 381 errorq_list = eqp;
382 382 mutex_exit(&errorq_lock);
383 383
384 384 return (eqp);
385 385 }
386 386
387 387 /*
388 388 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
389 389 * flag and initialize each element to have the start of its data region used
390 390 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
391 391 */
392 392 errorq_t *
393 393 errorq_nvcreate(const char *name, errorq_func_t func, void *private,
394 394 ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
395 395 {
396 396 errorq_t *eqp;
397 397 errorq_elem_t *eep;
398 398
399 399 eqp = errorq_create(name, func, private, qlen,
400 400 size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);
401 401
402 402 if (eqp == NULL)
403 403 return (NULL);
404 404
405 405 mutex_enter(&eqp->eq_lock);
406 406
407 407 for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
408 408 errorq_nvelem_t *eqnp = eep->eqe_data;
409 409 eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
410 410 eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
411 411 }
412 412
413 413 mutex_exit(&eqp->eq_lock);
414 414 return (eqp);
415 415 }
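For a queue created this way, the intended flow is reserve, fill the element's nvlist, then commit (or cancel on failure). A sketch, assuming nv_eqp was returned by errorq_nvcreate() and ena is a caller-supplied value (the class string below is purely illustrative):

    errorq_elem_t *eqep;
    nvlist_t *nvl;

    if ((eqep = errorq_reserve(nv_eqp)) == NULL)
            return;                 /* queue exhausted; error is dropped */

    /* Fixed-buffer nvlist, created for us by errorq_reserve(). */
    nvl = errorq_elem_nvl(nv_eqp, eqep);

    if (nvlist_add_string(nvl, FM_CLASS, "ereport.example.hypothetical") != 0 ||
        nvlist_add_uint64(nvl, "ena", ena) != 0) {
            errorq_cancel(nv_eqp, eqep);    /* back to the free pool */
            return;
    }

    errorq_commit(nv_eqp, eqep, ERRORQ_ASYNC);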
416 416
417 417 /*
418 418 * To destroy an error queue, we mark it as disabled and then explicitly drain
419 419 * all pending errors. Once the drain is complete, we can remove the queue
420 420 * from the global list of queues examined by errorq_panic(), and then free
421 421 * the various queue data structures. The caller must use some higher-level
422 422 * abstraction (e.g. disabling an error interrupt) to ensure that no one will
423 423 * attempt to enqueue new errors while we are freeing this queue.
424 424 */
425 425 void
426 426 errorq_destroy(errorq_t *eqp)
427 427 {
428 428 errorq_t *p, **pp;
429 429 errorq_elem_t *eep;
430 430 ulong_t i;
431 431
432 432 ASSERT(eqp != NULL);
433 433 eqp->eq_flags &= ~ERRORQ_ACTIVE;
434 434 errorq_drain(eqp);
435 435
436 436 mutex_enter(&errorq_lock);
437 437 pp = &errorq_list;
438 438
439 439 for (p = errorq_list; p != NULL; p = p->eq_next) {
440 440 if (p == eqp) {
441 441 *pp = p->eq_next;
442 442 break;
443 443 }
444 444 pp = &p->eq_next;
445 445 }
446 446
447 447 mutex_exit(&errorq_lock);
448 448 ASSERT(p != NULL);
449 449
450 450 if (eqp->eq_flags & ERRORQ_NVLIST) {
451 451 for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
452 452 errorq_nvelem_t *eqnp = eep->eqe_data;
453 453 fm_nva_xdestroy(eqnp->eqn_nva);
454 454 }
455 455 }
456 456
457 457 mutex_destroy(&eqp->eq_lock);
458 458 kstat_delete(eqp->eq_ksp);
459 459
460 460 if (eqp->eq_id != NULL)
461 461 ddi_remove_softintr(eqp->eq_id);
462 462
463 463 kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
464 464 kmem_free(eqp->eq_bitmap, BT_SIZEOFMAP(eqp->eq_qlen));
465 465 kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);
466 466
467 467 kmem_free(eqp, sizeof (errorq_t));
468 468 }
469 469
470 470 /*
471 471 * Private version of bt_availbit() which makes a best-effort attempt
472 472 * to allocate in round-robin fashion in order to facilitate post-mortem
473 473 * diagnosis.
474 474 */
475 475 static index_t
476 476 errorq_availbit(ulong_t *bitmap, size_t nbits, index_t curindex)
477 477 {
478 478 ulong_t bit, maxbit, bx;
479 479 index_t rval, nextindex = curindex + 1;
480 480 index_t nextword = nextindex >> BT_ULSHIFT;
481 481 ulong_t nextbitindex = nextindex & BT_ULMASK;
482 482 index_t maxindex = nbits - 1;
483 483 index_t maxword = maxindex >> BT_ULSHIFT;
484 484 ulong_t maxbitindex = maxindex & BT_ULMASK;
485 485
486 486 /*
487 487 * First check if there are still some bits remaining in the current
488 488 * word, and see if any of those are available. We need to do this by
489 489 * hand as the bt_availbit() function always starts at the beginning
490 490 * of a word.
491 491 */
492 492 if (nextindex <= maxindex && nextbitindex != 0) {
493 493 maxbit = (nextword == maxword) ? maxbitindex : BT_ULMASK;
494 494 for (bx = 0, bit = 1; bx <= maxbit; bx++, bit <<= 1)
495 495 if (bx >= nextbitindex && !(bitmap[nextword] & bit))
496 496 return ((nextword << BT_ULSHIFT) + bx);
497 497 nextword++;
498 498 }
499 499 /*
500 500 * Now check if there are any words remaining before the end of the
501 501 * bitmap. Use bt_availbit() to find any free bits.
502 502 */
503 503 if (nextword <= maxword)
504 504 if ((rval = bt_availbit(&bitmap[nextword],
505 505 nbits - (nextword << BT_ULSHIFT))) != -1)
506 506 return ((nextword << BT_ULSHIFT) + rval);
507 507 /*
508 508 * Finally loop back to the start and look for any free bits starting
509 509 * from the beginning of the bitmap to the current rotor position.
510 510 */
511 511 return (bt_availbit(bitmap, nextindex));
512 512 }
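A worked example of the resulting search order, assuming 64-bit words (BT_ULSHIFT == 6): with nbits == 200 and curindex == 70, the hand-rolled first phase scans bits 71..127 within word 1, bt_availbit() then covers bits 128..199 in the remaining words, and the final call wraps around to bits 0..70, including the rotor position itself.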
513 513
514 514 /*
515 515 * Dispatch a new error into the queue for later processing. The specified
516 516 * data buffer is copied into a preallocated queue element. If 'len' is
517 517 * smaller than the queue element size, the remainder of the queue element is
518 518 * filled with zeroes. This function may be called from any context subject
519 519 * to the Platform Considerations described above.
520 520 */
521 521 void
522 522 errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
523 523 {
524 524 errorq_elem_t *eep, *old;
525 525
526 526 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
527 - atomic_add_64(&errorq_lost, 1);
527 + atomic_inc_64(&errorq_lost);
528 528 return; /* drop error if queue is uninitialized or disabled */
529 529 }
530 530
531 531 for (;;) {
532 532 int i, rval;
533 533
534 534 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
535 535 eqp->eq_rotor)) == -1) {
536 - atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
536 + atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
537 537 return;
538 538 }
539 539 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
540 540 if (rval == 0) {
541 541 eqp->eq_rotor = i;
542 542 eep = &eqp->eq_elems[i];
543 543 break;
544 544 }
545 545 }
546 546
547 547 ASSERT(len <= eqp->eq_size);
548 548 bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));
549 549
550 550 if (len < eqp->eq_size)
551 551 bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);
552 552
553 553 for (;;) {
554 554 old = eqp->eq_pend;
555 555 eep->eqe_prev = old;
556 556 membar_producer();
557 557
558 558 if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
559 559 break;
560 560 }
561 561
562 - atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);
562 + atomic_inc_64(&eqp->eq_kstat.eqk_dispatched.value.ui64);
563 563
564 564 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
565 565 ddi_trigger_softintr(eqp->eq_id);
566 566 }
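The pending-list insertion above is a lock-free LIFO prepend: link the new element to the current head, publish that link, then compare-and-swap the head. A minimal user-land model of the same pattern, sketched with C11 atomics standing in for the kernel's membar_producer() and atomic_cas_ptr():

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct elem {
            struct elem *prev;      /* link to the previously pushed element */
    } elem_t;

    static _Atomic(elem_t *) pend;  /* the pending list head, newest first */

    /* Prepend eep; safe against concurrent pushers without any locks. */
    static void
    push(elem_t *eep)
    {
            elem_t *old = atomic_load_explicit(&pend, memory_order_relaxed);

            do {
                    eep->prev = old;        /* set the link before the swap */
            } while (!atomic_compare_exchange_weak_explicit(&pend, &old,
                eep, memory_order_release, memory_order_relaxed));
    }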
567 567
568 568 /*
569 569 * Drain the specified error queue by calling eq_func() for each pending error.
570 570 * This function must be called at or below LOCK_LEVEL or from panic context.
571 571 * In order to synchronize with other attempts to drain the queue, we acquire
572 572 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
573 573 * we must use compare-and-swap to move the pending list to the processing
574 574 * list and to return elements to the free pool in order to synchronize
575 575 * with producers, who do not acquire any locks and only use atomic set/clear.
576 576 *
577 577 * An additional constraint on this function is that if the system panics
578 578 * while this function is running, the panic code must be able to detect and
579 579 * handle all intermediate states and correctly dequeue all errors. The
580 580 * errorq_panic() function below will be used for detecting and handling
581 581 * these intermediate states. The comments in errorq_drain() below explain
582 582 * how we make sure each intermediate state is distinct and consistent.
583 583 */
584 584 void
585 585 errorq_drain(errorq_t *eqp)
586 586 {
587 587 errorq_elem_t *eep, *dep;
588 588
589 589 ASSERT(eqp != NULL);
590 590 mutex_enter(&eqp->eq_lock);
591 591
592 592 /*
593 593 * If there are one or more pending errors, set eq_ptail to point to
594 594 * the first element on the pending list and then attempt to compare-
595 595 * and-swap NULL to the pending list. We use membar_producer() to
596 596 * make sure that eq_ptail will be visible to errorq_panic() below
597 597 * before the pending list is NULLed out. This section is labeled
598 598 * case (1) for errorq_panic, below. If eq_ptail is not yet set (1A)
599 599 * eq_pend has all the pending errors. If atomic_cas_ptr fails or
600 600 * has not been called yet (1B), eq_pend still has all the pending
601 601 * errors. If atomic_cas_ptr succeeds (1C), eq_ptail has all the
602 602 * pending errors.
603 603 */
604 604 while ((eep = eqp->eq_pend) != NULL) {
605 605 eqp->eq_ptail = eep;
606 606 membar_producer();
607 607
608 608 if (atomic_cas_ptr(&eqp->eq_pend, eep, NULL) == eep)
609 609 break;
610 610 }
611 611
612 612 /*
613 613 * If no errors were pending, assert that eq_ptail is set to NULL,
614 614 * drop the consumer lock, and return without doing anything.
615 615 */
616 616 if (eep == NULL) {
617 617 ASSERT(eqp->eq_ptail == NULL);
618 618 mutex_exit(&eqp->eq_lock);
619 619 return;
620 620 }
621 621
622 622 /*
623 623 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
624 624 * oldest error, setting the eqe_next pointer so that we can iterate
625 625 * over the errors from oldest to newest. We use membar_producer()
626 626 * to make sure that these stores are visible before we set eq_phead.
627 627 * If we panic before, during, or just after this loop (case 2),
628 628 * errorq_panic() will simply redo this work, as described below.
629 629 */
630 630 for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
631 631 eep->eqe_prev->eqe_next = eep;
632 632 membar_producer();
633 633
634 634 /*
635 635 * Now set eq_phead to the head of the processing list (the oldest
636 636 * error) and issue another membar_producer() to make sure that
637 637 * eq_phead is seen as non-NULL before we clear eq_ptail. If we panic
638 638 * after eq_phead is set (case 3), we will detect and log these errors
639 639 * in errorq_panic(), as described below.
640 640 */
641 641 eqp->eq_phead = eep;
642 642 membar_producer();
643 643
644 644 eqp->eq_ptail = NULL;
645 645 membar_producer();
646 646
647 647 /*
648 648 * If we enter from errorq_panic_drain(), we may already have
649 649 * errorq elements on the dump list. Find the tail of
650 650 * the list ready for append.
651 651 */
652 652 if (panicstr && (dep = eqp->eq_dump) != NULL) {
653 653 while (dep->eqe_dump != NULL)
654 654 dep = dep->eqe_dump;
655 655 }
656 656
657 657 /*
658 658 * Now iterate over the processing list from oldest (eq_phead) to
659 659 * newest and log each error. Once an error is logged, we use
660 660 * atomic clear to return it to the free pool. If we panic before,
661 661 * during, or after calling eq_func() (case 4), the error will still be
662 662 * found on eq_phead and will be logged in errorq_panic below.
663 663 */
664 664
665 665 while ((eep = eqp->eq_phead) != NULL) {
666 666 eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
667 667 eqp->eq_kstat.eqk_logged.value.ui64++;
668 668
669 669 eqp->eq_phead = eep->eqe_next;
670 670 membar_producer();
671 671
672 672 eep->eqe_next = NULL;
673 673
674 674 /*
675 675 * On panic, we add the element to the dump list for each
676 676 * nvlist errorq. Elements are stored oldest to newest.
677 677 * Then continue, so we don't free and subsequently overwrite
678 678 * any elements which we've put on the dump queue.
679 679 */
680 680 if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
681 681 if (eqp->eq_dump == NULL)
682 682 dep = eqp->eq_dump = eep;
683 683 else
684 684 dep = dep->eqe_dump = eep;
685 685 membar_producer();
686 686 continue;
687 687 }
688 688
689 689 eep->eqe_prev = NULL;
690 690 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
691 691 }
692 692
693 693 mutex_exit(&eqp->eq_lock);
694 694 }
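The prev-to-next reversal in the middle of errorq_drain() is the heart of the hand-off from pending list to processing list. A user-land model of just that pass, assuming tail is the newest element captured from the pending list:

    typedef struct elem {
            struct elem *prev;      /* toward older elements */
            struct elem *next;      /* filled in here, toward newer ones */
    } elem_t;

    /* Build next pointers from tail (newest) back to the oldest element. */
    static elem_t *
    make_processing_list(elem_t *tail)
    {
            elem_t *eep = tail;

            for (eep->next = NULL; eep->prev != NULL; eep = eep->prev)
                    eep->prev->next = eep;

            return (eep);           /* the oldest element, i.e. eq_phead */
    }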
695 695
696 696 /*
697 697 * Now that device tree services are available, set up the soft interrupt
698 698 * handlers for any queues that were created early in boot. We then
699 699 * manually drain these queues to report any pending early errors.
700 700 */
701 701 void
702 702 errorq_init(void)
703 703 {
704 704 dev_info_t *dip = ddi_root_node();
705 705 ddi_softintr_t id;
706 706 errorq_t *eqp;
707 707
708 708 ASSERT(modrootloaded != 0);
709 709 ASSERT(dip != NULL);
710 710
711 711 mutex_enter(&errorq_lock);
712 712
713 713 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
714 714 ddi_iblock_cookie_t ibc =
715 715 (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);
716 716
717 717 if (eqp->eq_id != NULL)
718 718 continue; /* softint already initialized */
719 719
720 720 if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
721 721 errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
722 722 panic("errorq_init: failed to register IPL %u softint "
723 723 "for queue %s", eqp->eq_ipl, eqp->eq_name);
724 724 }
725 725
726 726 eqp->eq_id = id;
727 727 errorq_drain(eqp);
728 728 }
729 729
730 730 mutex_exit(&errorq_lock);
731 731 }
732 732
733 733 /*
734 734 * This function is designed to be called from panic context only, and
735 735 * therefore does not need to acquire errorq_lock when iterating over
736 736 * errorq_list. This function must be called no more than once for each
737 737 * 'what' value (if you change this then review the manipulation of 'dep').
738 738 */
739 739 static uint64_t
740 740 errorq_panic_drain(uint_t what)
741 741 {
742 742 errorq_elem_t *eep, *nep, *dep;
743 743 errorq_t *eqp;
744 744 uint64_t loggedtmp;
745 745 uint64_t logged = 0;
746 746
747 747 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
748 748 if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
749 749 continue; /* do not drain this queue on this pass */
750 750
751 751 loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;
752 752
753 753 /*
754 754 * In case (1B) above, eq_ptail may be set but the
755 755 * atomic_cas_ptr may not have been executed yet or may have
756 756 * failed. Either way, we must log errors in chronological
757 757 * order. So we search the pending list for the error
758 758 * pointed to by eq_ptail. If it is found, we know that all
759 759 * subsequent errors are also still on the pending list, so
760 760 * just NULL out eq_ptail and let errorq_drain(), below,
761 761 * take care of the logging.
762 762 */
763 763 for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
764 764 if (eep == eqp->eq_ptail) {
765 765 ASSERT(eqp->eq_phead == NULL);
766 766 eqp->eq_ptail = NULL;
767 767 break;
768 768 }
769 769 }
770 770
771 771 /*
772 772 * In cases (1C) and (2) above, eq_ptail will be set to the
773 773 * newest error on the processing list but eq_phead will still
774 774 * be NULL. We set the eqe_next pointers so we can iterate
775 775 * over the processing list in order from oldest error to the
776 776 * newest error. We then set eq_phead to point to the oldest
777 777 * error and fall into the for-loop below.
778 778 */
779 779 if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
780 780 for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
781 781 eep = eep->eqe_prev)
782 782 eep->eqe_prev->eqe_next = eep;
783 783
784 784 eqp->eq_phead = eep;
785 785 eqp->eq_ptail = NULL;
786 786 }
787 787
788 788 /*
789 789 * In cases (3) and (4) above (or after case (1C/2) handling),
790 790 * eq_phead will be set to the oldest error on the processing
791 791 * list. We log each error and return it to the free pool.
792 792 *
793 793 * Unlike errorq_drain(), we don't need to worry about updating
794 794 * eq_phead because errorq_panic() will be called at most once.
795 795 * However, we must use atomic_cas_ptr to update the
796 796 * freelist in case errors are still being enqueued during
797 797 * panic.
798 798 */
799 799 for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
800 800 eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
801 801 eqp->eq_kstat.eqk_logged.value.ui64++;
802 802
803 803 nep = eep->eqe_next;
804 804 eep->eqe_next = NULL;
805 805
806 806 /*
807 807 * On panic, we add the element to the dump list for
808 808 * each nvlist errorq, stored oldest to newest. Then
809 809 * continue, so we don't free and subsequently overwrite
810 810 * any elements which we've put on the dump queue.
811 811 */
812 812 if (eqp->eq_flags & ERRORQ_NVLIST) {
813 813 if (eqp->eq_dump == NULL)
814 814 dep = eqp->eq_dump = eep;
815 815 else
816 816 dep = dep->eqe_dump = eep;
817 817 membar_producer();
818 818 continue;
819 819 }
820 820
821 821 eep->eqe_prev = NULL;
822 822 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
823 823 }
824 824
825 825 /*
826 826 * Now go ahead and drain any other errors on the pending list.
827 827 * This call transparently handles case (1A) above, as well as
828 828 * any other errors that were dispatched after errorq_drain()
829 829 * completed its first compare-and-swap.
830 830 */
831 831 errorq_drain(eqp);
832 832
833 833 logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
834 834 }
835 835 return (logged);
836 836 }
837 837
838 838 /*
839 839 * Drain all error queues - called only from panic context. Some drain
840 840 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
841 841 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
842 842 * must be drained last. Drain ERRORQ_VITAL queues before nonvital queues
843 843 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
844 844 * do not drain the nonvital queues if there are many vital errors.
845 845 */
846 846 void
847 847 errorq_panic(void)
848 848 {
849 849 ASSERT(panicstr != NULL);
850 850
851 851 if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
852 852 (void) errorq_panic_drain(0);
853 853 (void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
854 854 (void) errorq_panic_drain(ERRORQ_NVLIST);
855 855 }
856 856
857 857 /*
858 858 * Reserve an error queue element for later processing and dispatching. The
859 859 * element is returned to the caller who may add error-specific data to
860 860 * the element. The element is returned to the free pool when either
861 861 * errorq_commit() is called and the element asynchronously processed
862 862 * or immediately when errorq_cancel() is called.
863 863 */
864 864 errorq_elem_t *
865 865 errorq_reserve(errorq_t *eqp)
866 866 {
867 867 errorq_elem_t *eqep;
868 868
869 869 if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
870 - atomic_add_64(&errorq_lost, 1);
870 + atomic_inc_64(&errorq_lost);
871 871 return (NULL);
872 872 }
873 873
874 874 for (;;) {
875 875 int i, rval;
876 876
877 877 if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
878 878 eqp->eq_rotor)) == -1) {
879 - atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
879 + atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
880 880 return (NULL);
881 881 }
882 882 BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
883 883 if (rval == 0) {
884 884 eqp->eq_rotor = i;
885 885 eqep = &eqp->eq_elems[i];
886 886 break;
887 887 }
888 888 }
889 889
890 890 if (eqp->eq_flags & ERRORQ_NVLIST) {
891 891 errorq_nvelem_t *eqnp = eqep->eqe_data;
892 892 nv_alloc_reset(eqnp->eqn_nva);
893 893 eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
894 894 }
895 895
896 - atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
896 + atomic_inc_64(&eqp->eq_kstat.eqk_reserved.value.ui64);
897 897 return (eqep);
898 898 }
899 899
900 900 /*
901 901 * Commit an errorq element (eqep) for dispatching.
902 902 * This function may be called from any context subject
903 903 * to the Platform Considerations described above.
904 904 */
905 905 void
906 906 errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
907 907 {
908 908 errorq_elem_t *old;
909 909
910 910 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
911 - atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
911 + atomic_inc_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64);
912 912 return;
913 913 }
914 914
915 915 for (;;) {
916 916 old = eqp->eq_pend;
917 917 eqep->eqe_prev = old;
918 918 membar_producer();
919 919
920 920 if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
921 921 break;
922 922 }
923 923
924 - atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);
924 + atomic_inc_64(&eqp->eq_kstat.eqk_committed.value.ui64);
925 925
926 926 if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
927 927 ddi_trigger_softintr(eqp->eq_id);
928 928 }
929 929
930 930 /*
931 931 * Cancel an errorq element reservation by returning the specified element
932 932 * to the free pool. Duplicate or invalid frees are not supported.
933 933 */
934 934 void
935 935 errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
936 936 {
937 937 if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
938 938 return;
939 939
940 940 BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);
941 941
942 - atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
942 + atomic_inc_64(&eqp->eq_kstat.eqk_cancelled.value.ui64);
943 943 }
944 944
945 945 /*
946 946 * Write elements on the dump list of each nvlist errorq to the dump device.
947 947 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
948 948 */
949 949 void
950 950 errorq_dump(void)
951 951 {
952 952 errorq_elem_t *eep;
953 953 errorq_t *eqp;
954 954
955 955 if (ereport_dumpbuf == NULL)
956 956 return; /* reboot or panic before errorq is even set up */
957 957
958 958 for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
959 959 if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
960 960 !(eqp->eq_flags & ERRORQ_ACTIVE))
961 961 continue; /* do not dump this queue on panic */
962 962
963 963 for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
964 964 errorq_nvelem_t *eqnp = eep->eqe_data;
965 965 size_t len = 0;
966 966 erpt_dump_t ed;
967 967 int err;
968 968
969 969 (void) nvlist_size(eqnp->eqn_nvl,
970 970 &len, NV_ENCODE_NATIVE);
971 971
972 972 if (len > ereport_dumplen || len == 0) {
973 973 cmn_err(CE_WARN, "%s: unable to save error "
974 974 "report %p due to size %lu\n",
975 975 eqp->eq_name, (void *)eep, len);
976 976 continue;
977 977 }
978 978
979 979 if ((err = nvlist_pack(eqnp->eqn_nvl,
980 980 (char **)&ereport_dumpbuf, &ereport_dumplen,
981 981 NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
982 982 cmn_err(CE_WARN, "%s: unable to save error "
983 983 "report %p due to pack error %d\n",
984 984 eqp->eq_name, (void *)eep, err);
985 985 continue;
986 986 }
987 987
988 988 ed.ed_magic = ERPT_MAGIC;
989 989 ed.ed_chksum = checksum32(ereport_dumpbuf, len);
990 990 ed.ed_size = (uint32_t)len;
991 991 ed.ed_pad = 0;
992 992 ed.ed_hrt_nsec = 0;
993 993 ed.ed_hrt_base = panic_hrtime;
994 994 ed.ed_tod_base.sec = panic_hrestime.tv_sec;
995 995 ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;
996 996
997 997 dumpvp_write(&ed, sizeof (ed));
998 998 dumpvp_write(ereport_dumpbuf, len);
999 999 }
1000 1000 }
1001 1001 }
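Each record written above is an erpt_dump_t header followed by ed_size bytes of NV_ENCODE_NATIVE-packed nvlist. A hedged sketch of how a post-reboot consumer might walk such records (fmd(1M)'s actual extraction code lives elsewhere; read_exact() is a hypothetical helper that reads exactly the requested number of bytes from fd):

    erpt_dump_t ed;
    nvlist_t *nvl;
    char *buf;

    while (read_exact(fd, &ed, sizeof (ed)) == 0 &&
        ed.ed_magic == ERPT_MAGIC) {
            buf = malloc(ed.ed_size);
            if (read_exact(fd, buf, ed.ed_size) == 0 &&
                checksum32(buf, ed.ed_size) == ed.ed_chksum &&
                nvlist_unpack(buf, ed.ed_size, &nvl, 0) == 0) {
                    /* replay or print the recovered ereport */
                    nvlist_free(nvl);
            }
            free(buf);
    }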
1002 1002
1003 1003 nvlist_t *
1004 1004 errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
1005 1005 {
1006 1006 errorq_nvelem_t *eqnp = eqep->eqe_data;
1007 1007
1008 1008 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);
1009 1009
1010 1010 return (eqnp->eqn_nvl);
1011 1011 }
1012 1012
1013 1013 nv_alloc_t *
1014 1014 errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
1015 1015 {
1016 1016 errorq_nvelem_t *eqnp = eqep->eqe_data;
1017 1017
1018 1018 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);
1019 1019
1020 1020 return (eqnp->eqn_nva);
1021 1021 }
1022 1022
1023 1023 /*
1024 1024 * Reserve a new element and duplicate the data of the original into it.
1025 1025 */
1026 1026 void *
1027 1027 errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
1028 1028 {
1029 1029 ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
1030 1030 ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));
1031 1031
1032 1032 if ((*neqep = errorq_reserve(eqp)) == NULL)
1033 1033 return (NULL);
1034 1034
1035 1035 bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
1036 1036 return ((*neqep)->eqe_data);
1037 1037 }
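A plausible use of errorq_elem_dup() is a drain callback that wants to defer an error rather than consume it: duplicate the element into a fresh reservation, then commit the copy. A sketch (my_err_t and MY_DEFERRED are hypothetical):

    errorq_elem_t *neqep;
    my_err_t *new_data;

    if ((new_data = errorq_elem_dup(eqp, eqep, &neqep)) != NULL) {
            new_data->me_synd |= MY_DEFERRED;   /* annotate the copy */
            errorq_commit(eqp, neqep, ERRORQ_ASYNC);
    }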