Print this page
patch remove-dont-swap-flag
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/panic.c
+++ new/usr/src/uts/common/os/panic.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
27 27 */
28 28
29 29 /*
30 30 * When the operating system detects that it is in an invalid state, a panic
31 31 * is initiated in order to minimize potential damage to user data and to
32 32 * facilitate debugging. There are three major tasks to be performed in
33 33 * a system panic: recording information about the panic in memory (and thus
34 34 * making it part of the crash dump), synchronizing the file systems to
35 35 * preserve user file data, and generating the crash dump. We define the
36 36 * system to be in one of four states with respect to the panic code:
37 37 *
38 38 * CALM - the state of the system prior to any thread initiating a panic
39 39 *
40 40 * QUIESCE - the state of the system when the first thread to initiate
41 41 * a system panic records information about the cause of the panic
42 42 * and renders the system quiescent by stopping other processors
43 43 *
44 44 * SYNC - the state of the system when we synchronize the file systems
45 45 * DUMP - the state when we generate the crash dump.
46 46 *
47 47 * The transitions between these states are irreversible: once we begin
48 48 * panicking, we only make one attempt to perform the actions associated with
49 49 * each state.
50 50 *
51 51 * The panic code itself must be re-entrant because actions taken during any
52 52 * state may lead to another system panic. Additionally, any Solaris
53 53 * thread may initiate a panic at any time, and so we must have synchronization
54 54 * between threads which attempt to initiate a state transition simultaneously.
55 55 * The panic code makes use of a special locking primitive, a trigger, to
56 56 * perform this synchronization. A trigger is simply a word which is set
57 57 * atomically and can only be set once. We declare three triggers, one for
58 58 * each transition between the four states. When a thread enters the panic
59 59 * code it attempts to set each trigger; if it fails it moves on to the
60 60 * next trigger. A special case is the first trigger: if two threads race
61 61 * to perform the transition to QUIESCE, the losing thread may execute before
62 62 * the winner has a chance to stop its CPU. To solve this problem, we have
63 63 * the loser look ahead to see if any other triggers are set; if not, it
64 64 * presumes a panic is underway and simply spins. Unfortunately, since we
65 65 * are panicking, it is not possible to know this with absolute certainty.
66 66 *
67 67 * There are two common reasons for re-entering the panic code once a panic
68 68 * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
69 69 * the operator may type "sync" instead of "go", and the PROM's sync callback
70 70 * routine will invoke panic(); (2) if the clock routine decides that sync
71 71 * or dump is not making progress, it will invoke panic() to force a timeout.
72 72 * The design assumes that a third possibility, another thread causing an
73 73 * unrelated panic while sync or dump is still underway, is extremely unlikely.
74 74 * If this situation occurs, we may end up triggering dump while sync is
75 75 * still in progress. This third case is considered extremely unlikely because
76 76 * all other CPUs are stopped and low-level interrupts have been blocked.
77 77 *
78 78 * The panic code is entered via a call directly to the vpanic() function,
79 79 * or its varargs wrappers panic() and cmn_err(9F). The vpanic routine
80 80 * is implemented in assembly language to record the current machine
81 81 * registers, attempt to set the trigger for the QUIESCE state, and
82 82 * if successful, switch stacks on to the panic_stack before calling into
83 83 * the common panicsys() routine. The first thread to initiate a panic
84 84 * is allowed to make use of the reserved panic_stack so that executing
85 85 * the panic code itself does not overwrite valuable data on that thread's
86 86 * stack *ahead* of the current stack pointer. This data will be preserved
87 87 * in the crash dump and may prove invaluable in determining what this
88 88 * thread has previously been doing. The first thread, saved in panic_thread,
89 89 * is also responsible for stopping the other CPUs as quickly as possible,
90 90 * and then setting the various panic_* variables. Most important among
91 91 * these is panicstr, which allows threads to subsequently bypass held
92 92 * locks so that we can proceed without ever blocking. We must stop the
93 93 * other CPUs *prior* to setting panicstr in case threads running there are
94 94 * currently spinning to acquire a lock; we want that state to be preserved.
95 95 * Every thread which initiates a panic has its T_PANIC flag set so we can
96 96 * identify all such threads in the crash dump.
97 97 *
98 98 * The panic_thread is also allowed to make use of the special memory buffer
99 99 * panicbuf, which on machines with appropriate hardware is preserved across
100 100 * reboots. We allow the panic_thread to store its register set and panic
101 101 * message in this buffer, so even if we fail to obtain a crash dump we will
102 102 * be able to examine the machine after reboot and determine some of the
103 103 * state at the time of the panic. If we do get a dump, the panic buffer
104 104 * data is structured so that a debugger can easily consume the information
105 105 * therein (see <sys/panic.h>).
106 106 *
107 107 * Each platform or architecture is required to implement the functions
108 108 * panic_savetrap() to record trap-specific information to panicbuf,
109 109 * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
110 110 * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
111 111 * miscellaneous platform-specific tasks *after* panicstr is set,
112 112 * panic_showtrap() to print trap-specific information to the console,
113 113 * panic_dump_hw() to perform platform tasks prior to calling dumpsys(),
 * and panic_enter_hw(), which panicsys() calls on entry before any of these.
114 114 *
115 115 * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
116 116 *
117 117 * Words ending in -c interpose k before suffixes which otherwise would
118 118 * indicate a soft c, and thus the verb and adjective forms of 'panic' are
119 119 * spelled "panicked", "panicking", and "panicky" respectively. Use of
120 120 * the ill-conceived "panicing" and "panic'd" is discouraged.
121 121 */
122 122
123 123 #include <sys/types.h>
124 124 #include <sys/varargs.h>
125 125 #include <sys/sysmacros.h>
126 126 #include <sys/cmn_err.h>
127 127 #include <sys/cpuvar.h>
128 128 #include <sys/thread.h>
129 129 #include <sys/t_lock.h>
130 130 #include <sys/cred.h>
131 131 #include <sys/systm.h>
132 132 #include <sys/archsystm.h>
133 133 #include <sys/uadmin.h>
134 134 #include <sys/callb.h>
135 135 #include <sys/vfs.h>
136 136 #include <sys/log.h>
137 137 #include <sys/disp.h>
138 138 #include <sys/param.h>
139 139 #include <sys/dumphdr.h>
140 140 #include <sys/ftrace.h>
141 141 #include <sys/reboot.h>
142 142 #include <sys/debug.h>
143 143 #include <sys/stack.h>
144 144 #include <sys/spl.h>
145 145 #include <sys/errorq.h>
146 146 #include <sys/panic.h>
147 147 #include <sys/fm/util.h>
148 148 #include <sys/clock_impl.h>
149 149
150 150 /*
151 151 * Panic variables which are set once during the QUIESCE state by the
152 152 * first thread to initiate a panic. These are examined by post-mortem
153 153 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
154 154 * the variable naming is historical and allows legacy tools to work.
155 155 */
156 156 #pragma align STACK_ALIGN(panic_stack)
157 157 char panic_stack[PANICSTKSIZE]; /* reserved stack for panic_thread */
158 158 kthread_t *panic_thread; /* first thread to call panicsys() */
159 159 cpu_t panic_cpu; /* copy of the cpu_t from first call to panicsys() */
160 160 label_t panic_regs; /* setjmp label from panic_thread */
161 161 label_t panic_pcb; /* panic_thread's t_pcb before panicsys() overwrote it */
162 162 struct regs *panic_reg; /* regs struct from first panicsys() */
163 163 char *volatile panicstr; /* format string to first panicsys() */
164 164 va_list panicargs; /* arguments to first panicsys() */
165 165 clock_t panic_lbolt; /* lbolt at time of panic */
166 166 int64_t panic_lbolt64; /* lbolt64 at time of panic */
167 167 hrtime_t panic_hrtime; /* hrtime at time of panic */
168 168 timespec_t panic_hrestime; /* hrestime at time of panic */
169 169 int panic_ipl; /* ipl on panic_cpu at time of panic */
170 170 ushort_t panic_schedflag; /* panic_thread's t_schedflag on entry to panicsys() */
171 171 cpu_t *panic_bound_cpu; /* panic_thread's t_bound_cpu on entry to panicsys() */
172 172 char panic_preempt; /* panic_thread's t_preempt on entry to panicsys() */
173 173
174 174 /*
175 175 * Panic variables which can be set via /etc/system or patched while
176 176 * the system is in operation. Again, the stupid names are historical.
177 177 */
178 178 char *panic_bootstr = NULL; /* mdboot string to use after panic */
179 179 int panic_bootfcn = AD_BOOT; /* mdboot function to use after panic */
180 180 int halt_on_panic = 0; /* halt after dump instead of reboot? */
181 181 int nopanicdebug = 0; /* reboot instead of call debugger? */
182 182 int in_sync = 0; /* skip vfs_syncall() and just dump? */
183 183
184 184 /*
185 185 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
186 186 * to use polled mode instead of interrupt-driven i/o.
187 187 */
188 188 int do_polled_io = 0;
189 189
190 190 /*
191 191 * The panic_forced flag is set by the uadmin A_DUMP code to inform the
192 192 * panic subsystem that it should not attempt an initial debug_enter.
193 193 */
194 194 int panic_forced = 0;
195 195
196 196 /*
197 197 * Triggers for panic state transitions:
198 198 */
199 199 int panic_quiesce; /* trigger for CALM -> QUIESCE */
200 200 int panic_sync; /* trigger for QUIESCE -> SYNC */
201 201 int panic_dump; /* trigger for SYNC -> DUMP */
202 202
203 203 /*
204 204 * Variable signifying quiesce(9E) is in progress.
205 205 */
206 206 volatile int quiesce_active = 0;
207 207
/*
 * panicsys() is the common panic routine.  It tags the calling thread with
 * T_PANIC, then takes one of three paths: the one-time QUIESCE work when
 * on_panic_stack is non-zero; a short message when a trigger or panicstr
 * shows a panic is already underway; or a jump straight to the spin loop.
 * Callers that do not spin go on to attempt the SYNC and DUMP transitions,
 * each guarded by panic_trigger(), and finally call mdboot().
 *
 * format, alist - panic message format string and its arguments.
 * rp - register set handed to panic_saveregs() and traceregs(), and saved
 *	in panic_reg on the QUIESCE path.
 * on_panic_stack - non-zero selects the one-time QUIESCE path.
 *	NOTE(review): per the file header this identifies the thread that
 *	won the QUIESCE trigger in vpanic() -- confirm against the assembly.
 *
 * There is no return statement: if mdboot() comes back, control falls
 * through to the infinite loop at "spin".
 */
208 208 void
209 209 panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
210 210 {
/* s keeps the value returned by spl8(); panic_ipl and the spin path use it. */
211 211 int s = spl8();
212 212 kthread_t *t = curthread;
213 213 cpu_t *cp = CPU;
214 214
/* Stays NULL unless the interrupt stack is swapped out below. */
215 215 caddr_t intr_stack = NULL;
↓ open down ↓ |
215 lines elided |
↑ open up ↑ |
/* Assigned and read only when intr_stack is non-NULL. */
216 216 uint_t intr_actv;
217 217
/* Snapshot thread state before it is modified below; saved as panic_*. */
218 218 ushort_t schedflag = t->t_schedflag;
219 219 cpu_t *bound_cpu = t->t_bound_cpu;
220 220 char preempt = t->t_preempt;
221 221 label_t pcb = t->t_pcb;
222 222
/* Record the current context in t_pcb; the QUIESCE path copies it to panic_regs. */
223 223 (void) setjmp(&t->t_pcb);
224 224 t->t_flag |= T_PANIC;
225 225
/* Diff marker: the next line exists only in the old file; this patch removes it. */
226 - t->t_schedflag |= TS_DONT_SWAP;
/*
 * Bind the thread to this CPU and bump t_preempt; presumably this keeps
 * the panicking thread from migrating or being preempted -- TODO confirm.
 */
227 226 t->t_bound_cpu = cp;
228 227 t->t_preempt++;
229 228
230 229 panic_enter_hw(s);
231 230
232 231 /*
233 232 * If we're on the interrupt stack and an interrupt thread is available
234 233 * in this CPU's pool, preserve the interrupt stack by detaching an
235 234 * interrupt thread and making its stack the intr_stack.
236 235 */
237 236 if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
238 237 kthread_t *it = cp->cpu_intr_thread;
239 238
240 239 intr_stack = cp->cpu_intr_stack;
241 240 intr_actv = cp->cpu_intr_actv;
242 241
243 242 cp->cpu_intr_stack = thread_stk_init(it->t_stk);
244 243 cp->cpu_intr_thread = it->t_link;
245 244
246 245 /*
247 246 * Clear only the high level bits of cpu_intr_actv.
248 247 * We want to indicate that high-level interrupts are
249 248 * not active without destroying the low-level interrupt
250 249 * information stored there.
251 250 */
252 251 cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
253 252 }
254 253
255 254 /*
256 255 * Record one-time panic information and quiesce the other CPUs.
257 256 * Then print out the panic message and stack trace.
258 257 */
259 258 if (on_panic_stack) {
260 259 panic_data_t *pdp = (panic_data_t *)panicbuf;
261 260
262 261 pdp->pd_version = PANICBUFVERS;
/* Offset within panicbuf at which the formatted message is written below. */
263 262 pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
264 263
/*
 * NOTE(review): strncpy() leaves pd_uuid unterminated if dump_get_uuid()
 * returns sizeof (pd_uuid) or more characters -- confirm the sizes agree.
 */
265 264 (void) strncpy(pdp->pd_uuid, dump_get_uuid(),
266 265 sizeof (pdp->pd_uuid));
267 266
/* Prefer the trap record when the thread has one; otherwise save rp. */
268 267 if (t->t_panic_trap != NULL)
269 268 panic_savetrap(pdp, t->t_panic_trap);
270 269 else
271 270 panic_saveregs(pdp, rp);
272 271
273 272 (void) vsnprintf(&panicbuf[pdp->pd_msgoff],
274 273 PANICBUFSIZE - pdp->pd_msgoff, format, alist);
275 274
276 275 /*
277 276 * Call into the platform code to stop the other CPUs.
278 277 * We currently have all interrupts blocked, and expect that
279 278 * the platform code will lower ipl only as far as needed to
280 279 * perform cross-calls, and will acquire as *few* locks as is
281 280 * possible -- panicstr is not set so we can still deadlock.
282 281 */
283 282 panic_stopcpus(cp, t, s);
284 283
/*
 * panicstr is deliberately set only after the other CPUs are stopped
 * (see the file header); the remaining panic_* variables record the
 * entry-time snapshot taken at the top of this function.
 */
285 284 panicstr = (char *)format;
286 285 va_copy(panicargs, alist);
287 286 panic_lbolt = LBOLT_NO_ACCOUNT;
288 287 panic_lbolt64 = LBOLT_NO_ACCOUNT64;
289 288 panic_hrestime = hrestime;
290 289 panic_hrtime = gethrtime_waitfree();
291 290 panic_thread = t;
292 291 panic_regs = t->t_pcb;
293 292 panic_reg = rp;
294 293 panic_cpu = *cp;
295 294 panic_ipl = spltoipl(s);
296 295 panic_schedflag = schedflag;
297 296 panic_bound_cpu = bound_cpu;
298 297 panic_preempt = preempt;
299 298 panic_pcb = pcb;
300 299
/* If the interrupt stack was swapped above, make the saved cpu copy show the original. */
301 300 if (intr_stack != NULL) {
302 301 panic_cpu.cpu_intr_stack = intr_stack;
303 302 panic_cpu.cpu_intr_actv = intr_actv;
304 303 }
305 304
306 305 /*
307 306 * Lower ipl to 10 to keep clock() from running, but allow
308 307 * keyboard interrupts to enter the debugger. These callbacks
309 308 * are executed with panicstr set so they can bypass locks.
310 309 */
311 310 splx(ipltospl(CLOCK_LEVEL));
312 311 panic_quiesce_hw(pdp);
313 312 (void) FTRACE_STOP();
314 313 (void) callb_execute_class(CB_CL_PANIC, NULL);
315 314
316 315 if (log_intrq != NULL)
317 316 log_flushq(log_intrq);
318 317
319 318 /*
320 319 * If log_consq has been initialized and syslogd has started,
321 320 * print any messages in log_consq that haven't been consumed.
322 321 */
323 322 if (log_consq != NULL && log_consq != log_backlogq)
324 323 log_printq(log_consq);
325 324
326 325 fm_banner();
327 326
/*
 * When __x86 is defined, the if below guards only the printf of the
 * cpu/thread prefix; the message itself is printed unconditionally.
 */
328 327 #if defined(__x86)
329 328 /*
330 329 * A hypervisor panic originates outside of Solaris, so we
331 330 * don't want to prepend the panic message with misleading
332 331 * pointers from within Solaris.
333 332 */
334 333 if (!IN_XPV_PANIC())
335 334 #endif
336 335 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id,
337 336 (void *)t);
338 337 vprintf(format, alist);
339 338 printf("\n\n");
340 339
341 340 if (t->t_panic_trap != NULL) {
342 341 panic_showtrap(t->t_panic_trap);
343 342 printf("\n");
344 343 }
345 344
346 345 traceregs(rp);
347 346 printf("\n");
348 347
/* Offer the debugger only if enabled at boot and not suppressed or forced. */
349 348 if (((boothowto & RB_DEBUG) || obpdebug) &&
350 349 !nopanicdebug && !panic_forced) {
351 350 if (dumpvp != NULL) {
352 351 debug_enter("panic: entering debugger "
353 352 "(continue to save dump)");
354 353 } else {
355 354 debug_enter("panic: entering debugger "
356 355 "(no dump device, continue to reboot)");
357 356 }
358 357 }
359 358
/* Not on the panic stack, but a panic is already evident: print and continue. */
360 359 } else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
361 360 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
362 361 vprintf(format, alist);
363 362 printf("\n");
364 363 } else
/* No later trigger is set yet: presume a panic is underway and spin (see file header). */
365 364 goto spin;
366 365
367 366 /*
368 367 * Prior to performing sync or dump, we make sure that do_polled_io is
369 368 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
370 369 * will re-enter panic if we are not making progress with sync or dump.
371 370 */
372 371
373 372 /*
374 373 * Sync the filesystems. Reset t_cred if not set because much of
375 374 * the filesystem code depends on CRED() being valid.
376 375 */
/* The in_sync tunable skips the sync entirely; the trigger limits it to one attempt. */
377 376 if (!in_sync && panic_trigger(&panic_sync)) {
378 377 if (t->t_cred == NULL)
379 378 t->t_cred = kcred;
380 379 splx(ipltospl(CLOCK_LEVEL));
381 380 do_polled_io = 1;
382 381 vfs_syncall();
383 382 }
384 383
385 384 /*
386 385 * Take the crash dump. If the dump trigger is already set, try to
387 386 * enter the debugger again before rebooting the system.
388 387 */
389 388 if (panic_trigger(&panic_dump)) {
390 389 panic_dump_hw(s);
391 390 splx(ipltospl(CLOCK_LEVEL));
392 391 errorq_panic();
393 392 do_polled_io = 1;
394 393 dumpsys();
395 394 } else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
396 395 debug_enter("panic: entering debugger (continue to reboot)");
397 396 } else
398 397 printf("dump aborted: please record the above information!\n");
399 398
/* Halt or reboot per the tunables; falls through to spin if mdboot() returns. */
400 399 if (halt_on_panic)
401 400 mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
402 401 else
403 402 mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
404 403 spin:
405 404 /*
406 405 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
407 406 * and unable to jump into the debugger.
408 407 */
409 408 splx(MIN(s, ipltospl(CLOCK_LEVEL)));
410 409 for (;;)
411 410 ;
412 411 }
413 412
/*
 * panic() is the varargs wrapper around vpanic(): it collects the variadic
 * arguments that follow format into a va_list and passes both to vpanic().
 * NOTE(review): vpanic() is not expected to return, so the va_end() below
 * is presumably never reached -- confirm against the vpanic() assembly.
 */
414 414 void
415 415 panic(const char *format, ...)
416 416 {
417 417 va_list alist;
418 418
419 419 va_start(alist, format);
420 420 vpanic(format, alist);
421 421 va_end(alist);
422 422 }
↓ open down ↓ |
186 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX