6583 remove whole-process swapping
--- old/usr/src/uts/i86pc/os/trap.c
+++ new/usr/src/uts/i86pc/os/trap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 28 /* All Rights Reserved */
29 29 /* */
30 30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 31 /* All Rights Reserved */
32 32 /* */
33 33
34 34 /*
35 35 * Copyright 2012 Joyent, Inc. All rights reserved.
36 36 */
37 37
38 38 #include <sys/types.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/param.h>
41 41 #include <sys/signal.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/user.h>
44 44 #include <sys/proc.h>
45 45 #include <sys/disp.h>
46 46 #include <sys/class.h>
47 47 #include <sys/core.h>
48 48 #include <sys/syscall.h>
49 49 #include <sys/cpuvar.h>
50 50 #include <sys/vm.h>
51 51 #include <sys/sysinfo.h>
52 52 #include <sys/fault.h>
53 53 #include <sys/stack.h>
54 54 #include <sys/psw.h>
55 55 #include <sys/regset.h>
56 56 #include <sys/fp.h>
57 57 #include <sys/trap.h>
58 58 #include <sys/kmem.h>
59 59 #include <sys/vtrace.h>
60 60 #include <sys/cmn_err.h>
61 61 #include <sys/prsystm.h>
62 62 #include <sys/mutex_impl.h>
63 63 #include <sys/machsystm.h>
64 64 #include <sys/archsystm.h>
65 65 #include <sys/sdt.h>
66 66 #include <sys/avintr.h>
67 67 #include <sys/kobj.h>
68 68
69 69 #include <vm/hat.h>
70 70
71 71 #include <vm/seg_kmem.h>
72 72 #include <vm/as.h>
73 73 #include <vm/seg.h>
74 74 #include <vm/hat_pte.h>
75 75 #include <vm/hat_i86.h>
76 76
77 77 #include <sys/procfs.h>
78 78
79 79 #include <sys/reboot.h>
80 80 #include <sys/debug.h>
81 81 #include <sys/debugreg.h>
82 82 #include <sys/modctl.h>
83 83 #include <sys/aio_impl.h>
84 84 #include <sys/tnf.h>
85 85 #include <sys/tnf_probe.h>
86 86 #include <sys/cred.h>
87 87 #include <sys/mman.h>
88 88 #include <sys/x86_archext.h>
89 89 #include <sys/copyops.h>
90 90 #include <c2/audit.h>
91 91 #include <sys/ftrace.h>
92 92 #include <sys/panic.h>
93 93 #include <sys/traptrace.h>
94 94 #include <sys/ontrap.h>
95 95 #include <sys/cpc_impl.h>
96 96 #include <sys/bootconf.h>
97 97 #include <sys/bootinfo.h>
98 98 #include <sys/promif.h>
99 99 #include <sys/mach_mmu.h>
100 100 #if defined(__xpv)
101 101 #include <sys/hypervisor.h>
102 102 #endif
103 103 #include <sys/contract/process_impl.h>
104 104
105 105 #define USER 0x10000 /* user-mode flag added to trap type */
106 106
107 107 static const char *trap_type_mnemonic[] = {
108 108 "de", "db", "2", "bp",
109 109 "of", "br", "ud", "nm",
110 110 "df", "9", "ts", "np",
111 111 "ss", "gp", "pf", "15",
112 112 "mf", "ac", "mc", "xf"
113 113 };
114 114
115 115 static const char *trap_type[] = {
116 116 "Divide error", /* trap id 0 */
117 117 "Debug", /* trap id 1 */
118 118 "NMI interrupt", /* trap id 2 */
119 119 "Breakpoint", /* trap id 3 */
120 120 "Overflow", /* trap id 4 */
121 121 "BOUND range exceeded", /* trap id 5 */
122 122 "Invalid opcode", /* trap id 6 */
123 123 "Device not available", /* trap id 7 */
124 124 "Double fault", /* trap id 8 */
125 125 "Coprocessor segment overrun", /* trap id 9 */
126 126 "Invalid TSS", /* trap id 10 */
127 127 "Segment not present", /* trap id 11 */
128 128 "Stack segment fault", /* trap id 12 */
129 129 "General protection", /* trap id 13 */
130 130 "Page fault", /* trap id 14 */
131 131 "Reserved", /* trap id 15 */
132 132 "x87 floating point error", /* trap id 16 */
133 133 "Alignment check", /* trap id 17 */
134 134 "Machine check", /* trap id 18 */
135 135 "SIMD floating point exception", /* trap id 19 */
136 136 };
137 137
138 138 #define TRAP_TYPES (sizeof (trap_type) / sizeof (trap_type[0]))
139 139
140 140 #define SLOW_SCALL_SIZE 2
141 141 #define FAST_SCALL_SIZE 2
142 142
143 143 int tudebug = 0;
144 144 int tudebugbpt = 0;
145 145 int tudebugfpe = 0;
146 146 int tudebugsse = 0;
147 147
148 148 #if defined(TRAPDEBUG) || defined(lint)
149 149 int tdebug = 0;
150 150 int lodebug = 0;
151 151 int faultdebug = 0;
152 152 #else
153 153 #define tdebug 0
154 154 #define lodebug 0
155 155 #define faultdebug 0
156 156 #endif /* defined(TRAPDEBUG) || defined(lint) */
157 157
158 158 #if defined(TRAPTRACE)
159 159 /*
160 160 * trap trace record for cpu0 is allocated here.
161 161 * trap trace records for non-boot cpus are allocated in mp_startup_init().
162 162 */
163 163 static trap_trace_rec_t trap_tr0[TRAPTR_NENT];
164 164 trap_trace_ctl_t trap_trace_ctl[NCPU] = {
165 165 {
166 166 (uintptr_t)trap_tr0, /* next record */
167 167 (uintptr_t)trap_tr0, /* first record */
168 168 (uintptr_t)(trap_tr0 + TRAPTR_NENT), /* limit */
169 169 (uintptr_t)0 /* current */
170 170 },
171 171 };
172 172
173 173 /*
174 174 * default trap buffer size
175 175 */
176 176 size_t trap_trace_bufsize = TRAPTR_NENT * sizeof (trap_trace_rec_t);
177 177 int trap_trace_freeze = 0;
178 178 int trap_trace_off = 0;
179 179
180 180 /*
181 181 * A dummy TRAPTRACE entry to use after death.
182 182 */
183 183 trap_trace_rec_t trap_trace_postmort;
184 184
185 185 static void dump_ttrace(void);
186 186 #endif /* TRAPTRACE */
187 187 static void dumpregs(struct regs *);
188 188 static void showregs(uint_t, struct regs *, caddr_t);
189 189 static int kern_gpfault(struct regs *);
190 190
191 191 /*ARGSUSED*/
192 192 static int
193 193 die(uint_t type, struct regs *rp, caddr_t addr, processorid_t cpuid)
194 194 {
195 195 struct panic_trap_info ti;
196 196 const char *trap_name, *trap_mnemonic;
197 197
198 198 if (type < TRAP_TYPES) {
199 199 trap_name = trap_type[type];
200 200 trap_mnemonic = trap_type_mnemonic[type];
201 201 } else {
202 202 trap_name = "trap";
203 203 trap_mnemonic = "-";
204 204 }
205 205
206 206 #ifdef TRAPTRACE
207 207 TRAPTRACE_FREEZE;
208 208 #endif
209 209
210 210 ti.trap_regs = rp;
211 211 ti.trap_type = type & ~USER;
212 212 ti.trap_addr = addr;
213 213
214 214 curthread->t_panic_trap = &ti;
215 215
216 216 if (type == T_PGFLT && addr < (caddr_t)KERNELBASE) {
217 217 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p "
218 218 "occurred in module \"%s\" due to %s",
219 219 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr,
220 220 mod_containing_pc((caddr_t)rp->r_pc),
221 221 addr < (caddr_t)PAGESIZE ?
222 222 "a NULL pointer dereference" :
223 223 "an illegal access to a user address");
224 224 } else
225 225 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p",
226 226 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr);
227 227 return (0);
228 228 }
229 229
230 230 /*
231 231 * Rewrite the instruction at pc to be an int $T_SYSCALLINT instruction.
232 232 *
233 233 * int <vector> is two bytes: 0xCD <vector>
234 234 */
235 235
236 236 static int
237 237 rewrite_syscall(caddr_t pc)
238 238 {
239 239 uchar_t instr[SLOW_SCALL_SIZE] = { 0xCD, T_SYSCALLINT };
240 240
241 241 if (uwrite(curthread->t_procp, instr, SLOW_SCALL_SIZE,
242 242 (uintptr_t)pc) != 0)
243 243 return (1);
244 244
245 245 return (0);
246 246 }
247 247
248 248 /*
249 249 * Test to see if the instruction at pc is sysenter or syscall. The second
250 250 * argument should be the x86 feature flag corresponding to the expected
251 251 * instruction.
252 252 *
253 253 * sysenter is two bytes: 0x0F 0x34
254 254 * syscall is two bytes: 0x0F 0x05
255 255 * int $T_SYSCALLINT is two bytes: 0xCD 0x91
256 256 */
257 257
258 258 static int
259 259 instr_is_other_syscall(caddr_t pc, int which)
260 260 {
261 261 uchar_t instr[FAST_SCALL_SIZE];
262 262
263 263 ASSERT(which == X86FSET_SEP || which == X86FSET_ASYSC || which == 0xCD);
264 264
265 265 if (copyin_nowatch(pc, (caddr_t)instr, FAST_SCALL_SIZE) != 0)
266 266 return (0);
267 267
268 268 switch (which) {
269 269 case X86FSET_SEP:
270 270 if (instr[0] == 0x0F && instr[1] == 0x34)
271 271 return (1);
272 272 break;
273 273 case X86FSET_ASYSC:
274 274 if (instr[0] == 0x0F && instr[1] == 0x05)
275 275 return (1);
276 276 break;
277 277 case 0xCD:
278 278 if (instr[0] == 0xCD && instr[1] == T_SYSCALLINT)
279 279 return (1);
280 280 break;
281 281 }
282 282
283 283 return (0);
284 284 }
285 285
286 286 static const char *
287 287 syscall_insn_string(int syscall_insn)
288 288 {
289 289 switch (syscall_insn) {
290 290 case X86FSET_SEP:
291 291 return ("sysenter");
292 292 case X86FSET_ASYSC:
293 293 return ("syscall");
294 294 case 0xCD:
295 295 return ("int");
296 296 default:
297 297 return ("Unknown");
298 298 }
299 299 }
300 300
301 301 static int
302 302 ldt_rewrite_syscall(struct regs *rp, proc_t *p, int syscall_insn)
303 303 {
304 304 caddr_t linearpc;
305 305 int return_code = 0;
306 306
307 307 mutex_enter(&p->p_ldtlock); /* Must be held across linear_pc() */
308 308
309 309 if (linear_pc(rp, p, &linearpc) == 0) {
310 310
311 311 /*
312 312 * If another thread beat us here, it already changed
313 313 * this site to the slower (int) syscall instruction.
314 314 */
315 315 if (instr_is_other_syscall(linearpc, 0xCD)) {
316 316 return_code = 1;
317 317 } else if (instr_is_other_syscall(linearpc, syscall_insn)) {
318 318
319 319 if (rewrite_syscall(linearpc) == 0) {
320 320 return_code = 1;
321 321 }
322 322 #ifdef DEBUG
323 323 else
324 324 cmn_err(CE_WARN, "failed to rewrite %s "
325 325 "instruction in process %d",
326 326 syscall_insn_string(syscall_insn),
327 327 p->p_pid);
328 328 #endif /* DEBUG */
329 329 }
330 330 }
331 331
332 332 mutex_exit(&p->p_ldtlock); /* Must be held across linear_pc() */
333 333
334 334 return (return_code);
335 335 }
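/*
 * Editor's note: a hedged, user-space sketch of the double-checked rewrite
 * idiom ldt_rewrite_syscall() uses above (under the lock, bail out if another
 * thread already patched the site, otherwise patch it yourself). It is not
 * part of the webrev; the helper names and the patch target are hypothetical.
 * The 0xCD 0x91 bytes are the int $T_SYSCALLINT encoding quoted earlier.
 */
#include <pthread.h>

static pthread_mutex_t sketch_lock = PTHREAD_MUTEX_INITIALIZER;

static int
sketch_patch_once(unsigned char *site)
{
	int patched = 0;

	(void) pthread_mutex_lock(&sketch_lock);
	if (site[0] == 0xCD && site[1] == 0x91) {
		patched = 1;		/* another thread beat us here */
	} else if (site[0] == 0x0F) {	/* sysenter/syscall opcode prefix */
		site[0] = 0xCD;		/* rewrite to int $T_SYSCALLINT */
		site[1] = 0x91;
		patched = 1;
	}
	(void) pthread_mutex_unlock(&sketch_lock);
	return (patched);
}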
336 336
337 337 /*
338 338 * Test to see if the instruction at pc is a system call instruction.
339 339 *
340 340 * The bytes of an lcall instruction used for the syscall trap.
341 341 * static uchar_t lcall[7] = { 0x9a, 0, 0, 0, 0, 0x7, 0 };
342 342 * static uchar_t lcallalt[7] = { 0x9a, 0, 0, 0, 0, 0x27, 0 };
343 343 */
344 344
345 345 #define LCALLSIZE 7
346 346
347 347 static int
348 348 instr_is_lcall_syscall(caddr_t pc)
349 349 {
350 350 uchar_t instr[LCALLSIZE];
351 351
352 352 if (copyin_nowatch(pc, (caddr_t)instr, LCALLSIZE) == 0 &&
353 353 instr[0] == 0x9a &&
354 354 instr[1] == 0 &&
355 355 instr[2] == 0 &&
356 356 instr[3] == 0 &&
357 357 instr[4] == 0 &&
358 358 (instr[5] == 0x7 || instr[5] == 0x27) &&
359 359 instr[6] == 0)
360 360 return (1);
361 361
362 362 return (0);
363 363 }
364 364
365 365 #ifdef __amd64
366 366
367 367 /*
368 368 * In the first revisions of amd64 CPUs produced by AMD, the LAHF and
369 369 * SAHF instructions were not implemented in 64-bit mode. Later revisions
370 370 * did implement these instructions. An extension to the cpuid instruction
371 371 * was added to check for the capability of executing these instructions
372 372 * in 64-bit mode.
373 373 *
374 374 * Intel originally did not implement these instructions in EM64T either,
375 375 * but added them in later revisions.
376 376 *
377 377 * So, there are different chip revisions by both vendors out there that
378 378 * may or may not implement these instructions. The easy solution is to
379 379 * just always emulate these instructions on demand.
380 380 *
381 381 * SAHF == store %ah in the lower 8 bits of %rflags (opcode 0x9e)
382 382 * LAHF == load the lower 8 bits of %rflags into %ah (opcode 0x9f)
383 383 */
384 384
385 385 #define LSAHFSIZE 1
386 386
387 387 static int
388 388 instr_is_lsahf(caddr_t pc, uchar_t *instr)
389 389 {
390 390 if (copyin_nowatch(pc, (caddr_t)instr, LSAHFSIZE) == 0 &&
391 391 (*instr == 0x9e || *instr == 0x9f))
392 392 return (1);
393 393 return (0);
394 394 }
395 395
396 396 /*
397 397 * Emulate the LAHF and SAHF instructions. The reference manuals define
398 398 * these instructions to always load/store bit 1 as a 1, and bits 3 and 5
399 399 * as a 0. The other, defined, bits are copied (the PS_ICC bits and PS_P).
400 400 *
401 401 * Note that %ah is bits 8-15 of %rax.
402 402 */
403 403 static void
404 404 emulate_lsahf(struct regs *rp, uchar_t instr)
405 405 {
406 406 if (instr == 0x9e) {
407 407 /* sahf. Copy bits from %ah to flags. */
408 408 rp->r_ps = (rp->r_ps & ~0xff) |
409 409 ((rp->r_rax >> 8) & PSL_LSAHFMASK) | PS_MB1;
410 410 } else {
411 411 /* lahf. Copy bits from flags to %ah. */
412 412 rp->r_rax = (rp->r_rax & ~0xff00) |
413 413 (((rp->r_ps & PSL_LSAHFMASK) | PS_MB1) << 8);
414 414 }
415 415 rp->r_pc += LSAHFSIZE;
416 416 }
417 417 #endif /* __amd64 */
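/*
 * Editor's note: a minimal stand-alone sketch of the SAHF merge described
 * above, not part of the webrev. The mask values below are assumptions taken
 * from the architectural definition of the LAHF/SAHF-visible bits
 * (SF, ZF, AF, PF, CF) plus the always-one bit 1; the kernel's real
 * PSL_LSAHFMASK and PS_MB1 definitions may differ.
 */
#include <stdint.h>

#define	SKETCH_LSAHFMASK	0xd5u	/* SF|ZF|AF|PF|CF (assumed) */
#define	SKETCH_MB1		0x02u	/* bit 1 always reads as 1 (assumed) */

static uint64_t
sketch_sahf(uint64_t rflags, uint64_t rax)
{
	/* Replace the low 8 flag bits with the sanitized %ah value. */
	return ((rflags & ~0xffULL) |
	    ((rax >> 8) & SKETCH_LSAHFMASK) | SKETCH_MB1);
}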
418 418
419 419 #ifdef OPTERON_ERRATUM_91
420 420
421 421 /*
422 422 * Test to see if the instruction at pc is a prefetch instruction.
423 423 *
424 424 * The first byte of prefetch instructions is always 0x0F.
425 425 * The second byte is 0x18 for regular prefetch or 0x0D for AMD 3dnow prefetch.
426 426 * The third byte (ModRM) contains the register field bits (bits 3-5).
427 427 * These bits must be between 0 and 3 inclusive for regular prefetch and
428 428 * 0 and 1 inclusive for AMD 3dnow prefetch.
429 429 *
430 430  * In 64-bit mode, there may be a one-byte REX prefix (0x40-0x4F).
431 431 */
432 432
433 433 static int
434 434 cmp_to_prefetch(uchar_t *p)
435 435 {
436 436 #ifdef _LP64
437 437 if ((p[0] & 0xF0) == 0x40) /* 64-bit REX prefix */
438 438 p++;
439 439 #endif
440 440 return ((p[0] == 0x0F && p[1] == 0x18 && ((p[2] >> 3) & 7) <= 3) ||
441 441 (p[0] == 0x0F && p[1] == 0x0D && ((p[2] >> 3) & 7) <= 1));
442 442 }
443 443
444 444 static int
445 445 instr_is_prefetch(caddr_t pc)
446 446 {
447 447 uchar_t instr[4]; /* optional REX prefix plus 3-byte opcode */
448 448
449 449 return (copyin_nowatch(pc, instr, sizeof (instr)) == 0 &&
450 450 cmp_to_prefetch(instr));
451 451 }
452 452
453 453 #endif /* OPTERON_ERRATUM_91 */
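/*
 * Editor's note: a hedged, user-space sketch of the prefetch-byte test above,
 * exercised on two hand-assembled encodings; it is not part of the webrev.
 * The byte patterns follow the comment before cmp_to_prefetch() (0x0F 0x18
 * with reg field 0..3, 0x0F 0x0D with reg field 0..1, optional REX prefix);
 * the helper names are hypothetical.
 */
#include <stdio.h>

static int
sketch_is_prefetch(const unsigned char *p)
{
	if ((p[0] & 0xF0) == 0x40)	/* skip a 64-bit REX prefix */
		p++;
	return ((p[0] == 0x0F && p[1] == 0x18 && ((p[2] >> 3) & 7) <= 3) ||
	    (p[0] == 0x0F && p[1] == 0x0D && ((p[2] >> 3) & 7) <= 1));
}

int
main(void)
{
	unsigned char nta[] = { 0x0F, 0x18, 0x06 };	/* prefetchnta (%rsi) */
	unsigned char rexed[] = { 0x41, 0x0F, 0x18, 0x08 }; /* REX.B, /1 */

	printf("%d %d\n", sketch_is_prefetch(nta), sketch_is_prefetch(rexed));
	return (0);
}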
454 454
455 455 /*
456 456 * Called from the trap handler when a processor trap occurs.
457 457 *
458 458 * Note: All user-level traps that might call stop() must exit
459 459 * trap() by 'goto out' or by falling through.
460 460 * Note Also: trap() is usually called with interrupts enabled, (PS_IE == 1)
461 461 * however, there are paths that arrive here with PS_IE == 0 so special care
462 462 * must be taken in those cases.
463 463 */
464 464 void
465 465 trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
466 466 {
467 467 kthread_t *ct = curthread;
468 468 enum seg_rw rw;
469 469 unsigned type;
470 470 proc_t *p = ttoproc(ct);
471 471 klwp_t *lwp = ttolwp(ct);
472 472 uintptr_t lofault;
473 473 label_t *onfault;
474 474 faultcode_t pagefault(), res, errcode;
475 475 enum fault_type fault_type;
476 476 k_siginfo_t siginfo;
477 477 uint_t fault = 0;
478 478 int mstate;
479 479 int sicode = 0;
480 480 int watchcode;
481 481 int watchpage;
482 482 caddr_t vaddr;
483 483 int singlestep_twiddle;
483 lines elided
484 484 size_t sz;
485 485 int ta;
486 486 #ifdef __amd64
487 487 uchar_t instr;
488 488 #endif
489 489
490 490 ASSERT_STACK_ALIGNED();
491 491
492 492 type = rp->r_trapno;
493 493 CPU_STATS_ADDQ(CPU, sys, trap, 1);
494 - ASSERT(ct->t_schedflag & TS_DONT_SWAP);
495 494
496 495 if (type == T_PGFLT) {
497 496
498 497 errcode = rp->r_err;
499 498 if (errcode & PF_ERR_WRITE)
500 499 rw = S_WRITE;
501 500 else if ((caddr_t)rp->r_pc == addr ||
502 501 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC)))
503 502 rw = S_EXEC;
504 503 else
505 504 rw = S_READ;
506 505
507 506 #if defined(__i386)
508 507 /*
509 508 * Pentium Pro work-around
510 509 */
511 510 if ((errcode & PF_ERR_PROT) && pentiumpro_bug4046376) {
512 511 uint_t attr;
513 512 uint_t priv_violation;
514 513 uint_t access_violation;
515 514
516 515 if (hat_getattr(addr < (caddr_t)kernelbase ?
517 516 curproc->p_as->a_hat : kas.a_hat, addr, &attr)
518 517 == -1) {
519 518 errcode &= ~PF_ERR_PROT;
520 519 } else {
521 520 priv_violation = (errcode & PF_ERR_USER) &&
522 521 !(attr & PROT_USER);
523 522 access_violation = (errcode & PF_ERR_WRITE) &&
524 523 !(attr & PROT_WRITE);
525 524 if (!priv_violation && !access_violation)
526 525 goto cleanup;
527 526 }
528 527 }
529 528 #endif /* __i386 */
530 529
531 530 } else if (type == T_SGLSTP && lwp != NULL)
532 531 lwp->lwp_pcb.pcb_drstat = (uintptr_t)addr;
533 532
534 533 if (tdebug)
535 534 showregs(type, rp, addr);
536 535
537 536 if (USERMODE(rp->r_cs)) {
538 537 /*
539 538 * Set up the current cred to use during this trap. u_cred
540 539 * no longer exists. t_cred is used instead.
541 540 * The current process credential applies to the thread for
542 541 * the entire trap. If trapping from the kernel, this
543 542 * should already be set up.
544 543 */
545 544 if (ct->t_cred != p->p_cred) {
546 545 cred_t *oldcred = ct->t_cred;
547 546 /*
548 547 * DTrace accesses t_cred in probe context. t_cred
549 548 * must always be either NULL, or point to a valid,
550 549 * allocated cred structure.
551 550 */
552 551 ct->t_cred = crgetcred();
553 552 crfree(oldcred);
554 553 }
555 554 ASSERT(lwp != NULL);
556 555 type |= USER;
557 556 ASSERT(lwptoregs(lwp) == rp);
558 557 lwp->lwp_state = LWP_SYS;
559 558
560 559 switch (type) {
561 560 case T_PGFLT + USER:
562 561 if ((caddr_t)rp->r_pc == addr)
563 562 mstate = LMS_TFAULT;
564 563 else
565 564 mstate = LMS_DFAULT;
566 565 break;
567 566 default:
568 567 mstate = LMS_TRAP;
569 568 break;
570 569 }
571 570 /* Kernel probe */
572 571 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
573 572 tnf_microstate, state, mstate);
574 573 mstate = new_mstate(ct, mstate);
575 574
576 575 bzero(&siginfo, sizeof (siginfo));
577 576 }
578 577
579 578 switch (type) {
580 579 case T_PGFLT + USER:
581 580 case T_SGLSTP:
582 581 case T_SGLSTP + USER:
583 582 case T_BPTFLT + USER:
584 583 break;
585 584
586 585 default:
587 586 FTRACE_2("trap(): type=0x%lx, regs=0x%lx",
588 587 (ulong_t)type, (ulong_t)rp);
589 588 break;
590 589 }
591 590
592 591 switch (type) {
593 592 case T_SIMDFPE:
594 593 /* Make sure we enable interrupts before die()ing */
595 594 sti(); /* The SIMD exception comes in via cmninttrap */
596 595 /*FALLTHROUGH*/
597 596 default:
598 597 if (type & USER) {
599 598 if (tudebug)
600 599 showregs(type, rp, (caddr_t)0);
601 600 printf("trap: Unknown trap type %d in user mode\n",
602 601 type & ~USER);
603 602 siginfo.si_signo = SIGILL;
604 603 siginfo.si_code = ILL_ILLTRP;
605 604 siginfo.si_addr = (caddr_t)rp->r_pc;
606 605 siginfo.si_trapno = type & ~USER;
607 606 fault = FLTILL;
608 607 break;
609 608 } else {
610 609 (void) die(type, rp, addr, cpuid);
611 610 /*NOTREACHED*/
612 611 }
613 612
614 613 case T_PGFLT: /* system page fault */
615 614 /*
616 615 * If we're under on_trap() protection (see <sys/ontrap.h>),
617 616 * set ot_trap and bounce back to the on_trap() call site
618 617 * via the installed trampoline.
619 618 */
620 619 if ((ct->t_ontrap != NULL) &&
621 620 (ct->t_ontrap->ot_prot & OT_DATA_ACCESS)) {
622 621 ct->t_ontrap->ot_trap |= OT_DATA_ACCESS;
623 622 rp->r_pc = ct->t_ontrap->ot_trampoline;
624 623 goto cleanup;
625 624 }
626 625
627 626 /*
628 627 * If we have an Instruction fault in kernel mode, then that
629 628 * means we've tried to execute a user page (SMEP) or both of
630 629 * PAE and NXE are enabled. In either case, given that it's a
631 630 * kernel fault, we should panic immediately and not try to make
632 631 * any more forward progress. This indicates a bug in the
633 632 * kernel, which if execution continued, could be exploited to
634 633 * wreak havoc on the system.
635 634 */
636 635 if (errcode & PF_ERR_EXEC) {
637 636 (void) die(type, rp, addr, cpuid);
638 637 }
639 638
640 639 /*
641 640 * See if we can handle as pagefault. Save lofault and onfault
642 641 * across this. Here we assume that an address less than
643 642 * KERNELBASE is a user fault. We can do this as copy.s
644 643 * routines verify that the starting address is less than
645 644 * KERNELBASE before starting and because we know that we
646 645 * always have KERNELBASE mapped as invalid to serve as a
647 646 * "barrier".
648 647 */
649 648 lofault = ct->t_lofault;
650 649 onfault = ct->t_onfault;
651 650 ct->t_lofault = 0;
652 651
653 652 mstate = new_mstate(ct, LMS_KFAULT);
654 653
655 654 if (addr < (caddr_t)kernelbase) {
656 655 res = pagefault(addr,
657 656 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 0);
658 657 if (res == FC_NOMAP &&
659 658 addr < p->p_usrstack &&
660 659 grow(addr))
661 660 res = 0;
662 661 } else {
663 662 res = pagefault(addr,
664 663 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 1);
665 664 }
666 665 (void) new_mstate(ct, mstate);
667 666
668 667 /*
669 668 * Restore lofault and onfault. If we resolved the fault, exit.
670 669 * If we didn't and lofault wasn't set, die.
671 670 */
672 671 ct->t_lofault = lofault;
673 672 ct->t_onfault = onfault;
674 673 if (res == 0)
675 674 goto cleanup;
676 675
677 676 #if defined(OPTERON_ERRATUM_93) && defined(_LP64)
678 677 if (lofault == 0 && opteron_erratum_93) {
679 678 /*
680 679 * Workaround for Opteron Erratum 93. On return from
681 680  * a System Management Interrupt at a HLT instruction
682 681 * the %rip might be truncated to a 32 bit value.
683 682 * BIOS is supposed to fix this, but some don't.
684 683 * If this occurs we simply restore the high order bits.
685 684 * The HLT instruction is 1 byte of 0xf4.
686 685 */
687 686 uintptr_t rip = rp->r_pc;
688 687
689 688 if ((rip & 0xfffffffful) == rip) {
690 689 rip |= 0xfffffffful << 32;
691 690 if (hat_getpfnum(kas.a_hat, (caddr_t)rip) !=
692 691 PFN_INVALID &&
693 692 (*(uchar_t *)rip == 0xf4 ||
694 693 *(uchar_t *)(rip - 1) == 0xf4)) {
695 694 rp->r_pc = rip;
696 695 goto cleanup;
697 696 }
698 697 }
699 698 }
700 699 #endif /* OPTERON_ERRATUM_93 && _LP64 */
701 700
702 701 #ifdef OPTERON_ERRATUM_91
703 702 if (lofault == 0 && opteron_erratum_91) {
704 703 /*
705 704 * Workaround for Opteron Erratum 91. Prefetches may
706 705 * generate a page fault (they're not supposed to do
707 706 * that!). If this occurs we simply return back to the
708 707 * instruction.
709 708 */
710 709 caddr_t pc = (caddr_t)rp->r_pc;
711 710
712 711 /*
713 712 * If the faulting PC is not mapped, this is a
714 713 * legitimate kernel page fault that must result in a
715 714 * panic. If the faulting PC is mapped, it could contain
716 715 * a prefetch instruction. Check for that here.
717 716 */
718 717 if (hat_getpfnum(kas.a_hat, pc) != PFN_INVALID) {
719 718 if (cmp_to_prefetch((uchar_t *)pc)) {
720 719 #ifdef DEBUG
721 720 cmn_err(CE_WARN, "Opteron erratum 91 "
722 721 "occurred: kernel prefetch"
723 722 " at %p generated a page fault!",
724 723 (void *)rp->r_pc);
725 724 #endif /* DEBUG */
726 725 goto cleanup;
727 726 }
728 727 }
729 728 (void) die(type, rp, addr, cpuid);
730 729 }
731 730 #endif /* OPTERON_ERRATUM_91 */
732 731
733 732 if (lofault == 0)
734 733 (void) die(type, rp, addr, cpuid);
735 734
736 735 /*
737 736 * Cannot resolve fault. Return to lofault.
738 737 */
739 738 if (lodebug) {
740 739 showregs(type, rp, addr);
741 740 traceregs(rp);
742 741 }
743 742 if (FC_CODE(res) == FC_OBJERR)
744 743 res = FC_ERRNO(res);
745 744 else
746 745 res = EFAULT;
747 746 rp->r_r0 = res;
748 747 rp->r_pc = ct->t_lofault;
749 748 goto cleanup;
750 749
751 750 case T_PGFLT + USER: /* user page fault */
752 751 if (faultdebug) {
753 752 char *fault_str;
754 753
755 754 switch (rw) {
756 755 case S_READ:
757 756 fault_str = "read";
758 757 break;
759 758 case S_WRITE:
760 759 fault_str = "write";
761 760 break;
762 761 case S_EXEC:
763 762 fault_str = "exec";
764 763 break;
765 764 default:
766 765 fault_str = "";
767 766 break;
768 767 }
769 768 printf("user %s fault: addr=0x%lx errcode=0x%x\n",
770 769 fault_str, (uintptr_t)addr, errcode);
771 770 }
772 771
773 772 #if defined(OPTERON_ERRATUM_100) && defined(_LP64)
774 773 /*
775 774 * Workaround for AMD erratum 100
776 775 *
777 776 * A 32-bit process may receive a page fault on a non
778 777 * 32-bit address by mistake. The range of the faulting
779 778 * address will be
780 779 *
781 780 * 0xffffffff80000000 .. 0xffffffffffffffff or
782 781 * 0x0000000100000000 .. 0x000000017fffffff
783 782 *
784 783 * The fault is always due to an instruction fetch, however
785 784 * the value of r_pc should be correct (in 32 bit range),
786 785 * so we ignore the page fault on the bogus address.
787 786 */
788 787 if (p->p_model == DATAMODEL_ILP32 &&
789 788 (0xffffffff80000000 <= (uintptr_t)addr ||
790 789 (0x100000000 <= (uintptr_t)addr &&
791 790 (uintptr_t)addr <= 0x17fffffff))) {
792 791 if (!opteron_erratum_100)
793 792 panic("unexpected erratum #100");
794 793 if (rp->r_pc <= 0xffffffff)
795 794 goto out;
796 795 }
797 796 #endif /* OPTERON_ERRATUM_100 && _LP64 */
798 797
799 798 ASSERT(!(curthread->t_flag & T_WATCHPT));
800 799 watchpage = (pr_watch_active(p) && pr_is_watchpage(addr, rw));
801 800 #ifdef __i386
802 801 /*
803 802 * In 32-bit mode, the lcall (system call) instruction fetches
804 803 * one word from the stack, at the stack pointer, because of the
805 804 * way the call gate is constructed. This is a bogus
806 805 * read and should not be counted as a read watchpoint.
807 806 * We work around the problem here by testing to see if
808 807 * this situation applies and, if so, simply jumping to
809 808 * the code in locore.s that fields the system call trap.
810 809 * The registers on the stack are already set up properly
811 810 * due to the match between the call gate sequence and the
812 811 * trap gate sequence. We just have to adjust the pc.
813 812 */
814 813 if (watchpage && addr == (caddr_t)rp->r_sp &&
815 814 rw == S_READ && instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
816 815 extern void watch_syscall(void);
817 816
818 817 rp->r_pc += LCALLSIZE;
819 818 watch_syscall(); /* never returns */
820 819 /* NOTREACHED */
821 820 }
822 821 #endif /* __i386 */
823 822 vaddr = addr;
824 823 if (!watchpage || (sz = instr_size(rp, &vaddr, rw)) <= 0)
825 824 fault_type = (errcode & PF_ERR_PROT)? F_PROT: F_INVAL;
826 825 else if ((watchcode = pr_is_watchpoint(&vaddr, &ta,
827 826 sz, NULL, rw)) != 0) {
828 827 if (ta) {
829 828 do_watch_step(vaddr, sz, rw,
830 829 watchcode, rp->r_pc);
831 830 fault_type = F_INVAL;
832 831 } else {
833 832 bzero(&siginfo, sizeof (siginfo));
834 833 siginfo.si_signo = SIGTRAP;
835 834 siginfo.si_code = watchcode;
836 835 siginfo.si_addr = vaddr;
837 836 siginfo.si_trapafter = 0;
838 837 siginfo.si_pc = (caddr_t)rp->r_pc;
839 838 fault = FLTWATCH;
840 839 break;
841 840 }
842 841 } else {
843 842 /* XXX pr_watch_emul() never succeeds (for now) */
844 843 if (rw != S_EXEC && pr_watch_emul(rp, vaddr, rw))
845 844 goto out;
846 845 do_watch_step(vaddr, sz, rw, 0, 0);
847 846 fault_type = F_INVAL;
848 847 }
849 848
850 849 res = pagefault(addr, fault_type, rw, 0);
851 850
852 851 /*
853 852 * If pagefault() succeeded, ok.
854 853 * Otherwise attempt to grow the stack.
855 854 */
856 855 if (res == 0 ||
857 856 (res == FC_NOMAP &&
858 857 addr < p->p_usrstack &&
859 858 grow(addr))) {
860 859 lwp->lwp_lastfault = FLTPAGE;
861 860 lwp->lwp_lastfaddr = addr;
862 861 if (prismember(&p->p_fltmask, FLTPAGE)) {
863 862 bzero(&siginfo, sizeof (siginfo));
864 863 siginfo.si_addr = addr;
865 864 (void) stop_on_fault(FLTPAGE, &siginfo);
866 865 }
867 866 goto out;
868 867 } else if (res == FC_PROT && addr < p->p_usrstack &&
869 868 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC))) {
870 869 report_stack_exec(p, addr);
871 870 }
872 871
873 872 #ifdef OPTERON_ERRATUM_91
874 873 /*
875 874 * Workaround for Opteron Erratum 91. Prefetches may generate a
876 875 * page fault (they're not supposed to do that!). If this
877 876 * occurs we simply return back to the instruction.
878 877 *
879 878 * We rely on copyin to properly fault in the page with r_pc.
880 879 */
881 880 if (opteron_erratum_91 &&
882 881 addr != (caddr_t)rp->r_pc &&
883 882 instr_is_prefetch((caddr_t)rp->r_pc)) {
884 883 #ifdef DEBUG
885 884 cmn_err(CE_WARN, "Opteron erratum 91 occurred: "
886 885 "prefetch at %p in pid %d generated a trap!",
887 886 (void *)rp->r_pc, p->p_pid);
888 887 #endif /* DEBUG */
889 888 goto out;
890 889 }
891 890 #endif /* OPTERON_ERRATUM_91 */
892 891
893 892 if (tudebug)
894 893 showregs(type, rp, addr);
895 894 /*
896 895 * In the case where both pagefault and grow fail,
897 896 * set the code to the value provided by pagefault.
898 897 * We map all errors returned from pagefault() to SIGSEGV.
899 898 */
900 899 bzero(&siginfo, sizeof (siginfo));
901 900 siginfo.si_addr = addr;
902 901 switch (FC_CODE(res)) {
903 902 case FC_HWERR:
904 903 case FC_NOSUPPORT:
905 904 siginfo.si_signo = SIGBUS;
906 905 siginfo.si_code = BUS_ADRERR;
907 906 fault = FLTACCESS;
908 907 break;
909 908 case FC_ALIGN:
910 909 siginfo.si_signo = SIGBUS;
911 910 siginfo.si_code = BUS_ADRALN;
912 911 fault = FLTACCESS;
913 912 break;
914 913 case FC_OBJERR:
915 914 if ((siginfo.si_errno = FC_ERRNO(res)) != EINTR) {
916 915 siginfo.si_signo = SIGBUS;
917 916 siginfo.si_code = BUS_OBJERR;
918 917 fault = FLTACCESS;
919 918 }
920 919 break;
921 920 default: /* FC_NOMAP or FC_PROT */
922 921 siginfo.si_signo = SIGSEGV;
923 922 siginfo.si_code =
924 923 (res == FC_NOMAP)? SEGV_MAPERR : SEGV_ACCERR;
925 924 fault = FLTBOUNDS;
926 925 break;
927 926 }
928 927 break;
929 928
930 929 case T_ILLINST + USER: /* invalid opcode fault */
931 930 /*
932 931 * If the syscall instruction is disabled due to LDT usage, a
933 932 * user program that attempts to execute it will trigger a #ud
934 933 * trap. Check for that case here. If this occurs on a CPU which
935 934 * doesn't even support syscall, the result of all of this will
936 935 * be to emulate that particular instruction.
937 936 */
938 937 if (p->p_ldt != NULL &&
939 938 ldt_rewrite_syscall(rp, p, X86FSET_ASYSC))
940 939 goto out;
941 940
942 941 #ifdef __amd64
943 942 /*
944 943 * Emulate the LAHF and SAHF instructions if needed.
945 944 * See the instr_is_lsahf function for details.
946 945 */
947 946 if (p->p_model == DATAMODEL_LP64 &&
948 947 instr_is_lsahf((caddr_t)rp->r_pc, &instr)) {
949 948 emulate_lsahf(rp, instr);
950 949 goto out;
951 950 }
952 951 #endif
953 952
954 953 /*FALLTHROUGH*/
955 954
956 955 if (tudebug)
957 956 showregs(type, rp, (caddr_t)0);
958 957 siginfo.si_signo = SIGILL;
959 958 siginfo.si_code = ILL_ILLOPC;
960 959 siginfo.si_addr = (caddr_t)rp->r_pc;
961 960 fault = FLTILL;
962 961 break;
963 962
964 963 case T_ZERODIV + USER: /* integer divide by zero */
965 964 if (tudebug && tudebugfpe)
966 965 showregs(type, rp, (caddr_t)0);
967 966 siginfo.si_signo = SIGFPE;
968 967 siginfo.si_code = FPE_INTDIV;
969 968 siginfo.si_addr = (caddr_t)rp->r_pc;
970 969 fault = FLTIZDIV;
971 970 break;
972 971
973 972 case T_OVFLW + USER: /* integer overflow */
974 973 if (tudebug && tudebugfpe)
975 974 showregs(type, rp, (caddr_t)0);
976 975 siginfo.si_signo = SIGFPE;
977 976 siginfo.si_code = FPE_INTOVF;
978 977 siginfo.si_addr = (caddr_t)rp->r_pc;
979 978 fault = FLTIOVF;
980 979 break;
981 980
982 981 case T_NOEXTFLT + USER: /* math coprocessor not available */
983 982 if (tudebug && tudebugfpe)
984 983 showregs(type, rp, addr);
985 984 if (fpnoextflt(rp)) {
986 985 siginfo.si_signo = SIGILL;
987 986 siginfo.si_code = ILL_ILLOPC;
988 987 siginfo.si_addr = (caddr_t)rp->r_pc;
989 988 fault = FLTILL;
990 989 }
991 990 break;
992 991
993 992 case T_EXTOVRFLT: /* extension overrun fault */
994 993 /* check if we took a kernel trap on behalf of user */
995 994 {
996 995 extern void ndptrap_frstor(void);
997 996 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
998 997 sti(); /* T_EXTOVRFLT comes in via cmninttrap */
999 998 (void) die(type, rp, addr, cpuid);
1000 999 }
1001 1000 type |= USER;
1002 1001 }
1003 1002 /*FALLTHROUGH*/
1004 1003 case T_EXTOVRFLT + USER: /* extension overrun fault */
1005 1004 if (tudebug && tudebugfpe)
1006 1005 showregs(type, rp, addr);
1007 1006 if (fpextovrflt(rp)) {
1008 1007 siginfo.si_signo = SIGSEGV;
1009 1008 siginfo.si_code = SEGV_MAPERR;
1010 1009 siginfo.si_addr = (caddr_t)rp->r_pc;
1011 1010 fault = FLTBOUNDS;
1012 1011 }
1013 1012 break;
1014 1013
1015 1014 case T_EXTERRFLT: /* x87 floating point exception pending */
1016 1015 /* check if we took a kernel trap on behalf of user */
1017 1016 {
1018 1017 extern void ndptrap_frstor(void);
1019 1018 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
1020 1019 sti(); /* T_EXTERRFLT comes in via cmninttrap */
1021 1020 (void) die(type, rp, addr, cpuid);
1022 1021 }
1023 1022 type |= USER;
1024 1023 }
1025 1024 /*FALLTHROUGH*/
1026 1025
1027 1026 case T_EXTERRFLT + USER: /* x87 floating point exception pending */
1028 1027 if (tudebug && tudebugfpe)
1029 1028 showregs(type, rp, addr);
1030 1029 if (sicode = fpexterrflt(rp)) {
1031 1030 siginfo.si_signo = SIGFPE;
1032 1031 siginfo.si_code = sicode;
1033 1032 siginfo.si_addr = (caddr_t)rp->r_pc;
1034 1033 fault = FLTFPE;
1035 1034 }
1036 1035 break;
1037 1036
1038 1037 case T_SIMDFPE + USER: /* SSE and SSE2 exceptions */
1039 1038 if (tudebug && tudebugsse)
1040 1039 showregs(type, rp, addr);
1041 1040 if (!is_x86_feature(x86_featureset, X86FSET_SSE) &&
1042 1041 !is_x86_feature(x86_featureset, X86FSET_SSE2)) {
1043 1042 /*
1044 1043 * There are rumours that some user instructions
1045 1044 * on older CPUs can cause this trap to occur; in
1046 1045 * which case send a SIGILL instead of a SIGFPE.
1047 1046 */
1048 1047 siginfo.si_signo = SIGILL;
1049 1048 siginfo.si_code = ILL_ILLTRP;
1050 1049 siginfo.si_addr = (caddr_t)rp->r_pc;
1051 1050 siginfo.si_trapno = type & ~USER;
1052 1051 fault = FLTILL;
1053 1052 } else if ((sicode = fpsimderrflt(rp)) != 0) {
1054 1053 siginfo.si_signo = SIGFPE;
1055 1054 siginfo.si_code = sicode;
1056 1055 siginfo.si_addr = (caddr_t)rp->r_pc;
1057 1056 fault = FLTFPE;
1058 1057 }
1059 1058
1060 1059 sti(); /* The SIMD exception comes in via cmninttrap */
1061 1060 break;
1062 1061
1063 1062 case T_BPTFLT: /* breakpoint trap */
1064 1063 /*
1065 1064 * Kernel breakpoint traps should only happen when kmdb is
1066 1065 * active, and even then, it'll have interposed on the IDT, so
1067 1066 * control won't get here. If it does, we've hit a breakpoint
1068 1067 * without the debugger, which is very strange, and very
1069 1068 * fatal.
1070 1069 */
1071 1070 if (tudebug && tudebugbpt)
1072 1071 showregs(type, rp, (caddr_t)0);
1073 1072
1074 1073 (void) die(type, rp, addr, cpuid);
1075 1074 break;
1076 1075
1077 1076 case T_SGLSTP: /* single step/hw breakpoint exception */
1078 1077
1079 1078 /* Now evaluate how we got here */
1080 1079 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1081 1080 /*
1082 1081 * i386 single-steps even through lcalls which
1083 1082 * change the privilege level. So we take a trap at
1084 1083 * the first instruction in privileged mode.
1085 1084 *
1086 1085 * Set a flag to indicate that upon completion of
1087 1086 * the system call, deal with the single-step trap.
1088 1087 *
1089 1088 * The same thing happens for sysenter, too.
1090 1089 */
1091 1090 singlestep_twiddle = 0;
1092 1091 if (rp->r_pc == (uintptr_t)sys_sysenter ||
1093 1092 rp->r_pc == (uintptr_t)brand_sys_sysenter) {
1094 1093 singlestep_twiddle = 1;
1095 1094 #if defined(__amd64)
1096 1095 /*
1097 1096 * Since we are already on the kernel's
1098 1097 * %gs, on 64-bit systems the sysenter case
1099 1098 * needs to adjust the pc to avoid
1100 1099 * executing the swapgs instruction at the
1101 1100 * top of the handler.
1102 1101 */
1103 1102 if (rp->r_pc == (uintptr_t)sys_sysenter)
1104 1103 rp->r_pc = (uintptr_t)
1105 1104 _sys_sysenter_post_swapgs;
1106 1105 else
1107 1106 rp->r_pc = (uintptr_t)
1108 1107 _brand_sys_sysenter_post_swapgs;
1109 1108 #endif
1110 1109 }
1111 1110 #if defined(__i386)
1112 1111 else if (rp->r_pc == (uintptr_t)sys_call ||
1113 1112 rp->r_pc == (uintptr_t)brand_sys_call) {
1114 1113 singlestep_twiddle = 1;
1115 1114 }
1116 1115 #endif
1117 1116 else {
1118 1117 /* not on sysenter/syscall; uregs available */
1119 1118 if (tudebug && tudebugbpt)
1120 1119 showregs(type, rp, (caddr_t)0);
1121 1120 }
1122 1121 if (singlestep_twiddle) {
1123 1122 rp->r_ps &= ~PS_T; /* turn off trace */
1124 1123 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1125 1124 ct->t_post_sys = 1;
1126 1125 aston(curthread);
1127 1126 goto cleanup;
1128 1127 }
1129 1128 }
1130 1129 /* XXX - needs review on debugger interface? */
1131 1130 if (boothowto & RB_DEBUG)
1132 1131 debug_enter((char *)NULL);
1133 1132 else
1134 1133 (void) die(type, rp, addr, cpuid);
1135 1134 break;
1136 1135
1137 1136 case T_NMIFLT: /* NMI interrupt */
1138 1137 printf("Unexpected NMI in system mode\n");
1139 1138 goto cleanup;
1140 1139
1141 1140 case T_NMIFLT + USER: /* NMI interrupt */
1142 1141 printf("Unexpected NMI in user mode\n");
1143 1142 break;
1144 1143
1145 1144 case T_GPFLT: /* general protection violation */
1146 1145 /*
1147 1146 * Any #GP that occurs during an on_trap .. no_trap bracket
1148 1147 * with OT_DATA_ACCESS or OT_SEGMENT_ACCESS protection,
1149 1148  * or in an on_fault .. no_fault bracket, is forgiven
1150 1149 * and we trampoline. This protection is given regardless
1151 1150 * of whether we are 32/64 bit etc - if a distinction is
1152 1151 * required then define new on_trap protection types.
1153 1152 *
1154 1153 * On amd64, we can get a #gp from referencing addresses
1155 1154 * in the virtual address hole e.g. from a copyin or in
1156 1155 * update_sregs while updating user segment registers.
1157 1156 *
1158 1157 * On the 32-bit hypervisor we could also generate one in
1159 1158 * mfn_to_pfn by reaching around or into where the hypervisor
1160 1159 * lives which is protected by segmentation.
1161 1160 */
1162 1161
1163 1162 /*
1164 1163 * If we're under on_trap() protection (see <sys/ontrap.h>),
1165 1164 * set ot_trap and trampoline back to the on_trap() call site
1166 1165 * for OT_DATA_ACCESS or OT_SEGMENT_ACCESS.
1167 1166 */
1168 1167 if (ct->t_ontrap != NULL) {
1169 1168 int ttype = ct->t_ontrap->ot_prot &
1170 1169 (OT_DATA_ACCESS | OT_SEGMENT_ACCESS);
1171 1170
1172 1171 if (ttype != 0) {
1173 1172 ct->t_ontrap->ot_trap |= ttype;
1174 1173 if (tudebug)
1175 1174 showregs(type, rp, (caddr_t)0);
1176 1175 rp->r_pc = ct->t_ontrap->ot_trampoline;
1177 1176 goto cleanup;
1178 1177 }
1179 1178 }
1180 1179
1181 1180 /*
1182 1181 * If we're under lofault protection (copyin etc.),
1183 1182 * longjmp back to lofault with an EFAULT.
1184 1183 */
1185 1184 if (ct->t_lofault) {
1186 1185 /*
1187 1186 * Fault is not resolvable, so just return to lofault
1188 1187 */
1189 1188 if (lodebug) {
1190 1189 showregs(type, rp, addr);
1191 1190 traceregs(rp);
1192 1191 }
1193 1192 rp->r_r0 = EFAULT;
1194 1193 rp->r_pc = ct->t_lofault;
1195 1194 goto cleanup;
1196 1195 }
1197 1196
1198 1197 /*
1199 1198 * We fall through to the next case, which repeats
1200 1199 * the OT_SEGMENT_ACCESS check which we've already
1201 1200 * done, so we'll always fall through to the
1202 1201 * T_STKFLT case.
1203 1202 */
1204 1203 /*FALLTHROUGH*/
1205 1204 case T_SEGFLT: /* segment not present fault */
1206 1205 /*
1207 1206 * One example of this is #NP in update_sregs while
1208 1207 * attempting to update a user segment register
1209 1208 * that points to a descriptor that is marked not
1210 1209 * present.
1211 1210 */
1212 1211 if (ct->t_ontrap != NULL &&
1213 1212 ct->t_ontrap->ot_prot & OT_SEGMENT_ACCESS) {
1214 1213 ct->t_ontrap->ot_trap |= OT_SEGMENT_ACCESS;
1215 1214 if (tudebug)
1216 1215 showregs(type, rp, (caddr_t)0);
1217 1216 rp->r_pc = ct->t_ontrap->ot_trampoline;
1218 1217 goto cleanup;
1219 1218 }
1220 1219 /*FALLTHROUGH*/
1221 1220 case T_STKFLT: /* stack fault */
1222 1221 case T_TSSFLT: /* invalid TSS fault */
1223 1222 if (tudebug)
1224 1223 showregs(type, rp, (caddr_t)0);
1225 1224 if (kern_gpfault(rp))
1226 1225 (void) die(type, rp, addr, cpuid);
1227 1226 goto cleanup;
1228 1227
1229 1228 /*
1230 1229 * ONLY 32-bit PROCESSES can USE a PRIVATE LDT! 64-bit apps
1231 1230 * should have no need for them, so we put a stop to it here.
1232 1231 *
1233 1232 * So: not-present fault is ONLY valid for 32-bit processes with
1234 1233 * a private LDT trying to do a system call. Emulate it.
1235 1234 *
1236 1235 * #gp fault is ONLY valid for 32-bit processes also, which DO NOT
1237 1236 * have a private LDT, and are trying to do a system call. Emulate it.
1238 1237 */
1239 1238
1240 1239 case T_SEGFLT + USER: /* segment not present fault */
1241 1240 case T_GPFLT + USER: /* general protection violation */
1242 1241 #ifdef _SYSCALL32_IMPL
1243 1242 if (p->p_model != DATAMODEL_NATIVE) {
1244 1243 #endif /* _SYSCALL32_IMPL */
1245 1244 if (instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
1246 1245 if (type == T_SEGFLT + USER)
1247 1246 ASSERT(p->p_ldt != NULL);
1248 1247
1249 1248 if ((p->p_ldt == NULL && type == T_GPFLT + USER) ||
1250 1249 type == T_SEGFLT + USER) {
1251 1250
1252 1251 /*
1253 1252 * The user attempted a system call via the obsolete
1254 1253 * call gate mechanism. Because the process doesn't have
1255 1254 * an LDT (i.e. the ldtr contains 0), a #gp results.
1256 1255 * Emulate the syscall here, just as we do above for a
1257 1256 * #np trap.
1258 1257 */
1259 1258
1260 1259 /*
1261 1260 * Since this is a not-present trap, rp->r_pc points to
1262 1261 * the trapping lcall instruction. We need to bump it
1263 1262 * to the next insn so the app can continue on.
1264 1263 */
1265 1264 rp->r_pc += LCALLSIZE;
1266 1265 lwp->lwp_regs = rp;
1267 1266
1268 1267 /*
1269 1268 * Normally the microstate of the LWP is forced back to
1270 1269 * LMS_USER by the syscall handlers. Emulate that
1271 1270 * behavior here.
1272 1271 */
1273 1272 mstate = LMS_USER;
1274 1273
1275 1274 dosyscall();
1276 1275 goto out;
1277 1276 }
1278 1277 }
1279 1278 #ifdef _SYSCALL32_IMPL
1280 1279 }
1281 1280 #endif /* _SYSCALL32_IMPL */
1282 1281 /*
1283 1282 * If the current process is using a private LDT and the
1284 1283 * trapping instruction is sysenter, the sysenter instruction
1285 1284 * has been disabled on the CPU because it destroys segment
1286 1285 * registers. If this is the case, rewrite the instruction to
1287 1286 * be a safe system call and retry it. If this occurs on a CPU
1288 1287 * which doesn't even support sysenter, the result of all of
1289 1288 * this will be to emulate that particular instruction.
1290 1289 */
1291 1290 if (p->p_ldt != NULL &&
1292 1291 ldt_rewrite_syscall(rp, p, X86FSET_SEP))
1293 1292 goto out;
1294 1293
1295 1294 /*FALLTHROUGH*/
1296 1295
1297 1296 case T_BOUNDFLT + USER: /* bound fault */
1298 1297 case T_STKFLT + USER: /* stack fault */
1299 1298 case T_TSSFLT + USER: /* invalid TSS fault */
1300 1299 if (tudebug)
1301 1300 showregs(type, rp, (caddr_t)0);
1302 1301 siginfo.si_signo = SIGSEGV;
1303 1302 siginfo.si_code = SEGV_MAPERR;
1304 1303 siginfo.si_addr = (caddr_t)rp->r_pc;
1305 1304 fault = FLTBOUNDS;
1306 1305 break;
1307 1306
1308 1307 case T_ALIGNMENT + USER: /* user alignment error (486) */
1309 1308 if (tudebug)
1310 1309 showregs(type, rp, (caddr_t)0);
1311 1310 bzero(&siginfo, sizeof (siginfo));
1312 1311 siginfo.si_signo = SIGBUS;
1313 1312 siginfo.si_code = BUS_ADRALN;
1314 1313 siginfo.si_addr = (caddr_t)rp->r_pc;
1315 1314 fault = FLTACCESS;
1316 1315 break;
1317 1316
1318 1317 case T_SGLSTP + USER: /* single step/hw breakpoint exception */
1319 1318 if (tudebug && tudebugbpt)
1320 1319 showregs(type, rp, (caddr_t)0);
1321 1320
1322 1321 /* Was it single-stepping? */
1323 1322 if (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP) {
1324 1323 pcb_t *pcb = &lwp->lwp_pcb;
1325 1324
1326 1325 rp->r_ps &= ~PS_T;
1327 1326 /*
1328 1327 * If both NORMAL_STEP and WATCH_STEP are in effect,
1329 1328 * give precedence to WATCH_STEP. If neither is set,
1330 1329 * user must have set the PS_T bit in %efl; treat this
1331 1330 * as NORMAL_STEP.
1332 1331 */
1333 1332 if ((fault = undo_watch_step(&siginfo)) == 0 &&
1334 1333 ((pcb->pcb_flags & NORMAL_STEP) ||
1335 1334 !(pcb->pcb_flags & WATCH_STEP))) {
1336 1335 siginfo.si_signo = SIGTRAP;
1337 1336 siginfo.si_code = TRAP_TRACE;
1338 1337 siginfo.si_addr = (caddr_t)rp->r_pc;
1339 1338 fault = FLTTRACE;
1340 1339 }
1341 1340 pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1342 1341 }
1343 1342 break;
1344 1343
1345 1344 case T_BPTFLT + USER: /* breakpoint trap */
1346 1345 if (tudebug && tudebugbpt)
1347 1346 showregs(type, rp, (caddr_t)0);
1348 1347 /*
1349 1348 * int 3 (the breakpoint instruction) leaves the pc referring
1350 1349 * to the address one byte after the breakpointed address.
1351 1350  * If the P_PR_BPTADJ flag has been set via /proc, we adjust
1352 1351 * it back so it refers to the breakpointed address.
1353 1352 */
1354 1353 if (p->p_proc_flag & P_PR_BPTADJ)
1355 1354 rp->r_pc--;
1356 1355 siginfo.si_signo = SIGTRAP;
1357 1356 siginfo.si_code = TRAP_BRKPT;
1358 1357 siginfo.si_addr = (caddr_t)rp->r_pc;
1359 1358 fault = FLTBPT;
1360 1359 break;
1361 1360
1362 1361 case T_AST:
1363 1362 /*
1364 1363 * This occurs only after the cs register has been made to
1365 1364 * look like a kernel selector, either through debugging or
1366 1365 * possibly by functions like setcontext(). The thread is
1367 1366 * about to cause a general protection fault at common_iret()
1368 1367 * in locore. We let that happen immediately instead of
1369 1368 * doing the T_AST processing.
1370 1369 */
1371 1370 goto cleanup;
1372 1371
1373 1372 case T_AST + USER: /* profiling, resched, h/w error pseudo trap */
1374 1373 if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR) {
1375 1374 proc_t *p = ttoproc(curthread);
1376 1375 extern void print_msg_hwerr(ctid_t ct_id, proc_t *p);
1377 1376
1378 1377 lwp->lwp_pcb.pcb_flags &= ~ASYNC_HWERR;
1379 1378 print_msg_hwerr(p->p_ct_process->conp_contract.ct_id,
1380 1379 p);
1381 1380 contract_process_hwerr(p->p_ct_process, p);
1382 1381 siginfo.si_signo = SIGKILL;
1383 1382 siginfo.si_code = SI_NOINFO;
1384 1383 } else if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) {
1385 1384 lwp->lwp_pcb.pcb_flags &= ~CPC_OVERFLOW;
1386 1385 if (kcpc_overflow_ast()) {
1387 1386 /*
1388 1387 * Signal performance counter overflow
1389 1388 */
1390 1389 if (tudebug)
1391 1390 showregs(type, rp, (caddr_t)0);
1392 1391 bzero(&siginfo, sizeof (siginfo));
1393 1392 siginfo.si_signo = SIGEMT;
1394 1393 siginfo.si_code = EMT_CPCOVF;
1395 1394 siginfo.si_addr = (caddr_t)rp->r_pc;
1396 1395 fault = FLTCPCOVF;
1397 1396 }
1398 1397 }
1399 1398
1400 1399 break;
1401 1400 }
1402 1401
1403 1402 /*
1404 1403 * We can't get here from a system trap
1405 1404 */
1406 1405 ASSERT(type & USER);
1407 1406
1408 1407 if (fault) {
1409 1408 /* We took a fault so abort single step. */
1410 1409 lwp->lwp_pcb.pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1411 1410 /*
1412 1411  * Remember the fault and fault address
1413 1412 * for real-time (SIGPROF) profiling.
1414 1413 */
1415 1414 lwp->lwp_lastfault = fault;
1416 1415 lwp->lwp_lastfaddr = siginfo.si_addr;
1417 1416
1418 1417 DTRACE_PROC2(fault, int, fault, ksiginfo_t *, &siginfo);
1419 1418
1420 1419 /*
1421 1420 * If a debugger has declared this fault to be an
1422 1421 * event of interest, stop the lwp. Otherwise just
1423 1422 * deliver the associated signal.
1424 1423 */
1425 1424 if (siginfo.si_signo != SIGKILL &&
1426 1425 prismember(&p->p_fltmask, fault) &&
1427 1426 stop_on_fault(fault, &siginfo) == 0)
1428 1427 siginfo.si_signo = 0;
1429 1428 }
1430 1429
1431 1430 if (siginfo.si_signo)
1432 1431 trapsig(&siginfo, (fault != FLTFPE && fault != FLTCPCOVF));
1433 1432
1434 1433 if (lwp->lwp_oweupc)
1435 1434 profil_tick(rp->r_pc);
1436 1435
1437 1436 if (ct->t_astflag | ct->t_sig_check) {
1438 1437 /*
1439 1438 * Turn off the AST flag before checking all the conditions that
1440 1439 * may have caused an AST. This flag is on whenever a signal or
1441 1440 * unusual condition should be handled after the next trap or
1442 1441 * syscall.
1443 1442 */
1444 1443 astoff(ct);
1445 1444 /*
1446 1445 * If a single-step trap occurred on a syscall (see above)
1447 1446 * recognize it now. Do this before checking for signals
1448 1447 * because deferred_singlestep_trap() may generate a SIGTRAP to
1449 1448 * the LWP or may otherwise mark the LWP to call issig(FORREAL).
1450 1449 */
1451 1450 if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING)
1452 1451 deferred_singlestep_trap((caddr_t)rp->r_pc);
1453 1452
1454 1453 ct->t_sig_check = 0;
1455 1454
1456 1455 mutex_enter(&p->p_lock);
1457 1456 if (curthread->t_proc_flag & TP_CHANGEBIND) {
1458 1457 timer_lwpbind();
1459 1458 curthread->t_proc_flag &= ~TP_CHANGEBIND;
1460 1459 }
1461 1460 mutex_exit(&p->p_lock);
1462 1461
1463 1462 /*
1464 1463 * for kaio requests that are on the per-process poll queue,
1465 1464  * aiop->aio_pollq, their AIO_POLL bit is set, the kernel
1466 1465 * should copyout their result_t to user memory. by copying
1467 1466 * out the result_t, the user can poll on memory waiting
1468 1467 * for the kaio request to complete.
1469 1468 */
1470 1469 if (p->p_aio)
1471 1470 aio_cleanup(0);
1472 1471 /*
1473 1472 * If this LWP was asked to hold, call holdlwp(), which will
1474 1473 * stop. holdlwps() sets this up and calls pokelwps() which
1475 1474 * sets the AST flag.
1476 1475 *
1477 1476 * Also check TP_EXITLWP, since this is used by fresh new LWPs
1478 1477 * through lwp_rtt(). That flag is set if the lwp_create(2)
1479 1478 * syscall failed after creating the LWP.
1480 1479 */
1481 1480 if (ISHOLD(p))
1482 1481 holdlwp();
1483 1482
1484 1483 /*
1485 1484 * All code that sets signals and makes ISSIG evaluate true must
1486 1485 * set t_astflag afterwards.
1487 1486 */
1488 1487 if (ISSIG_PENDING(ct, lwp, p)) {
1489 1488 if (issig(FORREAL))
1490 1489 psig();
1491 1490 ct->t_sig_check = 1;
1492 1491 }
1493 1492
1494 1493 if (ct->t_rprof != NULL) {
1495 1494 realsigprof(0, 0, 0);
1496 1495 ct->t_sig_check = 1;
1497 1496 }
1498 1497
1499 1498 /*
1500 1499 * /proc can't enable/disable the trace bit itself
1501 1500 * because that could race with the call gate used by
1502 1501 * system calls via "lcall". If that happened, an
1503 1502 * invalid EFLAGS would result. prstep()/prnostep()
1504 1503 * therefore schedule an AST for the purpose.
1505 1504 */
1506 1505 if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) {
1507 1506 lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP;
1508 1507 rp->r_ps |= PS_T;
1509 1508 }
1510 1509 if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) {
1511 1510 lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
1512 1511 rp->r_ps &= ~PS_T;
1513 1512 }
1514 1513 }
1515 1514
1516 1515 out: /* We can't get here from a system trap */
1517 1516 ASSERT(type & USER);
1518 1517
1519 1518 if (ISHOLD(p))
1520 1519 holdlwp();
1521 1520
1522 1521 /*
1523 1522 * Set state to LWP_USER here so preempt won't give us a kernel
1524 1523 * priority if it occurs after this point. Call CL_TRAPRET() to
1525 1524 * restore the user-level priority.
1526 1525 *
1527 1526 * It is important that no locks (other than spinlocks) be entered
1528 1527 * after this point before returning to user mode (unless lwp_state
1529 1528 * is set back to LWP_SYS).
1530 1529 */
1531 1530 lwp->lwp_state = LWP_USER;
1532 1531
1533 1532 if (ct->t_trapret) {
1534 1533 ct->t_trapret = 0;
1535 1534 thread_lock(ct);
1536 1535 CL_TRAPRET(ct);
1537 1536 thread_unlock(ct);
1538 1537 }
1539 1538 if (CPU->cpu_runrun || curthread->t_schedflag & TS_ANYWAITQ)
1540 1539 preempt();
1541 1540 prunstop();
1542 1541 (void) new_mstate(ct, mstate);
1543 1542
1544 1543 /* Kernel probe */
1545 1544 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
1546 1545 tnf_microstate, state, LMS_USER);
1547 1546
1548 1547 return;
1549 1548
1550 1549 cleanup: /* system traps end up here */
1551 1550 ASSERT(!(type & USER));
1552 1551 }
1553 1552
1554 1553 /*
1555 1554 * Patch non-zero to disable preemption of threads in the kernel.
1556 1555 */
1557 1556 int IGNORE_KERNEL_PREEMPTION = 0; /* XXX - delete this someday */
1558 1557
1559 1558 struct kpreempt_cnts { /* kernel preemption statistics */
1560 1559 int kpc_idle; /* executing idle thread */
1561 1560 int kpc_intr; /* executing interrupt thread */
1562 1561 int kpc_clock; /* executing clock thread */
1563 1562 int kpc_blocked; /* thread has blocked preemption (t_preempt) */
1564 1563 int kpc_notonproc; /* thread is surrendering processor */
1565 1564 int kpc_inswtch; /* thread has ratified scheduling decision */
1566 1565 int kpc_prilevel; /* processor interrupt level is too high */
1567 1566 int kpc_apreempt; /* asynchronous preemption */
1568 1567 int kpc_spreempt; /* synchronous preemption */
1569 1568 } kpreempt_cnts;
1570 1569
1571 1570 /*
1572 1571 * kernel preemption: forced rescheduling, preempt the running kernel thread.
1573 1572 * the argument is old PIL for an interrupt,
1574 1573  * or the distinguished value KPREEMPT_SYNC.
1575 1574 */
1576 1575 void
1577 1576 kpreempt(int asyncspl)
1578 1577 {
1579 1578 kthread_t *ct = curthread;
1580 1579
1581 1580 if (IGNORE_KERNEL_PREEMPTION) {
1582 1581 aston(CPU->cpu_dispthread);
1583 1582 return;
1584 1583 }
1585 1584
1586 1585 /*
1587 1586 * Check that conditions are right for kernel preemption
1588 1587 */
1589 1588 do {
1590 1589 if (ct->t_preempt) {
1591 1590 /*
1592 1591 * either a privileged thread (idle, panic, interrupt)
1593 1592 * or will check when t_preempt is lowered
1594 1593 * We need to specifically handle the case where
1595 1594 * the thread is in the middle of swtch (resume has
1596 1595 * been called) and has its t_preempt set
1597 1596 * [idle thread and a thread which is in kpreempt
1598 1597 * already] and then a high priority thread is
1599 1598 * available in the local dispatch queue.
1600 1599 * In this case the resumed thread needs to take a
1601 1600 * trap so that it can call kpreempt. We achieve
1602 1601 * this by using siron().
1603 1602 * How do we detect this condition:
1604 1603 * idle thread is running and is in the midst of
1605 1604 * resume: curthread->t_pri == -1 && CPU->dispthread
1606 1605 * != CPU->thread
1607 1606 * Need to ensure that this happens only at high pil
1608 1607 * resume is called at high pil
1609 1608 * Only in resume_from_idle is the pil changed.
1610 1609 */
1611 1610 if (ct->t_pri < 0) {
1612 1611 kpreempt_cnts.kpc_idle++;
1613 1612 if (CPU->cpu_dispthread != CPU->cpu_thread)
1614 1613 siron();
1615 1614 } else if (ct->t_flag & T_INTR_THREAD) {
1616 1615 kpreempt_cnts.kpc_intr++;
1617 1616 if (ct->t_pil == CLOCK_LEVEL)
1618 1617 kpreempt_cnts.kpc_clock++;
1619 1618 } else {
1620 1619 kpreempt_cnts.kpc_blocked++;
1621 1620 if (CPU->cpu_dispthread != CPU->cpu_thread)
1622 1621 siron();
1623 1622 }
1624 1623 aston(CPU->cpu_dispthread);
1625 1624 return;
1626 1625 }
1627 1626 if (ct->t_state != TS_ONPROC ||
1628 1627 ct->t_disp_queue != CPU->cpu_disp) {
1629 1628 /* this thread will be calling swtch() shortly */
1630 1629 kpreempt_cnts.kpc_notonproc++;
1631 1630 if (CPU->cpu_thread != CPU->cpu_dispthread) {
1632 1631 /* already in swtch(), force another */
1633 1632 kpreempt_cnts.kpc_inswtch++;
1634 1633 siron();
1635 1634 }
1636 1635 return;
1637 1636 }
1638 1637 if (getpil() >= DISP_LEVEL) {
1639 1638 /*
1640 1639 * We can't preempt this thread if it is at
1641 1640 * a PIL >= DISP_LEVEL since it may be holding
1642 1641 * a spin lock (like sched_lock).
1643 1642 */
1644 1643 siron(); /* check back later */
1645 1644 kpreempt_cnts.kpc_prilevel++;
1646 1645 return;
1647 1646 }
1648 1647 if (!interrupts_enabled()) {
1649 1648 /*
1650 1649 * Can't preempt while running with ints disabled
1651 1650 */
1652 1651 kpreempt_cnts.kpc_prilevel++;
1653 1652 return;
1654 1653 }
1655 1654 if (asyncspl != KPREEMPT_SYNC)
1656 1655 kpreempt_cnts.kpc_apreempt++;
1657 1656 else
1658 1657 kpreempt_cnts.kpc_spreempt++;
1659 1658
1660 1659 ct->t_preempt++;
1661 1660 preempt();
1662 1661 ct->t_preempt--;
1663 1662 } while (CPU->cpu_kprunrun);
1664 1663 }
1665 1664
1666 1665 /*
1667 1666 * Print out debugging info.
1668 1667 */
1669 1668 static void
1670 1669 showregs(uint_t type, struct regs *rp, caddr_t addr)
1671 1670 {
1672 1671 int s;
1673 1672
1674 1673 s = spl7();
1675 1674 type &= ~USER;
1676 1675 if (PTOU(curproc)->u_comm[0])
1677 1676 printf("%s: ", PTOU(curproc)->u_comm);
1678 1677 if (type < TRAP_TYPES)
1679 1678 printf("#%s %s\n", trap_type_mnemonic[type], trap_type[type]);
1680 1679 else
1681 1680 switch (type) {
1682 1681 case T_SYSCALL:
1683 1682 printf("Syscall Trap:\n");
1684 1683 break;
1685 1684 case T_AST:
1686 1685 printf("AST\n");
1687 1686 break;
1688 1687 default:
1689 1688 printf("Bad Trap = %d\n", type);
1690 1689 break;
1691 1690 }
1692 1691 if (type == T_PGFLT) {
1693 1692 printf("Bad %s fault at addr=0x%lx\n",
1694 1693 USERMODE(rp->r_cs) ? "user": "kernel", (uintptr_t)addr);
1695 1694 } else if (addr) {
1696 1695 printf("addr=0x%lx\n", (uintptr_t)addr);
1697 1696 }
1698 1697
1699 1698 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1700 1699 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1701 1700 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1702 1701
1703 1702 #if defined(__lint)
1704 1703 /*
1705 1704 * this clause can be deleted when lint bug 4870403 is fixed
1706 1705 * (lint thinks that bit 32 is illegal in a %b format string)
1707 1706 */
1708 1707 printf("cr0: %x cr4: %b\n",
1709 1708 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1710 1709 #else
1711 1710 printf("cr0: %b cr4: %b\n",
1712 1711 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1713 1712 #endif /* __lint */
1714 1713
1715 1714 printf("cr2: %lx", getcr2());
1716 1715 #if !defined(__xpv)
1717 1716 printf("cr3: %lx", getcr3());
1718 1717 #if defined(__amd64)
1719 1718 printf("cr8: %lx\n", getcr8());
1720 1719 #endif
1721 1720 #endif
1722 1721 printf("\n");
1723 1722
1724 1723 dumpregs(rp);
1725 1724 splx(s);
1726 1725 }
1727 1726
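/*
 * [Editorial sketch, not part of this webrev] The cr0/cr4 lines above use the
 * kernel printf %b directive, which decodes a bit field against a description
 * string: a leading output base followed by <bit-number><name> pairs.  The
 * macro and flag names below are made up for illustration; the real
 * FMT_CR0/FMT_CR4 strings come from the machine headers.
 */
#if 0	/* illustration only */
#define	FMT_EXAMPLE	"\020\001ENABLE\002BUSY\003ERROR"	/* hex output, bits 1-3 */
	printf("flags: %b\n", (uint_t)flags, FMT_EXAMPLE);
#endif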
1728 1727 static void
1729 1728 dumpregs(struct regs *rp)
1730 1729 {
1731 1730 #if defined(__amd64)
1732 1731 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n";
1733 1732
1734 1733 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx);
1735 1734 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9);
1736 1735 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp);
1737 1736 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12);
1738 1737 printf(fmt, "r13", rp->r_r13, "r14", rp->r_r14, "r15", rp->r_r15);
1739 1738
1740 1739 printf(fmt, "fsb", rdmsr(MSR_AMD_FSBASE), "gsb", rdmsr(MSR_AMD_GSBASE),
1741 1740 " ds", rp->r_ds);
1742 1741 printf(fmt, " es", rp->r_es, " fs", rp->r_fs, " gs", rp->r_gs);
1743 1742
1744 1743 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err, "rip", rp->r_rip);
1745 1744 printf(fmt, " cs", rp->r_cs, "rfl", rp->r_rfl, "rsp", rp->r_rsp);
1746 1745
1747 1746 printf("\t%3s: %16lx\n", " ss", rp->r_ss);
1748 1747
1749 1748 #elif defined(__i386)
1750 1749 const char fmt[] = "\t%3s: %8lx %3s: %8lx %3s: %8lx %3s: %8lx\n";
1751 1750
1752 1751 printf(fmt, " gs", rp->r_gs, " fs", rp->r_fs,
1753 1752 " es", rp->r_es, " ds", rp->r_ds);
1754 1753 printf(fmt, "edi", rp->r_edi, "esi", rp->r_esi,
1755 1754 "ebp", rp->r_ebp, "esp", rp->r_esp);
1756 1755 printf(fmt, "ebx", rp->r_ebx, "edx", rp->r_edx,
1757 1756 "ecx", rp->r_ecx, "eax", rp->r_eax);
1758 1757 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err,
1759 1758 "eip", rp->r_eip, " cs", rp->r_cs);
1760 1759 printf("\t%3s: %8lx %3s: %8lx %3s: %8lx\n",
1761 1760 "efl", rp->r_efl, "usp", rp->r_uesp, " ss", rp->r_ss);
1762 1761
1763 1762 #endif /* __i386 */
1764 1763 }
1765 1764
1766 1765 /*
1767 1766 * Test to see if the instruction is iret on i386 or iretq on amd64.
1768 1767 *
1769 1768 * On the hypervisor we can only test for nopop_sys_rtt_syscall. If true
1770 1769 * then we are in the context of hypervisor's failsafe handler because it
1771 1770 * tried to iret and failed due to a bad selector. See xen_failsafe_callback.
1772 1771 */
1773 1772 static int
1774 1773 instr_is_iret(caddr_t pc)
1775 1774 {
1776 1775
1777 1776 #if defined(__xpv)
1778 1777 extern void nopop_sys_rtt_syscall(void);
1779 1778 return ((pc == (caddr_t)nopop_sys_rtt_syscall) ? 1 : 0);
1780 1779
1781 1780 #else
1782 1781
1783 1782 #if defined(__amd64)
1784 1783 static const uint8_t iret_insn[2] = { 0x48, 0xcf }; /* iretq */
1785 1784
1786 1785 #elif defined(__i386)
1787 1786 static const uint8_t iret_insn[1] = { 0xcf }; /* iret */
1788 1787 #endif /* __i386 */
1789 1788 return (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0);
1790 1789
1791 1790 #endif /* __xpv */
1792 1791 }
1793 1792
1794 1793 #if defined(__i386)
1795 1794
1796 1795 /*
1797 1796 * Test to see if the instruction is part of __SEGREGS_POP
1798 1797 *
1799 1798 * Note carefully the appallingly awful dependency between
1800 1799 * the instruction sequence used in __SEGREGS_POP and these
1801 1800 * instructions encoded here.
1802 1801 */
1803 1802 static int
1804 1803 instr_is_segregs_pop(caddr_t pc)
1805 1804 {
1806 1805 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 };
1807 1806 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 };
1808 1807 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 };
1809 1808 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc };
1810 1809
1811 1810 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 ||
1812 1811 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 ||
1813 1812 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1814 1813 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1815 1814 return (1);
1816 1815
1817 1816 return (0);
1818 1817 }
1819 1818
1820 1819 #endif /* __i386 */
1821 1820
1822 1821 /*
1823 1822 * Test to see if the instruction is part of _sys_rtt.
1824 1823 *
1825 1824 * Again, on the hypervisor, if we try to IRET to user land with a bad code
1826 1825 * or stack selector we will get vectored through xen_failsafe_callback,
1827 1826 * in which case we assume we got here via _sys_rtt, since we only allow
1828 1827 * IRET to user land to take place in _sys_rtt.
1829 1828 */
1830 1829 static int
1831 1830 instr_is_sys_rtt(caddr_t pc)
1832 1831 {
1833 1832 extern void _sys_rtt(), _sys_rtt_end();
1834 1833
1835 1834 if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1836 1835 (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1837 1836 return (0);
1838 1837
1839 1838 return (1);
1840 1839 }
1841 1840
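/*
 * [Editorial sketch, not part of this webrev] instr_is_sys_rtt() bounds the
 * faulting pc between the _sys_rtt and _sys_rtt_end labels.  The same
 * start/end-label test works for any contiguous code region; the label names
 * below are hypothetical.
 */
#if 0	/* illustration only */
extern void region_start(), region_end();

static int
pc_in_region(caddr_t pc)
{
	return ((uintptr_t)pc >= (uintptr_t)region_start &&
	    (uintptr_t)pc < (uintptr_t)region_end);
}
#endif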
1842 1841 /*
1843 1842 * Handle #gp faults in kernel mode.
1844 1843 *
1845 1844 * One legitimate way this can happen is if we attempt to update segment
1846 1845 * registers to naughty values on the way out of the kernel.
1847 1846 *
1848 1847 * This can happen in a couple of ways: someone - either accidentally or
1849 1848 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies
1850 1849 * (signal(2)) a ucontext that contains silly segment register values.
1851 1850 * Or someone - either accidentally or on purpose - modifies the prgregset_t
1852 1851 * of a subject process via /proc to contain silly segment register values.
1853 1852 *
1854 1853 * (The unfortunate part is that we can end up discovering the bad segment
1855 1854 * register value in the middle of an 'iret' after we've popped most of the
1856 1855 * stack. So it becomes quite difficult to associate an accurate ucontext
1857 1856 * with the lwp, because the act of taking the #gp trap overwrites most of
1858 1857 * what we were going to send the lwp.)
1859 1858 *
1860 1859 * OTOH if it turns out that's -not- the problem, and we're -not- an lwp
1861 1860 * trying to return to user mode and we get a #gp fault, then we need
1862 1861 * to die() -- which will happen if we return non-zero from this routine.
1863 1862 */
1864 1863 static int
1865 1864 kern_gpfault(struct regs *rp)
1866 1865 {
1867 1866 kthread_t *t = curthread;
1868 1867 proc_t *p = ttoproc(t);
1869 1868 klwp_t *lwp = ttolwp(t);
1870 1869 struct regs tmpregs, *trp = NULL;
1871 1870 caddr_t pc = (caddr_t)rp->r_pc;
1872 1871 int v;
1873 1872 uint32_t auditing = AU_AUDITING();
1874 1873
1875 1874 /*
1876 1875 * If we're not an lwp, or (in the native case) the pc is
1877 1876 * outside the _sys_rtt range, then we should immediately
1878 1877 * be die()ing horribly.
1879 1878 */
1880 1879 if (lwp == NULL || !instr_is_sys_rtt(pc))
1881 1880 return (1);
1882 1881
1883 1882 /*
1884 1883 * So at least we're in the right part of the kernel.
1885 1884 *
1886 1885 * Disassemble the instruction at the faulting pc.
1887 1886 * Once we know what it is, we carefully reconstruct the stack
1888 1887 * based on the order in which the stack is deconstructed in
1889 1888 * _sys_rtt. Ew.
1890 1889 */
1891 1890 if (instr_is_iret(pc)) {
1892 1891 /*
1893 1892 * We took the #gp while trying to perform the IRET.
1894 1893 * This means that either %cs or %ss are bad.
1895 1894 * All we know for sure is that most of the general
1896 1895 * registers have been restored, including the
1897 1896 * segment registers, and all we have left on the
1898 1897 * topmost part of the lwp's stack are the
1899 1898 * registers that the iretq was unable to consume.
1900 1899 *
1901 1900 * All the rest of the state was crushed by the #gp
1902 1901 * which pushed -its- registers atop our old save area
1903 1902 * (because we had to decrement the stack pointer, sigh) so
1904 1903 * all that we can try and do is to reconstruct the
1905 1904 * crushed frame from the #gp trap frame itself.
1906 1905 */
1907 1906 trp = &tmpregs;
1908 1907 trp->r_ss = lwptoregs(lwp)->r_ss;
1909 1908 trp->r_sp = lwptoregs(lwp)->r_sp;
1910 1909 trp->r_ps = lwptoregs(lwp)->r_ps;
1911 1910 trp->r_cs = lwptoregs(lwp)->r_cs;
1912 1911 trp->r_pc = lwptoregs(lwp)->r_pc;
1913 1912 bcopy(rp, trp, offsetof(struct regs, r_pc));
1914 1913
1915 1914 /*
1916 1915 * Validate simple math
1917 1916 */
1918 1917 ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc);
1919 1918 ASSERT(trp->r_err == rp->r_err);
1920 1919
1921 1920
1922 1921
1923 1922 }
1924 1923
1925 1924 #if defined(__amd64)
1926 1925 if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) {
1927 1926
1928 1927 /*
1929 1928 * This is the common case -- we're trying to load
1930 1929 * a bad segment register value in the only section
1931 1930 * of kernel code that ever loads segment registers.
1932 1931 *
1933 1932 * We don't need to do anything at this point because
1934 1933 * the pcb contains all the pending segment register
1935 1934 * state, and the regs are still intact because we
1936 1935 * didn't adjust the stack pointer yet. Given the fidelity
1937 1936 * of all this, we could conceivably send a signal
1938 1937 * to the lwp, rather than core-ing.
1939 1938 */
1940 1939 trp = lwptoregs(lwp);
1941 1940 ASSERT((caddr_t)trp == (caddr_t)rp->r_sp);
1942 1941 }
1943 1942
1944 1943 #elif defined(__i386)
1945 1944
1946 1945 if (trp == NULL && instr_is_segregs_pop(pc))
1947 1946 trp = lwptoregs(lwp);
1948 1947
1949 1948 #endif /* __i386 */
1950 1949
1951 1950 if (trp == NULL)
1952 1951 return (1);
1953 1952
1954 1953 /*
1955 1954 * If we get to here, we're reasonably confident that we've
1956 1955 * correctly decoded what happened on the way out of the kernel.
1957 1956 * Rewrite the lwp's registers so that we can create a core dump
1958 1957 * that (at least vaguely) represents the mcontext we were
1959 1958 * being asked to restore when things went so terribly wrong.
1960 1959 */
1961 1960
1962 1961 /*
1963 1962 * Make sure that we have a meaningful %trapno and %err.
1964 1963 */
1965 1964 trp->r_trapno = rp->r_trapno;
1966 1965 trp->r_err = rp->r_err;
1967 1966
1968 1967 if ((caddr_t)trp != (caddr_t)lwptoregs(lwp))
1969 1968 bcopy(trp, lwptoregs(lwp), sizeof (*trp));
1970 1969
1971 1970
1972 1971 mutex_enter(&p->p_lock);
1973 1972 lwp->lwp_cursig = SIGSEGV;
1974 1973 mutex_exit(&p->p_lock);
1975 1974
1976 1975 /*
1977 1976 * Terminate all LWPs but don't discard them. If another lwp beat
1978 1977 * us to the punch by calling exit(), evaporate now.
1979 1978 */
1980 1979 proc_is_exiting(p);
1981 1980 if (exitlwps(1) != 0) {
1982 1981 mutex_enter(&p->p_lock);
1983 1982 lwp_exit();
1984 1983 }
1985 1984
1986 1985 if (auditing) /* audit core dump */
1987 1986 audit_core_start(SIGSEGV);
1988 1987 v = core(SIGSEGV, B_FALSE);
1989 1988 if (auditing) /* audit core dump */
1990 1989 audit_core_finish(v ? CLD_KILLED : CLD_DUMPED);
1991 1990 exit(v ? CLD_KILLED : CLD_DUMPED, SIGSEGV);
1992 1991 return (0);
1993 1992 }
1994 1993
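/*
 * [Editorial sketch, not part of this webrev] kern_gpfault() splices two
 * sources into one struct regs: the iret-frame members come from the lwp's
 * saved registers, while everything laid out before r_pc is copied from the
 * #gp trap frame with an offsetof()-bounded bcopy.  A generic illustration of
 * that partial-copy idiom (the structure here is hypothetical):
 */
#if 0	/* illustration only */
struct frame_example {
	long	gp_regs[16];		/* overwritten from the trap frame */
	long	pc, cs, flags;		/* preserved from the saved frame */
};

static void
splice_frames(struct frame_example *dst, const struct frame_example *src)
{
	/* copy only the members that precede 'pc'; 'pc' and later survive */
	bcopy(src, dst, offsetof(struct frame_example, pc));
}
#endif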
1995 1994 /*
1996 1995 * dump_tss() - Display the TSS structure
1997 1996 */
1998 1997
1999 1998 #if !defined(__xpv)
2000 1999 #if defined(__amd64)
2001 2000
2002 2001 static void
2003 2002 dump_tss(void)
2004 2003 {
2005 2004 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2006 2005 tss_t *tss = CPU->cpu_tss;
2007 2006
2008 2007 printf(tss_fmt, "tss_rsp0", (void *)tss->tss_rsp0);
2009 2008 printf(tss_fmt, "tss_rsp1", (void *)tss->tss_rsp1);
2010 2009 printf(tss_fmt, "tss_rsp2", (void *)tss->tss_rsp2);
2011 2010
2012 2011 printf(tss_fmt, "tss_ist1", (void *)tss->tss_ist1);
2013 2012 printf(tss_fmt, "tss_ist2", (void *)tss->tss_ist2);
2014 2013 printf(tss_fmt, "tss_ist3", (void *)tss->tss_ist3);
2015 2014 printf(tss_fmt, "tss_ist4", (void *)tss->tss_ist4);
2016 2015 printf(tss_fmt, "tss_ist5", (void *)tss->tss_ist5);
2017 2016 printf(tss_fmt, "tss_ist6", (void *)tss->tss_ist6);
2018 2017 printf(tss_fmt, "tss_ist7", (void *)tss->tss_ist7);
2019 2018 }
2020 2019
2021 2020 #elif defined(__i386)
2022 2021
2023 2022 static void
2024 2023 dump_tss(void)
2025 2024 {
2026 2025 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2027 2026 tss_t *tss = CPU->cpu_tss;
2028 2027
2029 2028 printf(tss_fmt, "tss_link", (void *)(uintptr_t)tss->tss_link);
2030 2029 printf(tss_fmt, "tss_esp0", (void *)(uintptr_t)tss->tss_esp0);
2031 2030 printf(tss_fmt, "tss_ss0", (void *)(uintptr_t)tss->tss_ss0);
2032 2031 printf(tss_fmt, "tss_esp1", (void *)(uintptr_t)tss->tss_esp1);
2033 2032 printf(tss_fmt, "tss_ss1", (void *)(uintptr_t)tss->tss_ss1);
2034 2033 printf(tss_fmt, "tss_esp2", (void *)(uintptr_t)tss->tss_esp2);
2035 2034 printf(tss_fmt, "tss_ss2", (void *)(uintptr_t)tss->tss_ss2);
2036 2035 printf(tss_fmt, "tss_cr3", (void *)(uintptr_t)tss->tss_cr3);
2037 2036 printf(tss_fmt, "tss_eip", (void *)(uintptr_t)tss->tss_eip);
2038 2037 printf(tss_fmt, "tss_eflags", (void *)(uintptr_t)tss->tss_eflags);
2039 2038 printf(tss_fmt, "tss_eax", (void *)(uintptr_t)tss->tss_eax);
2040 2039 printf(tss_fmt, "tss_ebx", (void *)(uintptr_t)tss->tss_ebx);
2041 2040 printf(tss_fmt, "tss_ecx", (void *)(uintptr_t)tss->tss_ecx);
2042 2041 printf(tss_fmt, "tss_edx", (void *)(uintptr_t)tss->tss_edx);
2043 2042 printf(tss_fmt, "tss_esp", (void *)(uintptr_t)tss->tss_esp);
2044 2043 }
2045 2044
2046 2045 #endif /* __amd64 */
2047 2046 #endif /* !__xpv */
2048 2047
2049 2048 #if defined(TRAPTRACE)
2050 2049
2051 2050 int ttrace_nrec = 10; /* number of records to dump out */
2052 2051 int ttrace_dump_nregs = 0; /* dump out this many records with regs too */
2053 2052
2054 2053 /*
2055 2054 * Dump out the last ttrace_nrec traptrace records on each CPU
2056 2055 */
2057 2056 static void
2058 2057 dump_ttrace(void)
2059 2058 {
2060 2059 trap_trace_ctl_t *ttc;
2061 2060 trap_trace_rec_t *rec;
2062 2061 uintptr_t current;
2063 2062 int i, j, k;
2064 2063 int n = NCPU;
2065 2064 #if defined(__amd64)
2066 2065 const char banner[] =
2067 2066 "\ncpu address timestamp "
2068 2067 "type vc handler pc\n";
2069 2068 const char fmt1[] = "%3d %016lx %12llx ";
2070 2069 #elif defined(__i386)
2071 2070 const char banner[] =
2072 2071 "\ncpu address timestamp type vc handler pc\n";
2073 2072 const char fmt1[] = "%3d %08lx %12llx ";
2074 2073 #endif
2075 2074 const char fmt2[] = "%4s %3x ";
2076 2075 const char fmt3[] = "%8s ";
2077 2076
2078 2077 if (ttrace_nrec == 0)
2079 2078 return;
2080 2079
2081 2080 printf(banner);
2082 2081
2083 2082 for (i = 0; i < n; i++) {
2084 2083 ttc = &trap_trace_ctl[i];
2085 2084 if (ttc->ttc_first == NULL)
2086 2085 continue;
2087 2086
2088 2087 current = ttc->ttc_next - sizeof (trap_trace_rec_t);
2089 2088 for (j = 0; j < ttrace_nrec; j++) {
2090 2089 struct sysent *sys;
2091 2090 struct autovec *vec;
2092 2091 extern struct av_head autovect[];
2093 2092 int type;
2094 2093 ulong_t off;
2095 2094 char *sym, *stype;
2096 2095
2097 2096 if (current < ttc->ttc_first)
2098 2097 current =
2099 2098 ttc->ttc_limit - sizeof (trap_trace_rec_t);
2100 2099
2101 2100 if (current == NULL)
2102 2101 continue;
2103 2102
2104 2103 rec = (trap_trace_rec_t *)current;
2105 2104
2106 2105 if (rec->ttr_stamp == 0)
2107 2106 break;
2108 2107
2109 2108 printf(fmt1, i, (uintptr_t)rec, rec->ttr_stamp);
2110 2109
2111 2110 switch (rec->ttr_marker) {
2112 2111 case TT_SYSCALL:
2113 2112 case TT_SYSENTER:
2114 2113 case TT_SYSC:
2115 2114 case TT_SYSC64:
2116 2115 #if defined(__amd64)
2117 2116 sys = &sysent32[rec->ttr_sysnum];
2118 2117 switch (rec->ttr_marker) {
2119 2118 case TT_SYSC64:
2120 2119 sys = &sysent[rec->ttr_sysnum];
2121 2120 /*FALLTHROUGH*/
2122 2121 #elif defined(__i386)
2123 2122 sys = &sysent[rec->ttr_sysnum];
2124 2123 switch (rec->ttr_marker) {
2125 2124 case TT_SYSC64:
2126 2125 #endif
2127 2126 case TT_SYSC:
2128 2127 stype = "sysc"; /* syscall */
2129 2128 break;
2130 2129 case TT_SYSCALL:
2131 2130 stype = "lcal"; /* lcall */
2132 2131 break;
2133 2132 case TT_SYSENTER:
2134 2133 stype = "syse"; /* sysenter */
2135 2134 break;
2136 2135 default:
2137 2136 break;
2138 2137 }
2139 2138 printf(fmt2, "sysc", rec->ttr_sysnum);
2140 2139 if (sys != NULL) {
2141 2140 sym = kobj_getsymname(
2142 2141 (uintptr_t)sys->sy_callc,
2143 2142 &off);
2144 2143 if (sym != NULL)
2145 2144 printf(fmt3, sym);
2146 2145 else
2147 2146 printf("%p ", sys->sy_callc);
2148 2147 } else {
2149 2148 printf(fmt3, "unknown");
2150 2149 }
2151 2150 break;
2152 2151
2153 2152 case TT_INTERRUPT:
2154 2153 printf(fmt2, "intr", rec->ttr_vector);
2155 2154 if (get_intr_handler != NULL)
2156 2155 vec = (struct autovec *)
2157 2156 (*get_intr_handler)
2158 2157 (rec->ttr_cpuid, rec->ttr_vector);
2159 2158 else
2160 2159 vec =
2161 2160 autovect[rec->ttr_vector].avh_link;
2162 2161
2163 2162 if (vec != NULL) {
2164 2163 sym = kobj_getsymname(
2165 2164 (uintptr_t)vec->av_vector, &off);
2166 2165 if (sym != NULL)
2167 2166 printf(fmt3, sym);
2168 2167 else
2169 2168 printf("%p ", vec->av_vector);
2170 2169 } else {
2171 2170 printf(fmt3, "unknown ");
2172 2171 }
2173 2172 break;
2174 2173
2175 2174 case TT_TRAP:
2176 2175 case TT_EVENT:
2177 2176 type = rec->ttr_regs.r_trapno;
2178 2177 printf(fmt2, "trap", type);
2179 2178 if (type < TRAP_TYPES)
2180 2179 printf(" #%s ",
2181 2180 trap_type_mnemonic[type]);
2182 2181 else
2183 2182 switch (type) {
2184 2183 case T_AST:
2185 2184 printf(fmt3, "ast");
2186 2185 break;
2187 2186 default:
2188 2187 printf(fmt3, "");
2189 2188 break;
2190 2189 }
2191 2190 break;
2192 2191
2193 2192 default:
2194 2193 break;
2195 2194 }
2196 2195
2197 2196 sym = kobj_getsymname(rec->ttr_regs.r_pc, &off);
2198 2197 if (sym != NULL)
2199 2198 printf("%s+%lx\n", sym, off);
2200 2199 else
2201 2200 printf("%lx\n", rec->ttr_regs.r_pc);
2202 2201
2203 2202 if (ttrace_dump_nregs-- > 0) {
2204 2203 int s;
2205 2204
2206 2205 if (rec->ttr_marker == TT_INTERRUPT)
2207 2206 printf(
2208 2207 "\t\tipl %x spl %x pri %x\n",
2209 2208 rec->ttr_ipl,
2210 2209 rec->ttr_spl,
2211 2210 rec->ttr_pri);
2212 2211
2213 2212 dumpregs(&rec->ttr_regs);
2214 2213
2215 2214 printf("\t%3s: %p\n\n", " ct",
2216 2215 (void *)rec->ttr_curthread);
2217 2216
2218 2217 /*
2219 2218 * print out the pc stack that we recorded
2220 2219 * at trap time (if any)
2221 2220 */
2222 2221 for (s = 0; s < rec->ttr_sdepth; s++) {
2223 2222 uintptr_t fullpc;
2224 2223
2225 2224 if (s >= TTR_STACK_DEPTH) {
2226 2225 printf("ttr_sdepth corrupt\n");
2227 2226 break;
2228 2227 }
2229 2228
2230 2229 fullpc = (uintptr_t)rec->ttr_stack[s];
2231 2230
2232 2231 sym = kobj_getsymname(fullpc, &off);
2233 2232 if (sym != NULL)
2234 2233 printf("-> %s+0x%lx()\n",
2235 2234 sym, off);
2236 2235 else
2237 2236 printf("-> 0x%lx()\n", fullpc);
2238 2237 }
2239 2238 printf("\n");
2240 2239 }
2241 2240 current -= sizeof (trap_trace_rec_t);
2242 2241 }
2243 2242 }
2244 2243 }
2245 2244
2246 2245 #endif /* TRAPTRACE */
2247 2246
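/*
 * [Editorial sketch, not part of this webrev] dump_ttrace() above walks each
 * per-CPU trace buffer backwards from ttc_next, wrapping from ttc_first to
 * the last record below ttc_limit.  The same backward ring-buffer walk in
 * isolation (names and bounds are hypothetical):
 */
#if 0	/* illustration only */
static void
walk_ring_backwards(uintptr_t first, uintptr_t limit, uintptr_t next,
    size_t recsize, int nrec)
{
	uintptr_t cur = next - recsize;
	int i;

	for (i = 0; i < nrec; i++) {
		if (cur < first)
			cur = limit - recsize;	/* wrap to the newest slot */
		/* ... process the record at 'cur' here ... */
		cur -= recsize;
	}
}
#endif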
2248 2247 void
2249 2248 panic_showtrap(struct panic_trap_info *tip)
2250 2249 {
2251 2250 showregs(tip->trap_type, tip->trap_regs, tip->trap_addr);
2252 2251
2253 2252 #if defined(TRAPTRACE)
2254 2253 dump_ttrace();
2255 2254 #endif
2256 2255
2257 2256 #if !defined(__xpv)
2258 2257 if (tip->trap_type == T_DBLFLT)
2259 2258 dump_tss();
2260 2259 #endif
2261 2260 }
2262 2261
2263 2262 void
2264 2263 panic_savetrap(panic_data_t *pdp, struct panic_trap_info *tip)
2265 2264 {
2266 2265 panic_saveregs(pdp, tip->trap_regs);
2267 2266 }
1763 lines elided