Print this page
patch fix-compile2
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/sparc/os/syscall.c
+++ new/usr/src/uts/sparc/os/syscall.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 #include <sys/param.h>
27 27 #include <sys/vmparam.h>
28 28 #include <sys/types.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/systm.h>
31 31 #include <sys/cmn_err.h>
32 32 #include <sys/signal.h>
33 33 #include <sys/stack.h>
34 34 #include <sys/cred.h>
35 35 #include <sys/user.h>
36 36 #include <sys/debug.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/proc.h>
39 39 #include <sys/var.h>
40 40 #include <sys/inline.h>
41 41 #include <sys/syscall.h>
42 42 #include <sys/ucontext.h>
43 43 #include <sys/cpuvar.h>
44 44 #include <sys/siginfo.h>
45 45 #include <sys/trap.h>
46 46 #include <sys/machtrap.h>
47 47 #include <sys/sysinfo.h>
48 48 #include <sys/procfs.h>
49 49 #include <sys/prsystm.h>
50 50 #include <sys/fpu/fpusystm.h>
51 51 #include <sys/modctl.h>
52 52 #include <sys/aio_impl.h>
53 53 #include <c2/audit.h>
54 54 #include <sys/tnf.h>
55 55 #include <sys/tnf_probe.h>
56 56 #include <sys/machpcb.h>
57 57 #include <sys/privregs.h>
58 58 #include <sys/copyops.h>
59 59 #include <sys/timer.h>
60 60 #include <sys/priv.h>
61 61 #include <sys/msacct.h>
62 62
63 63 int syscalltrace = 0;
64 64 #ifdef SYSCALLTRACE
65 65 static kmutex_t systrace_lock; /* syscall tracing lock */
66 66 #endif /* SYSCALLTRACE */
67 67
68 68 static krwlock_t *lock_syscall(struct sysent *, uint_t);
69 69
70 70 #ifdef _SYSCALL32_IMPL
71 71 static struct sysent *
72 72 lwp_getsysent(klwp_t *lwp)
73 73 {
74 74 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE)
75 75 return (sysent);
76 76 return (sysent32);
77 77 }
78 78 #define LWP_GETSYSENT(lwp) (lwp_getsysent(lwp))
79 79 #else
80 80 #define LWP_GETSYSENT(lwp) (sysent)
81 81 #endif
82 82
83 83 /*
84 84 * Called to restore the lwp's register window just before
85 85 * returning to user level (only if the registers have been
86 86 * fetched or modified through /proc).
87 87 */
88 88 /*ARGSUSED1*/
89 89 void
90 90 xregrestore(klwp_t *lwp, int shared)
91 91 {
92 92 /*
93 93 * If locals+ins were modified by /proc copy them out.
94 94 * Also copy to the shared window, if necessary.
95 95 */
96 96 if (lwp->lwp_pcb.pcb_xregstat == XREGMODIFIED) {
97 97 struct machpcb *mpcb = lwptompcb(lwp);
98 98 caddr_t sp = (caddr_t)lwptoregs(lwp)->r_sp;
99 99
100 100 size_t rwinsize;
101 101 caddr_t rwp;
102 102 int is64;
103 103
104 104 if (lwp_getdatamodel(lwp) == DATAMODEL_LP64) {
105 105 rwinsize = sizeof (struct rwindow);
106 106 rwp = sp + STACK_BIAS;
107 107 is64 = 1;
108 108 } else {
109 109 rwinsize = sizeof (struct rwindow32);
110 110 sp = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)sp;
111 111 rwp = sp;
112 112 is64 = 0;
113 113 }
114 114
115 115 if (is64)
116 116 (void) copyout_nowatch(&lwp->lwp_pcb.pcb_xregs,
117 117 rwp, rwinsize);
118 118 else {
119 119 struct rwindow32 rwindow32;
120 120 int watched;
121 121
122 122 watched = watch_disable_addr(rwp, rwinsize, S_WRITE);
123 123 rwindow_nto32(&lwp->lwp_pcb.pcb_xregs, &rwindow32);
124 124 (void) copyout(&rwindow32, rwp, rwinsize);
125 125 if (watched)
126 126 watch_enable_addr(rwp, rwinsize, S_WRITE);
127 127 }
128 128
129 129 /* also copy to the user return window */
130 130 mpcb->mpcb_rsp[0] = sp;
131 131 mpcb->mpcb_rsp[1] = NULL;
132 132 bcopy(&lwp->lwp_pcb.pcb_xregs, &mpcb->mpcb_rwin[0],
133 133 sizeof (lwp->lwp_pcb.pcb_xregs));
134 134 }
135 135 lwp->lwp_pcb.pcb_xregstat = XREGNONE;
136 136 }
137 137
138 138
139 139 /*
140 140 * Get the arguments to the current system call.
141 141 * lwp->lwp_ap normally points to the out regs in the reg structure.
142 142 * If the user is going to change the out registers and might want to
143 143 * get the args (for /proc tracing), it must copy the args elsewhere
144 144 * via save_syscall_args().
145 145 */
146 146 uint_t
147 147 get_syscall_args(klwp_t *lwp, long *argp, int *nargsp)
148 148 {
149 149 kthread_t *t = lwptot(lwp);
150 150 uint_t code = t->t_sysnum;
151 151 long mask;
152 152 long *ap;
153 153 int nargs;
154 154
155 155 if (lwptoproc(lwp)->p_model == DATAMODEL_ILP32)
156 156 mask = (uint32_t)0xffffffffU;
157 157 else
158 158 mask = 0xffffffffffffffff;
159 159
160 160 if (code != 0 && code < NSYSCALL) {
161 161
162 162 nargs = LWP_GETSYSENT(lwp)[code].sy_narg;
163 163
164 164 ASSERT(nargs <= MAXSYSARGS);
165 165
166 166 *nargsp = nargs;
167 167 ap = lwp->lwp_ap;
168 168 while (nargs-- > 0)
169 169 *argp++ = *ap++ & mask;
170 170 } else {
171 171 *nargsp = 0;
172 172 }
173 173 return (code);
174 174 }
175 175
176 176 #ifdef _SYSCALL32_IMPL
177 177 /*
178 178 * Get the arguments to the current 32-bit system call.
179 179 */
180 180 uint_t
181 181 get_syscall32_args(klwp_t *lwp, int *argp, int *nargsp)
182 182 {
183 183 long args[MAXSYSARGS];
184 184 uint_t i, code;
185 185
186 186 code = get_syscall_args(lwp, args, nargsp);
187 187 for (i = 0; i != *nargsp; i++)
188 188 *argp++ = (int)args[i];
189 189 return (code);
190 190 }
191 191 #endif
192 192
193 193 /*
194 194 * Save the system call arguments in a safe place.
195 195 * lwp->lwp_ap normally points to the out regs in the reg structure.
196 196 * If the user is going to change the out registers, g1, or the stack,
197 197 * and might want to get the args (for /proc tracing), it must copy
198 198 * the args elsewhere via save_syscall_args().
199 199 *
200 200 * This may be called from stop() even when we're not in a system call.
201 201 * Since there's no easy way to tell, this must be safe (not panic).
202 202 * If the copyins get data faults, return non-zero.
203 203 */
204 204 int
205 205 save_syscall_args()
206 206 {
207 207 kthread_t *t = curthread;
208 208 klwp_t *lwp = ttolwp(t);
209 209 struct regs *rp = lwptoregs(lwp);
210 210 uint_t code = t->t_sysnum;
211 211 uint_t nargs;
212 212 int i;
213 213 caddr_t ua;
214 214 model_t datamodel;
215 215
216 216 if (lwp->lwp_argsaved || code == 0)
217 217 return (0); /* args already saved or not needed */
218 218
219 219 if (code >= NSYSCALL) {
220 220 nargs = 0; /* illegal syscall */
221 221 } else {
222 222 struct sysent *se = LWP_GETSYSENT(lwp);
223 223 struct sysent *callp = se + code;
224 224
225 225 nargs = callp->sy_narg;
226 226 if (LOADABLE_SYSCALL(callp) && nargs == 0) {
227 227 krwlock_t *module_lock;
228 228
229 229 /*
230 230 * Find out how many arguments the system
231 231 * call uses.
232 232 *
233 233 * We have the property that loaded syscalls
234 234 * never change the number of arguments they
235 235 * use after they've been loaded once. This
236 236 * allows us to stop for /proc tracing without
237 237 * holding the module lock.
238 238 * /proc is assured that sy_narg is valid.
239 239 */
240 240 module_lock = lock_syscall(se, code);
241 241 nargs = callp->sy_narg;
242 242 rw_exit(module_lock);
243 243 }
244 244 }
245 245
246 246 /*
247 247 * Fetch the system call arguments.
248 248 */
249 249 if (nargs == 0)
250 250 goto out;
251 251
252 252
253 253 ASSERT(nargs <= MAXSYSARGS);
254 254
255 255 if ((datamodel = lwp_getdatamodel(lwp)) == DATAMODEL_ILP32) {
256 256
257 257 if (rp->r_g1 == 0) { /* indirect syscall */
258 258
259 259 lwp->lwp_arg[0] = (uint32_t)rp->r_o1;
260 260 lwp->lwp_arg[1] = (uint32_t)rp->r_o2;
261 261 lwp->lwp_arg[2] = (uint32_t)rp->r_o3;
262 262 lwp->lwp_arg[3] = (uint32_t)rp->r_o4;
263 263 lwp->lwp_arg[4] = (uint32_t)rp->r_o5;
264 264 if (nargs > 5) {
265 265 ua = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)
266 266 (rp->r_sp + MINFRAME32);
267 267 for (i = 5; i < nargs; i++) {
268 268 uint32_t a;
269 269 if (fuword32(ua, &a) != 0)
270 270 return (-1);
271 271 lwp->lwp_arg[i] = a;
272 272 ua += sizeof (a);
273 273 }
274 274 }
275 275 } else {
276 276 lwp->lwp_arg[0] = (uint32_t)rp->r_o0;
277 277 lwp->lwp_arg[1] = (uint32_t)rp->r_o1;
278 278 lwp->lwp_arg[2] = (uint32_t)rp->r_o2;
279 279 lwp->lwp_arg[3] = (uint32_t)rp->r_o3;
280 280 lwp->lwp_arg[4] = (uint32_t)rp->r_o4;
281 281 lwp->lwp_arg[5] = (uint32_t)rp->r_o5;
282 282 if (nargs > 6) {
283 283 ua = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)
284 284 (rp->r_sp + MINFRAME32);
285 285 for (i = 6; i < nargs; i++) {
286 286 uint32_t a;
287 287 if (fuword32(ua, &a) != 0)
288 288 return (-1);
289 289 lwp->lwp_arg[i] = a;
290 290 ua += sizeof (a);
291 291 }
292 292 }
293 293 }
294 294 } else {
295 295 ASSERT(datamodel == DATAMODEL_LP64);
296 296 lwp->lwp_arg[0] = rp->r_o0;
297 297 lwp->lwp_arg[1] = rp->r_o1;
298 298 lwp->lwp_arg[2] = rp->r_o2;
299 299 lwp->lwp_arg[3] = rp->r_o3;
300 300 lwp->lwp_arg[4] = rp->r_o4;
301 301 lwp->lwp_arg[5] = rp->r_o5;
302 302 if (nargs > 6) {
303 303 ua = (caddr_t)rp->r_sp + MINFRAME + STACK_BIAS;
304 304 for (i = 6; i < nargs; i++) {
305 305 unsigned long a;
306 306 if (fulword(ua, &a) != 0)
307 307 return (-1);
308 308 lwp->lwp_arg[i] = a;
309 309 ua += sizeof (a);
310 310 }
311 311 }
312 312 }
313 313
314 314 out:
315 315 lwp->lwp_ap = lwp->lwp_arg;
316 316 lwp->lwp_argsaved = 1;
317 317 t->t_post_sys = 1; /* so lwp_ap will be reset */
318 318 return (0);
319 319 }
320 320
321 321 void
322 322 reset_syscall_args(void)
323 323 {
324 324 klwp_t *lwp = ttolwp(curthread);
325 325
326 326 lwp->lwp_ap = (long *)&lwptoregs(lwp)->r_o0;
327 327 lwp->lwp_argsaved = 0;
328 328 }
329 329
330 330 /*
331 331 * nonexistent system call-- signal lwp (may want to handle it)
332 332 * flag error if lwp won't see signal immediately
333 333 * This works for old or new calling sequence.
334 334 */
335 335 int64_t
336 336 nosys()
337 337 {
338 338 tsignal(curthread, SIGSYS);
339 339 return ((int64_t)set_errno(ENOSYS));
340 340 }
341 341
342 342 /*
343 343 * Perform pre-system-call processing, including stopping for tracing,
344 344 * auditing, microstate-accounting, etc.
345 345 *
346 346 * This routine is called only if the t_pre_sys flag is set. Any condition
347 347 * requiring pre-syscall handling must set the t_pre_sys flag. If the
348 348 * condition is persistent, this routine will repost t_pre_sys.
349 349 */
350 350 int
351 351 pre_syscall(int arg0)
↓ open down ↓ |
351 lines elided |
↑ open up ↑ |
352 352 {
353 353 unsigned int code;
354 354 kthread_t *t = curthread;
355 355 proc_t *p = ttoproc(t);
356 356 klwp_t *lwp = ttolwp(t);
357 357 struct regs *rp = lwptoregs(lwp);
358 358 int repost;
359 359
360 360 t->t_pre_sys = repost = 0; /* clear pre-syscall processing flag */
361 361
362 - ASSERT(t->t_schedflag & TS_DONT_SWAP);
363 -
364 362 syscall_mstate(LMS_USER, LMS_SYSTEM);
365 363
366 364 /*
367 365 * The syscall arguments in the out registers should be pointed to
368 366 * by lwp_ap. If the args need to be copied so that the outs can
369 367 * be changed without losing the ability to get the args for /proc,
370 368 * they can be saved by save_syscall_args(), and lwp_ap will be
371 369 * restored by post_syscall().
372 370 */
373 371 ASSERT(lwp->lwp_ap == (long *)&rp->r_o0);
374 372
375 373 /*
376 374 * Make sure the thread is holding the latest credentials for the
377 375 * process. The credentials in the process right now apply to this
378 376 * thread for the entire system call.
379 377 */
380 378 if (t->t_cred != p->p_cred) {
381 379 cred_t *oldcred = t->t_cred;
382 380 /*
383 381 * DTrace accesses t_cred in probe context. t_cred must
384 382 * always be either NULL, or point to a valid, allocated cred
385 383 * structure.
386 384 */
387 385 t->t_cred = crgetcred();
388 386 crfree(oldcred);
389 387 }
390 388
391 389 /*
392 390 * Undo special arrangements to single-step the lwp
393 391 * so that a debugger will see valid register contents.
394 392 * Also so that the pc is valid for syncfpu().
395 393 * Also so that a syscall like exec() can be stepped.
396 394 */
397 395 if (lwp->lwp_pcb.pcb_step != STEP_NONE) {
398 396 (void) prundostep();
399 397 repost = 1;
400 398 }
401 399
402 400 /*
403 401 * Check for indirect system call in case we stop for tracing.
404 402 * Don't allow multiple indirection.
405 403 */
406 404 code = t->t_sysnum;
407 405 if (code == 0 && arg0 != 0) { /* indirect syscall */
408 406 code = arg0;
409 407 t->t_sysnum = arg0;
410 408 }
411 409
412 410 /*
413 411 * From the proc(4) manual page:
414 412 * When entry to a system call is being traced, the traced process
415 413 * stops after having begun the call to the system but before the
416 414 * system call arguments have been fetched from the process.
417 415 * If proc changes the args we must refetch them after starting.
418 416 */
419 417 if (PTOU(p)->u_systrap) {
420 418 if (prismember(&PTOU(p)->u_entrymask, code)) {
421 419 /*
422 420 * Recheck stop condition, now that lock is held.
423 421 */
424 422 mutex_enter(&p->p_lock);
425 423 if (PTOU(p)->u_systrap &&
426 424 prismember(&PTOU(p)->u_entrymask, code)) {
427 425 stop(PR_SYSENTRY, code);
428 426 /*
429 427 * Must refetch args since they were
430 428 * possibly modified by /proc. Indicate
431 429 * that the valid copy is in the
432 430 * registers.
433 431 */
434 432 lwp->lwp_argsaved = 0;
435 433 lwp->lwp_ap = (long *)&rp->r_o0;
436 434 }
437 435 mutex_exit(&p->p_lock);
438 436 }
439 437 repost = 1;
440 438 }
441 439
442 440 if (lwp->lwp_sysabort) {
443 441 /*
444 442 * lwp_sysabort may have been set via /proc while the process
445 443 * was stopped on PR_SYSENTRY. If so, abort the system call.
446 444 * Override any error from the copyin() of the arguments.
447 445 */
448 446 lwp->lwp_sysabort = 0;
449 447 (void) set_errno(EINTR); /* sets post-sys processing */
450 448 t->t_pre_sys = 1; /* repost anyway */
451 449 return (1); /* don't do system call, return EINTR */
452 450 }
453 451
454 452 /* begin auditing for this syscall */
455 453 if (audit_active == C2AUDIT_LOADED) {
456 454 uint32_t auditing = au_zone_getstate(NULL);
457 455
458 456 if (auditing & AU_AUDIT_MASK) {
459 457 int error;
460 458 if (error = audit_start(T_SYSCALL, code, auditing, \
461 459 0, lwp)) {
462 460 t->t_pre_sys = 1; /* repost anyway */
463 461 lwp->lwp_error = 0; /* for old drivers */
464 462 return (error);
465 463 }
466 464 repost = 1;
467 465 }
468 466 }
469 467
470 468 #ifndef NPROBE
471 469 /* Kernel probe */
472 470 if (tnf_tracing_active) {
473 471 TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */,
474 472 tnf_sysnum, sysnum, t->t_sysnum);
475 473 t->t_post_sys = 1; /* make sure post_syscall runs */
476 474 repost = 1;
477 475 }
478 476 #endif /* NPROBE */
479 477
480 478 #ifdef SYSCALLTRACE
481 479 if (syscalltrace) {
482 480 int i;
483 481 long *ap;
484 482 char *cp;
485 483 char *sysname;
486 484 struct sysent *callp;
487 485
488 486 if (code >= NSYSCALL)
489 487 callp = &nosys_ent; /* nosys has no args */
490 488 else
491 489 callp = LWP_GETSYSENT(lwp) + code;
492 490 (void) save_syscall_args();
493 491 mutex_enter(&systrace_lock);
494 492 printf("%d: ", p->p_pid);
495 493 if (code >= NSYSCALL)
496 494 printf("0x%x", code);
497 495 else {
498 496 sysname = mod_getsysname(code);
499 497 printf("%s[0x%x]", sysname == NULL ? "NULL" :
500 498 sysname, code);
501 499 }
502 500 cp = "(";
503 501 for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) {
504 502 printf("%s%lx", cp, *ap);
505 503 cp = ", ";
506 504 }
507 505 if (i)
508 506 printf(")");
509 507 printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread);
510 508 mutex_exit(&systrace_lock);
511 509 }
512 510 #endif /* SYSCALLTRACE */
513 511
514 512 /*
515 513 * If there was a continuing reason for pre-syscall processing,
516 514 * set the t_pre_sys flag for the next system call.
517 515 */
518 516 if (repost)
519 517 t->t_pre_sys = 1;
520 518 lwp->lwp_error = 0; /* for old drivers */
521 519 lwp->lwp_badpriv = PRIV_NONE; /* for privilege tracing */
522 520 return (0);
523 521 }
524 522
525 523 /*
526 524 * Post-syscall processing. Perform abnormal system call completion
527 525 * actions such as /proc tracing, profiling, signals, preemption, etc.
528 526 *
529 527 * This routine is called only if t_post_sys, t_sig_check, or t_astflag is set.
530 528 * Any condition requiring post-syscall handling must set one of these.
531 529 * If the condition is persistent, this routine will repost t_post_sys.
532 530 */
533 531 void
534 532 post_syscall(long rval1, long rval2)
535 533 {
536 534 kthread_t *t = curthread;
537 535 proc_t *p = curproc;
538 536 klwp_t *lwp = ttolwp(t);
539 537 struct regs *rp = lwptoregs(lwp);
540 538 uint_t error;
541 539 int code = t->t_sysnum;
542 540 int repost = 0;
543 541 int proc_stop = 0; /* non-zero if stopping for /proc */
544 542 int sigprof = 0; /* non-zero if sending SIGPROF */
545 543
546 544 t->t_post_sys = 0;
547 545
548 546 error = lwp->lwp_errno;
549 547
550 548 /*
551 549 * Code can be zero if this is a new LWP returning after a forkall(),
552 550 * other than the one which matches the one in the parent which called
553 551 * forkall(). In these LWPs, skip most of post-syscall activity.
554 552 */
555 553 if (code == 0)
556 554 goto sig_check;
557 555
558 556 /* put out audit record for this syscall */
559 557 if (AU_AUDITING()) {
560 558 rval_t rval; /* fix audit_finish() someday */
561 559
562 560 /* XX64 -- truncation of 64-bit return values? */
563 561 rval.r_val1 = (int)rval1;
564 562 rval.r_val2 = (int)rval2;
565 563 audit_finish(T_SYSCALL, code, error, &rval);
566 564 repost = 1;
567 565 }
568 566
569 567 if (curthread->t_pdmsg != NULL) {
570 568 char *m = curthread->t_pdmsg;
571 569
572 570 uprintf("%s", m);
573 571 kmem_free(m, strlen(m) + 1);
574 572 curthread->t_pdmsg = NULL;
575 573 }
576 574
577 575 /*
578 576 * If we're going to stop for /proc tracing, set the flag and
579 577 * save the arguments so that the return values don't smash them.
580 578 */
581 579 if (PTOU(p)->u_systrap) {
582 580 if (prismember(&PTOU(p)->u_exitmask, code)) {
583 581 proc_stop = 1;
584 582 (void) save_syscall_args();
585 583 }
586 584 repost = 1;
587 585 }
588 586
589 587 /*
590 588 * Similarly check to see if SIGPROF might be sent.
591 589 */
592 590 if (curthread->t_rprof != NULL &&
593 591 curthread->t_rprof->rp_anystate != 0) {
594 592 (void) save_syscall_args();
595 593 sigprof = 1;
596 594 }
597 595
598 596 if (lwp->lwp_eosys == NORMALRETURN) {
599 597 if (error == 0) {
600 598 #ifdef SYSCALLTRACE
601 599 if (syscalltrace) {
602 600 mutex_enter(&systrace_lock);
603 601 printf(
604 602 "%d: r_val1=0x%lx, r_val2=0x%lx, id 0x%p\n",
605 603 p->p_pid, rval1, rval2, curthread);
606 604 mutex_exit(&systrace_lock);
607 605 }
608 606 #endif /* SYSCALLTRACE */
609 607 rp->r_tstate &= ~TSTATE_IC;
610 608 rp->r_o0 = rval1;
611 609 rp->r_o1 = rval2;
612 610 } else {
613 611 int sig;
614 612
615 613 #ifdef SYSCALLTRACE
616 614 if (syscalltrace) {
617 615 mutex_enter(&systrace_lock);
618 616 printf("%d: error=%d, id 0x%p\n",
619 617 p->p_pid, error, curthread);
620 618 mutex_exit(&systrace_lock);
621 619 }
622 620 #endif /* SYSCALLTRACE */
623 621 if (error == EINTR && t->t_activefd.a_stale)
624 622 error = EBADF;
625 623 if (error == EINTR &&
626 624 (sig = lwp->lwp_cursig) != 0 &&
627 625 sigismember(&PTOU(p)->u_sigrestart, sig) &&
628 626 PTOU(p)->u_signal[sig - 1] != SIG_DFL &&
629 627 PTOU(p)->u_signal[sig - 1] != SIG_IGN)
630 628 error = ERESTART;
631 629 rp->r_o0 = error;
632 630 rp->r_tstate |= TSTATE_IC;
633 631 }
634 632 /*
635 633 * The default action is to redo the trap instruction.
636 634 * We increment the pc and npc past it for NORMALRETURN.
637 635 * JUSTRETURN has set up a new pc and npc already.
638 636 * If we are a cloned thread of forkall(), don't
639 637 * adjust here because we have already inherited
640 638 * the adjusted values from our clone.
641 639 */
642 640 if (!(t->t_flag & T_FORKALL)) {
643 641 rp->r_pc = rp->r_npc;
644 642 rp->r_npc += 4;
645 643 }
646 644 }
647 645
648 646 /*
649 647 * From the proc(4) manual page:
650 648 * When exit from a system call is being traced, the traced process
651 649 * stops on completion of the system call just prior to checking for
652 650 * signals and returning to user level. At this point all return
653 651 * values have been stored into the traced process's saved registers.
654 652 */
655 653 if (proc_stop) {
656 654 mutex_enter(&p->p_lock);
657 655 if (PTOU(p)->u_systrap &&
658 656 prismember(&PTOU(p)->u_exitmask, code))
659 657 stop(PR_SYSEXIT, code);
660 658 mutex_exit(&p->p_lock);
661 659 }
662 660
663 661 /*
664 662 * If we are the parent returning from a successful
665 663 * vfork, wait for the child to exec or exit.
666 664 * This code must be here and not in the bowels of the system
667 665 * so that /proc can intercept exit from vfork in a timely way.
668 666 */
669 667 if (t->t_flag & T_VFPARENT) {
670 668 ASSERT(code == SYS_vfork || code == SYS_forksys);
671 669 ASSERT(rp->r_o1 == 0 && error == 0);
672 670 vfwait((pid_t)rval1);
673 671 t->t_flag &= ~T_VFPARENT;
674 672 }
675 673
676 674 /*
677 675 * If profiling is active, bill the current PC in user-land
678 676 * and keep reposting until profiling is disabled.
679 677 */
680 678 if (p->p_prof.pr_scale) {
681 679 if (lwp->lwp_oweupc)
682 680 profil_tick(rp->r_pc);
683 681 repost = 1;
684 682 }
685 683
686 684 sig_check:
687 685 /*
688 686 * Reset flag for next time.
689 687 * We must do this after stopping on PR_SYSEXIT
690 688 * because /proc uses the information in lwp_eosys.
691 689 */
692 690 lwp->lwp_eosys = NORMALRETURN;
693 691 clear_stale_fd();
694 692 t->t_flag &= ~T_FORKALL;
695 693
696 694 if (t->t_astflag | t->t_sig_check) {
697 695 /*
698 696 * Turn off the AST flag before checking all the conditions that
699 697 * may have caused an AST. This flag is on whenever a signal or
700 698 * unusual condition should be handled after the next trap or
701 699 * syscall.
702 700 */
703 701 astoff(t);
704 702 t->t_sig_check = 0;
705 703
706 704 /*
707 705 * The following check is legal for the following reasons:
708 706 * 1) The thread we are checking, is ourselves, so there is
709 707 * no way the proc can go away.
710 708 * 2) The only time we need to be protected by the
711 709 * lock is if the binding is changed.
712 710 *
713 711 * Note we will still take the lock and check the binding
714 712 * if the condition was true without the lock held. This
715 713 * prevents lock contention among threads owned by the
716 714 * same proc.
717 715 */
718 716
719 717 if (curthread->t_proc_flag & TP_CHANGEBIND) {
720 718 mutex_enter(&p->p_lock);
721 719 if (curthread->t_proc_flag & TP_CHANGEBIND) {
722 720 timer_lwpbind();
723 721 curthread->t_proc_flag &= ~TP_CHANGEBIND;
724 722 }
725 723 mutex_exit(&p->p_lock);
726 724 }
727 725
728 726 /*
729 727 * for kaio requests on the special kaio poll queue,
730 728 * copyout their results to user memory.
731 729 */
732 730 if (p->p_aio)
733 731 aio_cleanup(0);
734 732
735 733 /*
736 734 * If this LWP was asked to hold, call holdlwp(), which will
737 735 * stop. holdlwps() sets this up and calls pokelwps() which
738 736 * sets the AST flag.
739 737 *
740 738 * Also check TP_EXITLWP, since this is used by fresh new LWPs
741 739 * through lwp_rtt(). That flag is set if the lwp_create(2)
742 740 * syscall failed after creating the LWP.
743 741 */
744 742 if (ISHOLD(p) || (t->t_proc_flag & TP_EXITLWP))
745 743 holdlwp();
746 744
747 745 /*
748 746 * All code that sets signals and makes ISSIG_PENDING
749 747 * evaluate true must set t_sig_check afterwards.
750 748 */
751 749 if (ISSIG_PENDING(t, lwp, p)) {
752 750 if (issig(FORREAL))
753 751 psig();
754 752 t->t_sig_check = 1; /* recheck next time */
755 753 }
756 754
757 755 if (sigprof) {
758 756 int nargs = (code > 0 && code < NSYSCALL)?
759 757 LWP_GETSYSENT(lwp)[code].sy_narg : 0;
760 758 realsigprof(code, nargs, error);
761 759 t->t_sig_check = 1; /* recheck next time */
762 760 }
763 761
764 762 /*
765 763 * If a performance counter overflow interrupt was
766 764 * delivered *during* the syscall, then re-enable the
767 765 * AST so that we take a trip through trap() to cause
768 766 * the SIGEMT to be delivered.
769 767 */
770 768 if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW)
771 769 aston(t);
772 770
773 771 /*
774 772 * If an asynchronous hardware error is pending, turn AST flag
775 773 * back on. AST will be checked again before we return to user
776 774 * mode and we'll come back through trap() to handle the error.
777 775 */
778 776 if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR)
779 777 aston(t);
780 778 }
781 779
782 780 /*
783 781 * Restore register window if a debugger modified it.
784 782 * Set up to perform a single-step if a debugger requested it.
785 783 */
786 784 if (lwp->lwp_pcb.pcb_xregstat != XREGNONE)
787 785 xregrestore(lwp, 1);
788 786
789 787 lwp->lwp_errno = 0; /* clear error for next time */
790 788
791 789 #ifndef NPROBE
792 790 /* Kernel probe */
793 791 if (tnf_tracing_active) {
794 792 TNF_PROBE_3(syscall_end, "syscall thread", /* CSTYLED */,
795 793 tnf_long, rval1, rval1,
796 794 tnf_long, rval2, rval2,
797 795 tnf_long, errno, (long)error);
798 796 repost = 1;
799 797 }
800 798 #endif /* NPROBE */
801 799
802 800 /*
803 801 * Set state to LWP_USER here so preempt won't give us a kernel
804 802 * priority if it occurs after this point. Call CL_TRAPRET() to
805 803 * restore the user-level priority.
806 804 *
807 805 * It is important that no locks (other than spinlocks) be entered
808 806 * after this point before returning to user mode (unless lwp_state
809 807 * is set back to LWP_SYS).
810 808 *
811 809 * Sampled times past this point are charged to the user.
812 810 */
813 811 lwp->lwp_state = LWP_USER;
814 812
815 813 if (t->t_trapret) {
816 814 t->t_trapret = 0;
817 815 thread_lock(t);
818 816 CL_TRAPRET(t);
819 817 thread_unlock(t);
820 818 }
821 819 if (CPU->cpu_runrun || t->t_schedflag & TS_ANYWAITQ)
822 820 preempt();
823 821 prunstop();
824 822
825 823 /*
826 824 * t_post_sys will be set if pcb_step is active.
827 825 */
828 826 if (lwp->lwp_pcb.pcb_step != STEP_NONE) {
829 827 prdostep();
830 828 repost = 1;
831 829 }
832 830
833 831 t->t_sysnum = 0; /* no longer in a system call */
834 832
835 833 /*
836 834 * In case the args were copied to the lwp, reset the
837 835 * pointer so the next syscall will have the right lwp_ap pointer.
838 836 */
839 837 lwp->lwp_ap = (long *)&rp->r_o0;
840 838 lwp->lwp_argsaved = 0;
841 839
842 840 /*
843 841 * If there was a continuing reason for post-syscall processing,
844 842 * set the t_post_sys flag for the next system call.
845 843 */
846 844 if (repost)
847 845 t->t_post_sys = 1;
848 846
849 847 /*
850 848 * If there is a ustack registered for this lwp, and the stack rlimit
851 849 * has been altered, read in the ustack. If the saved stack rlimit
852 850 * matches the bounds of the ustack, update the ustack to reflect
853 851 * the new rlimit. If the new stack rlimit is RLIM_INFINITY, disable
854 852 * stack checking by setting the size to 0.
855 853 */
856 854 if (lwp->lwp_ustack != 0 && lwp->lwp_old_stk_ctl != 0) {
857 855 rlim64_t new_size;
858 856 model_t model;
859 857 caddr_t top;
860 858 struct rlimit64 rl;
861 859
862 860 mutex_enter(&p->p_lock);
863 861 new_size = p->p_stk_ctl;
864 862 model = p->p_model;
865 863 top = p->p_usrstack;
866 864 (void) rctl_rlimit_get(rctlproc_legacy[RLIMIT_STACK], p, &rl);
867 865 mutex_exit(&p->p_lock);
868 866
869 867 if (rl.rlim_cur == RLIM64_INFINITY)
870 868 new_size = 0;
871 869
872 870 if (model == DATAMODEL_NATIVE) {
873 871 stack_t stk;
874 872
875 873 if (copyin((stack_t *)lwp->lwp_ustack, &stk,
876 874 sizeof (stack_t)) == 0 &&
877 875 (stk.ss_size == lwp->lwp_old_stk_ctl ||
878 876 stk.ss_size == 0) &&
879 877 stk.ss_sp == top - stk.ss_size) {
880 878 stk.ss_sp = (void *)((uintptr_t)stk.ss_sp +
881 879 stk.ss_size - new_size);
882 880 stk.ss_size = new_size;
883 881
884 882 (void) copyout(&stk,
885 883 (stack_t *)lwp->lwp_ustack,
886 884 sizeof (stack_t));
887 885 }
888 886 } else {
889 887 stack32_t stk32;
890 888
891 889 if (copyin((stack32_t *)lwp->lwp_ustack, &stk32,
892 890 sizeof (stack32_t)) == 0 &&
893 891 (stk32.ss_size == lwp->lwp_old_stk_ctl ||
894 892 stk32.ss_size == 0) &&
895 893 stk32.ss_sp ==
896 894 (caddr32_t)(uintptr_t)(top - stk32.ss_size)) {
897 895 stk32.ss_sp += stk32.ss_size - new_size;
898 896 stk32.ss_size = new_size;
899 897
900 898 (void) copyout(&stk32,
901 899 (stack32_t *)lwp->lwp_ustack,
902 900 sizeof (stack32_t));
903 901 }
904 902 }
905 903
906 904 lwp->lwp_old_stk_ctl = 0;
907 905 }
908 906
909 907 syscall_mstate(LMS_SYSTEM, LMS_USER);
910 908 }
911 909
912 910 /*
913 911 * Call a system call which takes a pointer to the user args struct and
914 912 * a pointer to the return values. This is a bit slower than the standard
915 913 * C arg-passing method in some cases.
916 914 */
917 915 int64_t
918 916 syscall_ap()
919 917 {
920 918 uint_t error;
921 919 struct sysent *callp;
922 920 rval_t rval;
923 921 klwp_t *lwp = ttolwp(curthread);
924 922 struct regs *rp = lwptoregs(lwp);
925 923
926 924 callp = LWP_GETSYSENT(lwp) + curthread->t_sysnum;
927 925
928 926 /*
929 927 * If the arguments don't fit in registers %o0 - o5, make sure they
930 928 * have been copied to the lwp_arg array.
931 929 */
932 930 if (callp->sy_narg > 6 && save_syscall_args())
933 931 return ((int64_t)set_errno(EFAULT));
934 932
935 933 rval.r_val1 = 0;
936 934 rval.r_val2 = (int)rp->r_o1;
937 935 lwp->lwp_error = 0; /* for old drivers */
938 936 error = (*(callp->sy_call))(lwp->lwp_ap, &rval);
939 937 if (error)
940 938 return ((int64_t)set_errno(error));
941 939 return (rval.r_vals);
942 940 }
943 941
944 942 /*
945 943 * Load system call module.
946 944 * Returns with pointer to held read lock for module.
947 945 */
948 946 static krwlock_t *
949 947 lock_syscall(struct sysent *table, uint_t code)
950 948 {
951 949 krwlock_t *module_lock;
952 950 struct modctl *modp;
953 951 int id;
954 952 struct sysent *callp;
955 953
956 954 module_lock = table[code].sy_lock;
957 955 callp = &table[code];
958 956
959 957 /*
960 958 * Optimization to only call modload if we don't have a loaded
961 959 * syscall.
962 960 */
963 961 rw_enter(module_lock, RW_READER);
964 962 if (LOADED_SYSCALL(callp))
965 963 return (module_lock);
966 964 rw_exit(module_lock);
967 965
968 966 for (;;) {
969 967 if ((id = modload("sys", syscallnames[code])) == -1)
970 968 break;
971 969
972 970 /*
973 971 * If we loaded successfully at least once, the modctl
974 972 * will still be valid, so we try to grab it by filename.
975 973 * If this call fails, it's because the mod_filename
976 974 * was changed after the call to modload() (mod_hold_by_name()
977 975 * is the likely culprit). We can safely just take
978 976 * another lap if this is the case; the modload() will
979 977 * change the mod_filename back to one by which we can
980 978 * find the modctl.
981 979 */
982 980 modp = mod_find_by_filename("sys", syscallnames[code]);
983 981
984 982 if (modp == NULL)
985 983 continue;
986 984
987 985 mutex_enter(&mod_lock);
988 986
989 987 if (!modp->mod_installed) {
990 988 mutex_exit(&mod_lock);
991 989 continue;
992 990 }
993 991 break;
994 992 }
995 993
996 994 rw_enter(module_lock, RW_READER);
997 995
998 996 if (id != -1)
999 997 mutex_exit(&mod_lock);
1000 998
1001 999 return (module_lock);
1002 1000 }
1003 1001
1004 1002 /*
1005 1003 * Loadable syscall support.
1006 1004 * If needed, load the module, then reserve it by holding a read
1007 1005 * lock for the duration of the call.
1008 1006 * Later, if the syscall is not unloadable, it could patch the vector.
1009 1007 */
1010 1008 /*ARGSUSED*/
1011 1009 int64_t
1012 1010 loadable_syscall(
1013 1011 long a0, long a1, long a2, long a3,
1014 1012 long a4, long a5, long a6, long a7)
1015 1013 {
1016 1014 int64_t rval;
1017 1015 struct sysent *callp;
1018 1016 struct sysent *se = LWP_GETSYSENT(ttolwp(curthread));
1019 1017 krwlock_t *module_lock;
1020 1018 int code;
1021 1019
1022 1020 code = curthread->t_sysnum;
1023 1021 callp = se + code;
1024 1022
1025 1023 /*
1026 1024 * Try to autoload the system call if necessary.
1027 1025 */
1028 1026 module_lock = lock_syscall(se, code);
1029 1027 THREAD_KPRI_RELEASE(); /* drop priority given by rw_enter */
1030 1028
1031 1029 /*
1032 1030 * we've locked either the loaded syscall or nosys
1033 1031 */
1034 1032 if (callp->sy_flags & SE_ARGC) {
1035 1033 int64_t (*sy_call)();
1036 1034
1037 1035 sy_call = (int64_t (*)())callp->sy_call;
1038 1036 rval = (*sy_call)(a0, a1, a2, a3, a4, a5);
1039 1037 } else {
1040 1038 rval = syscall_ap();
1041 1039 }
1042 1040
1043 1041 THREAD_KPRI_REQUEST(); /* regain priority from read lock */
1044 1042 rw_exit(module_lock);
1045 1043 return (rval);
1046 1044 }
1047 1045
1048 1046 /*
1049 1047 * Handle indirect system calls.
1050 1048 * This interface should be deprecated. The library can handle
1051 1049 * this more efficiently, but keep this implementation for old binaries.
1052 1050 *
1053 1051 * XX64 Needs some work.
1054 1052 */
1055 1053 int64_t
1056 1054 indir(int code, long a0, long a1, long a2, long a3, long a4)
1057 1055 {
1058 1056 klwp_t *lwp = ttolwp(curthread);
1059 1057 struct sysent *callp;
1060 1058
1061 1059 if (code <= 0 || code >= NSYSCALL)
1062 1060 return (nosys());
1063 1061
1064 1062 ASSERT(lwp->lwp_ap != NULL);
1065 1063
1066 1064 curthread->t_sysnum = code;
1067 1065 callp = LWP_GETSYSENT(lwp) + code;
1068 1066
1069 1067 /*
1070 1068 * Handle argument setup, unless already done in pre_syscall().
1071 1069 */
1072 1070 if (callp->sy_narg > 5) {
1073 1071 if (save_syscall_args()) /* move args to LWP array */
1074 1072 return ((int64_t)set_errno(EFAULT));
1075 1073 } else if (!lwp->lwp_argsaved) {
1076 1074 long *ap;
1077 1075
1078 1076 ap = lwp->lwp_ap; /* args haven't been saved */
1079 1077 lwp->lwp_ap = ap + 1; /* advance arg pointer */
1080 1078 curthread->t_post_sys = 1; /* so lwp_ap will be reset */
1081 1079 }
1082 1080 return ((*callp->sy_callc)(a0, a1, a2, a3, a4, lwp->lwp_arg[5]));
1083 1081 }
1084 1082
1085 1083 /*
1086 1084 * set_errno - set an error return from the current system call.
1087 1085 * This could be a macro.
1088 1086 * This returns the value it is passed, so that the caller can
1089 1087 * use tail-recursion-elimination and do return (set_errno(ERRNO));
1090 1088 */
1091 1089 uint_t
1092 1090 set_errno(uint_t error)
1093 1091 {
1094 1092 ASSERT(error != 0); /* must not be used to clear errno */
1095 1093
1096 1094 curthread->t_post_sys = 1; /* have post_syscall do error return */
1097 1095 return (ttolwp(curthread)->lwp_errno = error);
1098 1096 }
1099 1097
1100 1098 /*
1101 1099 * set_proc_pre_sys - Set pre-syscall processing for entire process.
1102 1100 */
1103 1101 void
1104 1102 set_proc_pre_sys(proc_t *p)
1105 1103 {
1106 1104 kthread_t *t;
1107 1105 kthread_t *first;
1108 1106
1109 1107 ASSERT(MUTEX_HELD(&p->p_lock));
1110 1108
1111 1109 t = first = p->p_tlist;
1112 1110 do {
1113 1111 t->t_pre_sys = 1;
1114 1112 } while ((t = t->t_forw) != first);
1115 1113 }
1116 1114
1117 1115 /*
1118 1116 * set_proc_post_sys - Set post-syscall processing for entire process.
1119 1117 */
1120 1118 void
1121 1119 set_proc_post_sys(proc_t *p)
1122 1120 {
1123 1121 kthread_t *t;
1124 1122 kthread_t *first;
1125 1123
1126 1124 ASSERT(MUTEX_HELD(&p->p_lock));
1127 1125
1128 1126 t = first = p->p_tlist;
1129 1127 do {
1130 1128 t->t_post_sys = 1;
1131 1129 } while ((t = t->t_forw) != first);
1132 1130 }
1133 1131
1134 1132 /*
1135 1133 * set_proc_sys - Set pre- and post-syscall processing for entire process.
1136 1134 */
1137 1135 void
1138 1136 set_proc_sys(proc_t *p)
1139 1137 {
1140 1138 kthread_t *t;
1141 1139 kthread_t *first;
1142 1140
1143 1141 ASSERT(MUTEX_HELD(&p->p_lock));
1144 1142
1145 1143 t = first = p->p_tlist;
1146 1144 do {
1147 1145 t->t_pre_sys = 1;
1148 1146 t->t_post_sys = 1;
1149 1147 } while ((t = t->t_forw) != first);
1150 1148 }
1151 1149
1152 1150 /*
1153 1151 * set_all_proc_sys - set pre- and post-syscall processing flags for all
1154 1152 * user processes.
1155 1153 *
1156 1154 * This is needed when auditing, tracing, or other facilities which affect
1157 1155 * all processes are turned on.
1158 1156 */
1159 1157 void
1160 1158 set_all_proc_sys()
1161 1159 {
1162 1160 kthread_t *t;
1163 1161 kthread_t *first;
1164 1162
1165 1163 mutex_enter(&pidlock);
1166 1164 t = first = curthread;
1167 1165 do {
1168 1166 t->t_pre_sys = 1;
1169 1167 t->t_post_sys = 1;
1170 1168 } while ((t = t->t_next) != first);
1171 1169 mutex_exit(&pidlock);
1172 1170 }
1173 1171
1174 1172 /*
1175 1173 * set_all_zone_usr_proc_sys - set pre- and post-syscall processing flags for
1176 1174 * all user processes running in the zone of the current process
1177 1175 *
1178 1176 * This is needed when auditing is turned on.
1179 1177 */
1180 1178 void
1181 1179 set_all_zone_usr_proc_sys(zoneid_t zoneid)
1182 1180 {
1183 1181 proc_t *p;
1184 1182 kthread_t *t;
1185 1183
1186 1184 mutex_enter(&pidlock);
1187 1185 for (p = practive; p != NULL; p = p->p_next) {
1188 1186 /* skip kernel processes */
1189 1187 if (p->p_exec == NULLVP || p->p_as == &kas ||
1190 1188 p->p_stat == SIDL || p->p_stat == SZOMB ||
1191 1189 (p->p_flag & (SSYS | SEXITING | SEXITLWPS)))
1192 1190 continue;
1193 1191 /*
1194 1192 * Only processes in the given zone (eventually in
1195 1193 * all zones) are taken into account
1196 1194 */
1197 1195 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) {
1198 1196 mutex_enter(&p->p_lock);
1199 1197 if ((t = p->p_tlist) == NULL) {
1200 1198 mutex_exit(&p->p_lock);
1201 1199 continue;
1202 1200 }
1203 1201 /*
1204 1202 * Set pre- and post-syscall processing flags
1205 1203 * for all threads of the process
1206 1204 */
1207 1205 do {
1208 1206 t->t_pre_sys = 1;
1209 1207 t->t_post_sys = 1;
1210 1208 } while (p->p_tlist != (t = t->t_forw));
1211 1209 mutex_exit(&p->p_lock);
1212 1210 }
1213 1211 }
1214 1212 mutex_exit(&pidlock);
1215 1213 }
1216 1214
1217 1215 /*
1218 1216 * set_proc_ast - Set asynchronous service trap (AST) flag for all
1219 1217 * threads in process.
1220 1218 */
1221 1219 void
1222 1220 set_proc_ast(proc_t *p)
1223 1221 {
1224 1222 kthread_t *t;
1225 1223 kthread_t *first;
1226 1224
1227 1225 ASSERT(MUTEX_HELD(&p->p_lock));
1228 1226
1229 1227 t = first = p->p_tlist;
1230 1228 do {
1231 1229 aston(t);
1232 1230 } while ((t = t->t_forw) != first);
1233 1231 }
↓ open down ↓ |
860 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX