1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/uio.h>
  33 #include <sys/param.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/cred.h>
  36 #include <sys/policy.h>
  37 #include <sys/debug.h>
  38 #include <sys/errno.h>
  39 #include <sys/file.h>
  40 #include <sys/inline.h>
  41 #include <sys/kmem.h>
  42 #include <sys/proc.h>
  43 #include <sys/brand.h>
  44 #include <sys/regset.h>
  45 #include <sys/sysmacros.h>
  46 #include <sys/systm.h>
  47 #include <sys/vfs.h>
  48 #include <sys/vnode.h>
  49 #include <sys/signal.h>
  50 #include <sys/auxv.h>
  51 #include <sys/user.h>
  52 #include <sys/class.h>
  53 #include <sys/fault.h>
  54 #include <sys/syscall.h>
  55 #include <sys/procfs.h>
  56 #include <sys/zone.h>
  57 #include <sys/copyops.h>
  58 #include <sys/schedctl.h>
  59 #include <vm/as.h>
  60 #include <vm/seg.h>
  61 #include <fs/proc/prdata.h>
  62 #include <sys/contract/process_impl.h>
  63 
  64 static  void    pr_settrace(proc_t *, sigset_t *);
  65 static  int     pr_setfpregs(prnode_t *, prfpregset_t *);
  66 #if defined(__sparc)
  67 static  int     pr_setxregs(prnode_t *, prxregset_t *);
  68 static  int     pr_setasrs(prnode_t *, asrset_t);
  69 #endif
  70 static  int     pr_setvaddr(prnode_t *, caddr_t);
  71 static  int     pr_clearsig(prnode_t *);
  72 static  int     pr_clearflt(prnode_t *);
  73 static  int     pr_watch(prnode_t *, prwatch_t *, int *);
  74 static  int     pr_agent(prnode_t *, prgregset_t, int *);
  75 static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
  76 static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
  77 static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
  78 static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
  79 static  void    pauselwps(proc_t *);
  80 static  void    unpauselwps(proc_t *);
  81 
  82 typedef union {
  83         long            sig;            /* PCKILL, PCUNKILL */
  84         long            nice;           /* PCNICE */
  85         long            timeo;          /* PCTWSTOP */
  86         ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
  87         caddr_t         vaddr;          /* PCSVADDR */
  88         siginfo_t       siginfo;        /* PCSSIG */
  89         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
  90         fltset_t        fltset;         /* PCSFAULT */
  91         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
  92         prgregset_t     prgregset;      /* PCSREG, PCAGENT */
  93         prfpregset_t    prfpregset;     /* PCSFPREG */
  94 #if defined(__sparc)
  95         prxregset_t     prxregset;      /* PCSXREG */
  96         asrset_t        asrset;         /* PCSASRS */
  97 #endif
  98         prwatch_t       prwatch;        /* PCWATCH */
  99         priovec_t       priovec;        /* PCREAD, PCWRITE */
 100         prcred_t        prcred;         /* PCSCRED */
 101         prpriv_t        prpriv;         /* PCSPRIV */
 102         long            przoneid;       /* PCSZONE */
 103 } arg_t;
 104 
 105 static  int     pr_control(long, arg_t *, prnode_t *, cred_t *);
 106 
 107 static size_t
 108 ctlsize(long cmd, size_t resid, arg_t *argp)
 109 {
 110         size_t size = sizeof (long);
 111         size_t rnd;
 112         int ngrp;
 113 
 114         switch (cmd) {
 115         case PCNULL:
 116         case PCSTOP:
 117         case PCDSTOP:
 118         case PCWSTOP:
 119         case PCCSIG:
 120         case PCCFAULT:
 121                 break;
 122         case PCSSIG:
 123                 size += sizeof (siginfo_t);
 124                 break;
 125         case PCTWSTOP:
 126                 size += sizeof (long);
 127                 break;
 128         case PCKILL:
 129         case PCUNKILL:
 130         case PCNICE:
 131                 size += sizeof (long);
 132                 break;
 133         case PCRUN:
 134         case PCSET:
 135         case PCUNSET:
 136                 size += sizeof (ulong_t);
 137                 break;
 138         case PCSVADDR:
 139                 size += sizeof (caddr_t);
 140                 break;
 141         case PCSTRACE:
 142         case PCSHOLD:
 143                 size += sizeof (sigset_t);
 144                 break;
 145         case PCSFAULT:
 146                 size += sizeof (fltset_t);
 147                 break;
 148         case PCSENTRY:
 149         case PCSEXIT:
 150                 size += sizeof (sysset_t);
 151                 break;
 152         case PCSREG:
 153         case PCAGENT:
 154                 size += sizeof (prgregset_t);
 155                 break;
 156         case PCSFPREG:
 157                 size += sizeof (prfpregset_t);
 158                 break;
 159 #if defined(__sparc)
 160         case PCSXREG:
 161                 size += sizeof (prxregset_t);
 162                 break;
 163         case PCSASRS:
 164                 size += sizeof (asrset_t);
 165                 break;
 166 #endif
 167         case PCWATCH:
 168                 size += sizeof (prwatch_t);
 169                 break;
 170         case PCREAD:
 171         case PCWRITE:
 172                 size += sizeof (priovec_t);
 173                 break;
 174         case PCSCRED:
 175                 size += sizeof (prcred_t);
 176                 break;
 177         case PCSCREDX:
 178                 /*
 179                  * We cannot derefence the pr_ngroups fields if it
 180                  * we don't have enough data.
 181                  */
 182                 if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
 183                         return (0);
 184                 ngrp = argp->prcred.pr_ngroups;
 185                 if (ngrp < 0 || ngrp > ngroups_max)
 186                         return (0);
 187 
 188                 /* The result can be smaller than sizeof (prcred_t) */
 189                 size += sizeof (prcred_t) - sizeof (gid_t);
 190                 size += ngrp * sizeof (gid_t);
 191                 break;
 192         case PCSPRIV:
 193                 if (resid >= size + sizeof (prpriv_t))
 194                         size += priv_prgetprivsize(&argp->prpriv);
 195                 else
 196                         return (0);
 197                 break;
 198         case PCSZONE:
 199                 size += sizeof (long);
 200                 break;
 201         default:
 202                 return (0);
 203         }
 204 
 205         /* Round up to a multiple of long, unless exact amount written */
 206         if (size < resid) {
 207                 rnd = size & (sizeof (long) - 1);
 208 
 209                 if (rnd != 0)
 210                         size += sizeof (long) - rnd;
 211         }
 212 
 213         if (size > resid)
 214                 return (0);
 215         return (size);
 216 }
 217 
 218 /*
 219  * Control operations (lots).
 220  */
 221 int
 222 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
 223 {
 224 #define MY_BUFFER_SIZE \
 225                 100 > 1 + sizeof (arg_t) / sizeof (long) ? \
 226                 100 : 1 + sizeof (arg_t) / sizeof (long)
 227         long buf[MY_BUFFER_SIZE];
 228         long *bufp;
 229         size_t resid = 0;
 230         size_t size;
 231         prnode_t *pnp = VTOP(vp);
 232         int error;
 233         int locked = 0;
 234 
 235         while (uiop->uio_resid) {
 236                 /*
 237                  * Read several commands in one gulp.
 238                  */
 239                 bufp = buf;
 240                 if (resid) {    /* move incomplete command to front of buffer */
 241                         long *tail;
 242 
 243                         if (resid >= sizeof (buf))
 244                                 break;
 245                         tail = (long *)((char *)buf + sizeof (buf) - resid);
 246                         do {
 247                                 *bufp++ = *tail++;
 248                         } while ((resid -= sizeof (long)) != 0);
 249                 }
 250                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 251                 if (resid > uiop->uio_resid)
 252                         resid = uiop->uio_resid;
 253                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 254                         return (error);
 255                 resid += (char *)bufp - (char *)buf;
 256                 bufp = buf;
 257 
 258                 do {            /* loop over commands in buffer */
 259                         long cmd = bufp[0];
 260                         arg_t *argp = (arg_t *)&bufp[1];
 261 
 262                         size = ctlsize(cmd, resid, argp);
 263                         if (size == 0)  /* incomplete or invalid command */
 264                                 break;
 265                         /*
 266                          * Perform the specified control operation.
 267                          */
 268                         if (!locked) {
 269                                 if ((error = prlock(pnp, ZNO)) != 0)
 270                                         return (error);
 271                                 locked = 1;
 272                         }
 273                         if (error = pr_control(cmd, argp, pnp, cr)) {
 274                                 if (error == -1)        /* -1 is timeout */
 275                                         locked = 0;
 276                                 else
 277                                         return (error);
 278                         }
 279                         bufp = (long *)((char *)bufp + size);
 280                 } while ((resid -= size) != 0);
 281 
 282                 if (locked) {
 283                         prunlock(pnp);
 284                         locked = 0;
 285                 }
 286         }
 287         return (resid? EINVAL : 0);
 288 }
 289 
 290 static int
 291 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
 292 {
 293         prcommon_t *pcp;
 294         proc_t *p;
 295         int unlocked;
 296         int error = 0;
 297 
 298         if (cmd == PCNULL)
 299                 return (0);
 300 
 301         pcp = pnp->pr_common;
 302         p = pcp->prc_proc;
 303         ASSERT(p != NULL);
 304 
 305         /* System processes defy control. */
 306         if (p->p_flag & SSYS) {
 307                 prunlock(pnp);
 308                 return (EBUSY);
 309         }
 310 
 311         switch (cmd) {
 312 
 313         default:
 314                 error = EINVAL;
 315                 break;
 316 
 317         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 318         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 319         case PCWSTOP:   /* wait for process or lwp to stop */
 320         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 321                 {
 322                         time_t timeo;
 323 
 324                         /*
 325                          * Can't apply to a system process.
 326                          */
 327                         if (p->p_as == &kas) {
 328                                 error = EBUSY;
 329                                 break;
 330                         }
 331 
 332                         if (cmd == PCSTOP || cmd == PCDSTOP)
 333                                 pr_stop(pnp);
 334 
 335                         if (cmd == PCDSTOP)
 336                                 break;
 337 
 338                         /*
 339                          * If an lwp is waiting for itself or its process,
 340                          * don't wait. The stopped lwp would never see the
 341                          * fact that it is stopped.
 342                          */
 343                         if ((pcp->prc_flags & PRC_LWP)?
 344                             (pcp->prc_thread == curthread) : (p == curproc)) {
 345                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 346                                         error = EBUSY;
 347                                 break;
 348                         }
 349 
 350                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 351                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 352                                 return (error);
 353 
 354                         break;
 355                 }
 356 
 357         case PCRUN:     /* make lwp or process runnable */
 358                 error = pr_setrun(pnp, argp->flags);
 359                 break;
 360 
 361         case PCSTRACE:  /* set signal trace mask */
 362                 pr_settrace(p,  &argp->sigset);
 363                 break;
 364 
 365         case PCSSIG:    /* set current signal */
 366                 error = pr_setsig(pnp, &argp->siginfo);
 367                 if (argp->siginfo.si_signo == SIGKILL && error == 0) {
 368                         prunlock(pnp);
 369                         pr_wait_die(pnp);
 370                         return (-1);
 371                 }
 372                 break;
 373 
 374         case PCKILL:    /* send signal */
 375                 error = pr_kill(pnp, (int)argp->sig, cr);
 376                 if (error == 0 && argp->sig == SIGKILL) {
 377                         prunlock(pnp);
 378                         pr_wait_die(pnp);
 379                         return (-1);
 380                 }
 381                 break;
 382 
 383         case PCUNKILL:  /* delete a pending signal */
 384                 error = pr_unkill(pnp, (int)argp->sig);
 385                 break;
 386 
 387         case PCNICE:    /* set nice priority */
 388                 error = pr_nice(p, (int)argp->nice, cr);
 389                 break;
 390 
 391         case PCSENTRY:  /* set syscall entry bit mask */
 392         case PCSEXIT:   /* set syscall exit bit mask */
 393                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 394                 break;
 395 
 396         case PCSET:     /* set process flags */
 397                 error = pr_set(p, argp->flags);
 398                 break;
 399 
 400         case PCUNSET:   /* unset process flags */
 401                 error = pr_unset(p, argp->flags);
 402                 break;
 403 
 404         case PCSREG:    /* set general registers */
 405                 {
 406                         kthread_t *t = pr_thread(pnp);
 407 
 408                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 409                                 thread_unlock(t);
 410                                 error = EBUSY;
 411                         } else {
 412                                 thread_unlock(t);
 413                                 mutex_exit(&p->p_lock);
 414                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 415                                 mutex_enter(&p->p_lock);
 416                         }
 417                         break;
 418                 }
 419 
 420         case PCSFPREG:  /* set floating-point registers */
 421                 error = pr_setfpregs(pnp, &argp->prfpregset);
 422                 break;
 423 
 424         case PCSXREG:   /* set extra registers */
 425 #if defined(__sparc)
 426                 error = pr_setxregs(pnp, &argp->prxregset);
 427 #else
 428                 error = EINVAL;
 429 #endif
 430                 break;
 431 
 432 #if defined(__sparc)
 433         case PCSASRS:   /* set ancillary state registers */
 434                 error = pr_setasrs(pnp, argp->asrset);
 435                 break;
 436 #endif
 437 
 438         case PCSVADDR:  /* set virtual address at which to resume */
 439                 error = pr_setvaddr(pnp, argp->vaddr);
 440                 break;
 441 
 442         case PCSHOLD:   /* set signal-hold mask */
 443                 pr_sethold(pnp, &argp->sigset);
 444                 break;
 445 
 446         case PCSFAULT:  /* set mask of traced faults */
 447                 pr_setfault(p, &argp->fltset);
 448                 break;
 449 
 450         case PCCSIG:    /* clear current signal */
 451                 error = pr_clearsig(pnp);
 452                 break;
 453 
 454         case PCCFAULT:  /* clear current fault */
 455                 error = pr_clearflt(pnp);
 456                 break;
 457 
 458         case PCWATCH:   /* set or clear watched areas */
 459                 error = pr_watch(pnp, &argp->prwatch, &unlocked);
 460                 if (error && unlocked)
 461                         return (error);
 462                 break;
 463 
 464         case PCAGENT:   /* create the /proc agent lwp in the target process */
 465                 error = pr_agent(pnp, argp->prgregset, &unlocked);
 466                 if (error && unlocked)
 467                         return (error);
 468                 break;
 469 
 470         case PCREAD:    /* read from the address space */
 471                 error = pr_rdwr(p, UIO_READ, &argp->priovec);
 472                 break;
 473 
 474         case PCWRITE:   /* write to the address space */
 475                 error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
 476                 break;
 477 
 478         case PCSCRED:   /* set the process credentials */
 479         case PCSCREDX:
 480                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 481                 break;
 482 
 483         case PCSPRIV:   /* set the process privileges */
 484                 error = pr_spriv(p, &argp->prpriv, cr);
 485                 break;
 486         case PCSZONE:   /* set the process's zoneid credentials */
 487                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 488                 break;
 489         }
 490 
 491         if (error)
 492                 prunlock(pnp);
 493         return (error);
 494 }
 495 
 496 #ifdef _SYSCALL32_IMPL
 497 
 498 typedef union {
 499         int32_t         sig;            /* PCKILL, PCUNKILL */
 500         int32_t         nice;           /* PCNICE */
 501         int32_t         timeo;          /* PCTWSTOP */
 502         uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
 503         caddr32_t       vaddr;          /* PCSVADDR */
 504         siginfo32_t     siginfo;        /* PCSSIG */
 505         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
 506         fltset_t        fltset;         /* PCSFAULT */
 507         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
 508         prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
 509         prfpregset32_t  prfpregset;     /* PCSFPREG */
 510 #if defined(__sparc)
 511         prxregset_t     prxregset;      /* PCSXREG */
 512 #endif
 513         prwatch32_t     prwatch;        /* PCWATCH */
 514         priovec32_t     priovec;        /* PCREAD, PCWRITE */
 515         prcred32_t      prcred;         /* PCSCRED */
 516         prpriv_t        prpriv;         /* PCSPRIV */
 517         int32_t         przoneid;       /* PCSZONE */
 518 } arg32_t;
 519 
 520 static  int     pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
 521 static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 522 
 523 /*
 524  * Note that while ctlsize32() can use argp, it must do so only in a way
 525  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
 526  * to an array of 32-bit values and only 32-bit alignment is ensured.
 527  */
 528 static size_t
 529 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
 530 {
 531         size_t size = sizeof (int32_t);
 532         size_t rnd;
 533         int ngrp;
 534 
 535         switch (cmd) {
 536         case PCNULL:
 537         case PCSTOP:
 538         case PCDSTOP:
 539         case PCWSTOP:
 540         case PCCSIG:
 541         case PCCFAULT:
 542                 break;
 543         case PCSSIG:
 544                 size += sizeof (siginfo32_t);
 545                 break;
 546         case PCTWSTOP:
 547                 size += sizeof (int32_t);
 548                 break;
 549         case PCKILL:
 550         case PCUNKILL:
 551         case PCNICE:
 552                 size += sizeof (int32_t);
 553                 break;
 554         case PCRUN:
 555         case PCSET:
 556         case PCUNSET:
 557                 size += sizeof (uint32_t);
 558                 break;
 559         case PCSVADDR:
 560                 size += sizeof (caddr32_t);
 561                 break;
 562         case PCSTRACE:
 563         case PCSHOLD:
 564                 size += sizeof (sigset_t);
 565                 break;
 566         case PCSFAULT:
 567                 size += sizeof (fltset_t);
 568                 break;
 569         case PCSENTRY:
 570         case PCSEXIT:
 571                 size += sizeof (sysset_t);
 572                 break;
 573         case PCSREG:
 574         case PCAGENT:
 575                 size += sizeof (prgregset32_t);
 576                 break;
 577         case PCSFPREG:
 578                 size += sizeof (prfpregset32_t);
 579                 break;
 580 #if defined(__sparc)
 581         case PCSXREG:
 582                 size += sizeof (prxregset_t);
 583                 break;
 584 #endif
 585         case PCWATCH:
 586                 size += sizeof (prwatch32_t);
 587                 break;
 588         case PCREAD:
 589         case PCWRITE:
 590                 size += sizeof (priovec32_t);
 591                 break;
 592         case PCSCRED:
 593                 size += sizeof (prcred32_t);
 594                 break;
 595         case PCSCREDX:
 596                 /*
 597                  * We cannot derefence the pr_ngroups fields if it
 598                  * we don't have enough data.
 599                  */
 600                 if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
 601                         return (0);
 602                 ngrp = argp->prcred.pr_ngroups;
 603                 if (ngrp < 0 || ngrp > ngroups_max)
 604                         return (0);
 605 
 606                 /* The result can be smaller than sizeof (prcred32_t) */
 607                 size += sizeof (prcred32_t) - sizeof (gid32_t);
 608                 size += ngrp * sizeof (gid32_t);
 609                 break;
 610         case PCSPRIV:
 611                 if (resid >= size + sizeof (prpriv_t))
 612                         size += priv_prgetprivsize(&argp->prpriv);
 613                 else
 614                         return (0);
 615                 break;
 616         case PCSZONE:
 617                 size += sizeof (int32_t);
 618                 break;
 619         default:
 620                 return (0);
 621         }
 622 
 623         /* Round up to a multiple of int32_t */
 624         rnd = size & (sizeof (int32_t) - 1);
 625 
 626         if (rnd != 0)
 627                 size += sizeof (int32_t) - rnd;
 628 
 629         if (size > resid)
 630                 return (0);
 631         return (size);
 632 }
 633 
 634 /*
 635  * Control operations (lots).
 636  */
 637 int
 638 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
 639 {
 640 #define MY_BUFFER_SIZE32 \
 641                 100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
 642                 100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
 643         int32_t buf[MY_BUFFER_SIZE32];
 644         int32_t *bufp;
 645         arg32_t arg;
 646         size_t resid = 0;
 647         size_t size;
 648         prnode_t *pnp = VTOP(vp);
 649         int error;
 650         int locked = 0;
 651 
 652         while (uiop->uio_resid) {
 653                 /*
 654                  * Read several commands in one gulp.
 655                  */
 656                 bufp = buf;
 657                 if (resid) {    /* move incomplete command to front of buffer */
 658                         int32_t *tail;
 659 
 660                         if (resid >= sizeof (buf))
 661                                 break;
 662                         tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
 663                         do {
 664                                 *bufp++ = *tail++;
 665                         } while ((resid -= sizeof (int32_t)) != 0);
 666                 }
 667                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 668                 if (resid > uiop->uio_resid)
 669                         resid = uiop->uio_resid;
 670                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 671                         return (error);
 672                 resid += (char *)bufp - (char *)buf;
 673                 bufp = buf;
 674 
 675                 do {            /* loop over commands in buffer */
 676                         int32_t cmd = bufp[0];
 677                         arg32_t *argp = (arg32_t *)&bufp[1];
 678 
 679                         size = ctlsize32(cmd, resid, argp);
 680                         if (size == 0)  /* incomplete or invalid command */
 681                                 break;
 682                         /*
 683                          * Perform the specified control operation.
 684                          */
 685                         if (!locked) {
 686                                 if ((error = prlock(pnp, ZNO)) != 0)
 687                                         return (error);
 688                                 locked = 1;
 689                         }
 690 
 691                         /*
 692                          * Since some members of the arg32_t union contain
 693                          * 64-bit values (which must be 64-bit aligned), we
 694                          * can't simply pass a pointer to the structure as
 695                          * it may be unaligned. Note that we do pass the
 696                          * potentially unaligned structure to ctlsize32()
 697                          * above, but that uses it a way that makes no
 698                          * assumptions about alignment.
 699                          */
 700                         ASSERT(size - sizeof (cmd) <= sizeof (arg));
 701                         bcopy(argp, &arg, size - sizeof (cmd));
 702 
 703                         if (error = pr_control32(cmd, &arg, pnp, cr)) {
 704                                 if (error == -1)        /* -1 is timeout */
 705                                         locked = 0;
 706                                 else
 707                                         return (error);
 708                         }
 709                         bufp = (int32_t *)((char *)bufp + size);
 710                 } while ((resid -= size) != 0);
 711 
 712                 if (locked) {
 713                         prunlock(pnp);
 714                         locked = 0;
 715                 }
 716         }
 717         return (resid? EINVAL : 0);
 718 }
 719 
 720 static int
 721 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
 722 {
 723         prcommon_t *pcp;
 724         proc_t *p;
 725         int unlocked;
 726         int error = 0;
 727 
 728         if (cmd == PCNULL)
 729                 return (0);
 730 
 731         pcp = pnp->pr_common;
 732         p = pcp->prc_proc;
 733         ASSERT(p != NULL);
 734 
 735         if (p->p_flag & SSYS) {
 736                 prunlock(pnp);
 737                 return (EBUSY);
 738         }
 739 
 740         switch (cmd) {
 741 
 742         default:
 743                 error = EINVAL;
 744                 break;
 745 
 746         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 747         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 748         case PCWSTOP:   /* wait for process or lwp to stop */
 749         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 750                 {
 751                         time_t timeo;
 752 
 753                         /*
 754                          * Can't apply to a system process.
 755                          */
 756                         if (p->p_as == &kas) {
 757                                 error = EBUSY;
 758                                 break;
 759                         }
 760 
 761                         if (cmd == PCSTOP || cmd == PCDSTOP)
 762                                 pr_stop(pnp);
 763 
 764                         if (cmd == PCDSTOP)
 765                                 break;
 766 
 767                         /*
 768                          * If an lwp is waiting for itself or its process,
 769                          * don't wait. The lwp will never see the fact that
 770                          * itself is stopped.
 771                          */
 772                         if ((pcp->prc_flags & PRC_LWP)?
 773                             (pcp->prc_thread == curthread) : (p == curproc)) {
 774                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 775                                         error = EBUSY;
 776                                 break;
 777                         }
 778 
 779                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 780                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 781                                 return (error);
 782 
 783                         break;
 784                 }
 785 
 786         case PCRUN:     /* make lwp or process runnable */
 787                 error = pr_setrun(pnp, (ulong_t)argp->flags);
 788                 break;
 789 
 790         case PCSTRACE:  /* set signal trace mask */
 791                 pr_settrace(p,  &argp->sigset);
 792                 break;
 793 
 794         case PCSSIG:    /* set current signal */
 795                 if (PROCESS_NOT_32BIT(p))
 796                         error = EOVERFLOW;
 797                 else {
 798                         int sig = (int)argp->siginfo.si_signo;
 799                         siginfo_t siginfo;
 800 
 801                         bzero(&siginfo, sizeof (siginfo));
 802                         siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
 803                         error = pr_setsig(pnp, &siginfo);
 804                         if (sig == SIGKILL && error == 0) {
 805                                 prunlock(pnp);
 806                                 pr_wait_die(pnp);
 807                                 return (-1);
 808                         }
 809                 }
 810                 break;
 811 
 812         case PCKILL:    /* send signal */
 813                 error = pr_kill(pnp, (int)argp->sig, cr);
 814                 if (error == 0 && argp->sig == SIGKILL) {
 815                         prunlock(pnp);
 816                         pr_wait_die(pnp);
 817                         return (-1);
 818                 }
 819                 break;
 820 
 821         case PCUNKILL:  /* delete a pending signal */
 822                 error = pr_unkill(pnp, (int)argp->sig);
 823                 break;
 824 
 825         case PCNICE:    /* set nice priority */
 826                 error = pr_nice(p, (int)argp->nice, cr);
 827                 break;
 828 
 829         case PCSENTRY:  /* set syscall entry bit mask */
 830         case PCSEXIT:   /* set syscall exit bit mask */
 831                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 832                 break;
 833 
 834         case PCSET:     /* set process flags */
 835                 error = pr_set(p, (long)argp->flags);
 836                 break;
 837 
 838         case PCUNSET:   /* unset process flags */
 839                 error = pr_unset(p, (long)argp->flags);
 840                 break;
 841 
 842         case PCSREG:    /* set general registers */
 843                 if (PROCESS_NOT_32BIT(p))
 844                         error = EOVERFLOW;
 845                 else {
 846                         kthread_t *t = pr_thread(pnp);
 847 
 848                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 849                                 thread_unlock(t);
 850                                 error = EBUSY;
 851                         } else {
 852                                 prgregset_t prgregset;
 853                                 klwp_t *lwp = ttolwp(t);
 854 
 855                                 thread_unlock(t);
 856                                 mutex_exit(&p->p_lock);
 857                                 prgregset_32ton(lwp, argp->prgregset,
 858                                     prgregset);
 859                                 prsetprregs(lwp, prgregset, 0);
 860                                 mutex_enter(&p->p_lock);
 861                         }
 862                 }
 863                 break;
 864 
 865         case PCSFPREG:  /* set floating-point registers */
 866                 if (PROCESS_NOT_32BIT(p))
 867                         error = EOVERFLOW;
 868                 else
 869                         error = pr_setfpregs32(pnp, &argp->prfpregset);
 870                 break;
 871 
 872         case PCSXREG:   /* set extra registers */
 873 #if defined(__sparc)
 874                 if (PROCESS_NOT_32BIT(p))
 875                         error = EOVERFLOW;
 876                 else
 877                         error = pr_setxregs(pnp, &argp->prxregset);
 878 #else
 879                 error = EINVAL;
 880 #endif
 881                 break;
 882 
 883         case PCSVADDR:  /* set virtual address at which to resume */
 884                 if (PROCESS_NOT_32BIT(p))
 885                         error = EOVERFLOW;
 886                 else
 887                         error = pr_setvaddr(pnp,
 888                             (caddr_t)(uintptr_t)argp->vaddr);
 889                 break;
 890 
 891         case PCSHOLD:   /* set signal-hold mask */
 892                 pr_sethold(pnp, &argp->sigset);
 893                 break;
 894 
 895         case PCSFAULT:  /* set mask of traced faults */
 896                 pr_setfault(p, &argp->fltset);
 897                 break;
 898 
 899         case PCCSIG:    /* clear current signal */
 900                 error = pr_clearsig(pnp);
 901                 break;
 902 
 903         case PCCFAULT:  /* clear current fault */
 904                 error = pr_clearflt(pnp);
 905                 break;
 906 
 907         case PCWATCH:   /* set or clear watched areas */
 908                 if (PROCESS_NOT_32BIT(p))
 909                         error = EOVERFLOW;
 910                 else {
 911                         prwatch_t prwatch;
 912 
 913                         prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
 914                         prwatch.pr_size = argp->prwatch.pr_size;
 915                         prwatch.pr_wflags = argp->prwatch.pr_wflags;
 916                         prwatch.pr_pad = argp->prwatch.pr_pad;
 917                         error = pr_watch(pnp, &prwatch, &unlocked);
 918                         if (error && unlocked)
 919                                 return (error);
 920                 }
 921                 break;
 922 
 923         case PCAGENT:   /* create the /proc agent lwp in the target process */
 924                 if (PROCESS_NOT_32BIT(p))
 925                         error = EOVERFLOW;
 926                 else {
 927                         prgregset_t prgregset;
 928                         kthread_t *t = pr_thread(pnp);
 929                         klwp_t *lwp = ttolwp(t);
 930                         thread_unlock(t);
 931                         mutex_exit(&p->p_lock);
 932                         prgregset_32ton(lwp, argp->prgregset, prgregset);
 933                         mutex_enter(&p->p_lock);
 934                         error = pr_agent(pnp, prgregset, &unlocked);
 935                         if (error && unlocked)
 936                                 return (error);
 937                 }
 938                 break;
 939 
 940         case PCREAD:    /* read from the address space */
 941         case PCWRITE:   /* write to the address space */
 942                 if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
 943                         error = EOVERFLOW;
 944                 else {
 945                         enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
 946                         priovec_t priovec;
 947 
 948                         priovec.pio_base =
 949                             (void *)(uintptr_t)argp->priovec.pio_base;
 950                         priovec.pio_len = (size_t)argp->priovec.pio_len;
 951                         priovec.pio_offset = (off_t)
 952                             (uint32_t)argp->priovec.pio_offset;
 953                         error = pr_rdwr(p, rw, &priovec);
 954                 }
 955                 break;
 956 
 957         case PCSCRED:   /* set the process credentials */
 958         case PCSCREDX:
 959                 {
 960                         /*
 961                          * All the fields in these structures are exactly the
 962                          * same and so the structures are compatible.  In case
 963                          * this ever changes, we catch this with the ASSERT
 964                          * below.
 965                          */
 966                         prcred_t *prcred = (prcred_t *)&argp->prcred;
 967 
 968 #ifndef __lint
 969                         ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
 970 #endif
 971 
 972                         error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
 973                         break;
 974                 }
 975 
 976         case PCSPRIV:   /* set the process privileges */
 977                 error = pr_spriv(p, &argp->prpriv, cr);
 978                 break;
 979 
 980         case PCSZONE:   /* set the process's zoneid */
 981                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 982                 break;
 983         }
 984 
 985         if (error)
 986                 prunlock(pnp);
 987         return (error);
 988 }
 989 
 990 #endif  /* _SYSCALL32_IMPL */
 991 
 992 /*
 993  * Return the specific or chosen thread/lwp for a control operation.
 994  * Returns with the thread locked via thread_lock(t).
 995  */
 996 kthread_t *
 997 pr_thread(prnode_t *pnp)
 998 {
 999         prcommon_t *pcp = pnp->pr_common;
1000         kthread_t *t;
1001 
1002         if (pcp->prc_flags & PRC_LWP) {
1003                 t = pcp->prc_thread;
1004                 ASSERT(t != NULL);
1005                 thread_lock(t);
1006         } else {
1007                 proc_t *p = pcp->prc_proc;
1008                 t = prchoose(p);        /* returns locked thread */
1009                 ASSERT(t != NULL);
1010         }
1011 
1012         return (t);
1013 }
1014 
1015 /*
1016  * Direct the process or lwp to stop.
1017  */
1018 void
1019 pr_stop(prnode_t *pnp)
1020 {
1021         prcommon_t *pcp = pnp->pr_common;
1022         proc_t *p = pcp->prc_proc;
1023         kthread_t *t;
1024         vnode_t *vp;
1025 
1026         /*
1027          * If already stopped, do nothing; otherwise flag
1028          * it to be stopped the next time it tries to run.
1029          * If sleeping at interruptible priority, set it
1030          * running so it will stop within cv_wait_sig().
1031          *
1032          * Take care to cooperate with jobcontrol: if an lwp
1033          * is stopped due to the default action of a jobcontrol
1034          * stop signal, flag it to be stopped the next time it
1035          * starts due to a SIGCONT signal.
1036          */
1037         if (pcp->prc_flags & PRC_LWP)
1038                 t = pcp->prc_thread;
1039         else
1040                 t = p->p_tlist;
1041         ASSERT(t != NULL);
1042 
1043         do {
1044                 int notify;
1045 
1046                 notify = 0;
1047                 thread_lock(t);
1048                 if (!ISTOPPED(t)) {
1049                         t->t_proc_flag |= TP_PRSTOP;
1050                         t->t_sig_check = 1;  /* do ISSIG */
1051                 }
1052 
1053                 /* Move the thread from wait queue to run queue */
1054                 if (ISWAITING(t))
1055                         setrun_locked(t);
1056 
1057                 if (ISWAKEABLE(t)) {
1058                         if (t->t_wchan0 == NULL)
1059                                 setrun_locked(t);
1060                         else if (!VSTOPPED(t)) {
1061                                 /*
1062                                  * Mark it virtually stopped.
1063                                  */
1064                                 t->t_proc_flag |= TP_PRVSTOP;
1065                                 notify = 1;
1066                         }
1067                 }
1068                 /*
1069                  * force the thread into the kernel
1070                  * if it is not already there.
1071                  */
1072                 prpokethread(t);
1073                 thread_unlock(t);
1074                 if (notify &&
1075                     (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1076                         prnotify(vp);
1077                 if (pcp->prc_flags & PRC_LWP)
1078                         break;
1079         } while ((t = t->t_forw) != p->p_tlist);
1080 
1081         /*
1082          * We do this just in case the thread we asked
1083          * to stop is in holdlwps() (called from cfork()).
1084          */
1085         cv_broadcast(&p->p_holdlwps);
1086 }
1087 
1088 /*
1089  * Sleep until the lwp stops, but cooperate with
1090  * jobcontrol:  Don't wake up if the lwp is stopped
1091  * due to the default action of a jobcontrol stop signal.
1092  * If this is the process file descriptor, sleep
1093  * until all of the process's lwps stop.
1094  */
1095 int
1096 pr_wait_stop(prnode_t *pnp, time_t timeo)
1097 {
1098         prcommon_t *pcp = pnp->pr_common;
1099         proc_t *p = pcp->prc_proc;
1100         timestruc_t rqtime;
1101         timestruc_t *rqtp = NULL;
1102         int timecheck = 0;
1103         kthread_t *t;
1104         int error;
1105 
1106         if (timeo > 0) {     /* millisecond timeout */
1107                 /*
1108                  * Determine the precise future time of the requested timeout.
1109                  */
1110                 timestruc_t now;
1111 
1112                 timecheck = timechanged;
1113                 gethrestime(&now);
1114                 rqtp = &rqtime;
1115                 rqtp->tv_sec = timeo / MILLISEC;
1116                 rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1117                 timespecadd(rqtp, &now);
1118         }
1119 
1120         if (pcp->prc_flags & PRC_LWP) {  /* lwp file descriptor */
1121                 t = pcp->prc_thread;
1122                 ASSERT(t != NULL);
1123                 thread_lock(t);
1124                 while (!ISTOPPED(t) && !VSTOPPED(t)) {
1125                         thread_unlock(t);
1126                         mutex_enter(&pcp->prc_mutex);
1127                         prunlock(pnp);
1128                         error = pr_wait(pcp, rqtp, timecheck);
1129                         if (error)      /* -1 is timeout */
1130                                 return (error);
1131                         if ((error = prlock(pnp, ZNO)) != 0)
1132                                 return (error);
1133                         ASSERT(p == pcp->prc_proc);
1134                         ASSERT(t == pcp->prc_thread);
1135                         thread_lock(t);
1136                 }
1137                 thread_unlock(t);
1138         } else {                        /* process file descriptor */
1139                 t = prchoose(p);        /* returns locked thread */
1140                 ASSERT(t != NULL);
1141                 ASSERT(MUTEX_HELD(&p->p_lock));
1142                 while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1143                     (p->p_flag & SEXITLWPS)) {
1144                         thread_unlock(t);
1145                         mutex_enter(&pcp->prc_mutex);
1146                         prunlock(pnp);
1147                         error = pr_wait(pcp, rqtp, timecheck);
1148                         if (error)      /* -1 is timeout */
1149                                 return (error);
1150                         if ((error = prlock(pnp, ZNO)) != 0)
1151                                 return (error);
1152                         ASSERT(p == pcp->prc_proc);
1153                         t = prchoose(p);        /* returns locked t */
1154                         ASSERT(t != NULL);
1155                 }
1156                 thread_unlock(t);
1157         }
1158 
1159         ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1160             t != NULL && t->t_state != TS_ZOMB);
1161 
1162         return (0);
1163 }
1164 
1165 int
1166 pr_setrun(prnode_t *pnp, ulong_t flags)
1167 {
1168         prcommon_t *pcp = pnp->pr_common;
1169         proc_t *p = pcp->prc_proc;
1170         kthread_t *t;
1171         klwp_t *lwp;
1172 
1173         /*
1174          * Cannot set an lwp running if it is not stopped.
1175          * Also, no lwp other than the /proc agent lwp can
1176          * be set running so long as the /proc agent lwp exists.
1177          */
1178         t = pr_thread(pnp);     /* returns locked thread */
1179         if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1180             !(t->t_proc_flag & TP_PRSTOP)) ||
1181             (p->p_agenttp != NULL &&
1182             (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1183                 thread_unlock(t);
1184                 return (EBUSY);
1185         }
1186         thread_unlock(t);
1187         if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1188                 return (EINVAL);
1189         lwp = ttolwp(t);
1190         if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1191                 /*
1192                  * Discard current siginfo_t, if any.
1193                  */
1194                 lwp->lwp_cursig = 0;
1195                 lwp->lwp_extsig = 0;
1196                 if (lwp->lwp_curinfo) {
1197                         siginfofree(lwp->lwp_curinfo);
1198                         lwp->lwp_curinfo = NULL;
1199                 }
1200         }
1201         if (flags & PRCFAULT)
1202                 lwp->lwp_curflt = 0;
1203         /*
1204          * We can't hold p->p_lock when we touch the lwp's registers.
1205          * It may be swapped out and we will get a page fault.
1206          */
1207         if (flags & PRSTEP) {
1208                 mutex_exit(&p->p_lock);
1209                 prstep(lwp, 0);
1210                 mutex_enter(&p->p_lock);
1211         }
1212         if (flags & PRSTOP) {
1213                 t->t_proc_flag |= TP_PRSTOP;
1214                 t->t_sig_check = 1;  /* do ISSIG */
1215         }
1216         if (flags & PRSABORT)
1217                 lwp->lwp_sysabort = 1;
1218         thread_lock(t);
1219         if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1220                 /*
1221                  * Here, we are dealing with a single lwp.
1222                  */
1223                 if (ISTOPPED(t)) {
1224                         t->t_schedflag |= TS_PSTART;
1225                         t->t_dtrace_stop = 0;
1226                         setrun_locked(t);
1227                 } else if (flags & PRSABORT) {
1228                         t->t_proc_flag &=
1229                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230                         setrun_locked(t);
1231                 } else if (!(flags & PRSTOP)) {
1232                         t->t_proc_flag &=
1233                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1234                 }
1235                 thread_unlock(t);
1236         } else {
1237                 /*
1238                  * Here, we are dealing with the whole process.
1239                  */
1240                 if (ISTOPPED(t)) {
1241                         /*
1242                          * The representative lwp is stopped on an event
1243                          * of interest.  We demote it to PR_REQUESTED and
1244                          * choose another representative lwp.  If the new
1245                          * representative lwp is not stopped on an event of
1246                          * interest (other than PR_REQUESTED), we set the
1247                          * whole process running, else we leave the process
1248                          * stopped showing the next event of interest.
1249                          */
1250                         kthread_t *tx = NULL;
1251 
1252                         if (!(flags & PRSABORT) &&
1253                             t->t_whystop == PR_SYSENTRY &&
1254                             t->t_whatstop == SYS_lwp_exit)
1255                                 tx = t;         /* remember the exiting lwp */
1256                         t->t_whystop = PR_REQUESTED;
1257                         t->t_whatstop = 0;
1258                         thread_unlock(t);
1259                         t = prchoose(p);        /* returns locked t */
1260                         ASSERT(ISTOPPED(t) || VSTOPPED(t));
1261                         if (VSTOPPED(t) ||
1262                             t->t_whystop == PR_REQUESTED) {
1263                                 thread_unlock(t);
1264                                 allsetrun(p);
1265                         } else {
1266                                 thread_unlock(t);
1267                                 /*
1268                                  * As a special case, if the old representative
1269                                  * lwp was stopped on entry to _lwp_exit()
1270                                  * (and we are not aborting the system call),
1271                                  * we set the old representative lwp running.
1272                                  * We do this so that the next process stop
1273                                  * will find the exiting lwp gone.
1274                                  */
1275                                 if (tx != NULL) {
1276                                         thread_lock(tx);
1277                                         tx->t_schedflag |= TS_PSTART;
1278                                         t->t_dtrace_stop = 0;
1279                                         setrun_locked(tx);
1280                                         thread_unlock(tx);
1281                                 }
1282                         }
1283                 } else {
1284                         /*
1285                          * No event of interest; set all of the lwps running.
1286                          */
1287                         if (flags & PRSABORT) {
1288                                 t->t_proc_flag &=
1289                                     ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1290                                 setrun_locked(t);
1291                         }
1292                         thread_unlock(t);
1293                         allsetrun(p);
1294                 }
1295         }
1296         return (0);
1297 }
1298 
1299 /*
1300  * Wait until process/lwp stops or until timer expires.
1301  * Return EINTR for an interruption, -1 for timeout, else 0.
1302  */
1303 int
1304 pr_wait(prcommon_t *pcp,        /* prcommon referring to process/lwp */
1305         timestruc_t *ts,        /* absolute time of timeout, if any */
1306         int timecheck)
1307 {
1308         int rval;
1309 
1310         ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1311         rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1312         mutex_exit(&pcp->prc_mutex);
1313         switch (rval) {
1314         case 0:
1315                 return (EINTR);
1316         case -1:
1317                 return (-1);
1318         default:
1319                 return (0);
1320         }
1321 }
1322 
1323 /*
1324  * Make all threads in the process runnable.
1325  */
1326 void
1327 allsetrun(proc_t *p)
1328 {
1329         kthread_t *t;
1330 
1331         ASSERT(MUTEX_HELD(&p->p_lock));
1332 
1333         if ((t = p->p_tlist) != NULL) {
1334                 do {
1335                         thread_lock(t);
1336                         ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1337                         t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1338                         if (ISTOPPED(t)) {
1339                                 t->t_schedflag |= TS_PSTART;
1340                                 t->t_dtrace_stop = 0;
1341                                 setrun_locked(t);
1342                         }
1343                         thread_unlock(t);
1344                 } while ((t = t->t_forw) != p->p_tlist);
1345         }
1346 }
1347 
1348 /*
1349  * Wait for the process to die.
1350  * We do this after sending SIGKILL because we know it will
1351  * die soon and we want subsequent operations to return ENOENT.
1352  */
1353 void
1354 pr_wait_die(prnode_t *pnp)
1355 {
1356         proc_t *p;
1357 
1358         mutex_enter(&pidlock);
1359         while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1360                 if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1361                         break;
1362         }
1363         mutex_exit(&pidlock);
1364 }
1365 
1366 static void
1367 pr_settrace(proc_t *p, sigset_t *sp)
1368 {
1369         prdelset(sp, SIGKILL);
1370         prassignset(&p->p_sigmask, sp);
1371         if (!sigisempty(&p->p_sigmask))
1372                 p->p_proc_flag |= P_PR_TRACE;
1373         else if (prisempty(&p->p_fltmask)) {
1374                 user_t *up = PTOU(p);
1375                 if (up->u_systrap == 0)
1376                         p->p_proc_flag &= ~P_PR_TRACE;
1377         }
1378 }
1379 
/*
 * Set (or clear) the current signal of the lwp chosen by pr_thread().
 *
 * sip->si_signo selects the signal; zero clears the current signal and
 * frees any current siginfo.  For a non-zero signal the siginfo is
 * copied into the lwp's current siginfo and the SIGKILL / jobcontrol
 * side effects below are applied.
 *
 * Returns EINVAL if the signal number is negative or not below the
 * signal limit, EBUSY if the lwp's current signal is already SIGKILL,
 * otherwise 0.
 *
 * p->p_lock is dropped and re-entered around the allocation, so the
 * caller is expected to hold it on entry.
 */
int
pr_setsig(prnode_t *pnp, siginfo_t *sip)
{
        /* signal limit comes from the calling process's brand, if any */
        int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
        int sig = sip->si_signo;
        prcommon_t *pcp = pnp->pr_common;
        proc_t *p = pcp->prc_proc;
        kthread_t *t;
        klwp_t *lwp;
        int error = 0;

        t = pr_thread(pnp);     /* returns locked thread */
        thread_unlock(t);
        lwp = ttolwp(t);
        if (sig < 0 || sig >= nsig)
                /* Zero allowed here */
                error = EINVAL;
        else if (lwp->lwp_cursig == SIGKILL)
                /* "can't happen", but just in case */
                error = EBUSY;
        /* the assignment below installs sig as the current signal */
        else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
                lwp->lwp_extsig = 0;
                /*
                 * Discard current siginfo_t, if any.
                 */
                if (lwp->lwp_curinfo) {
                        siginfofree(lwp->lwp_curinfo);
                        lwp->lwp_curinfo = NULL;
                }
        } else {
                kthread_t *tx;
                sigqueue_t *sqp;

                /* drop p_lock to do kmem_alloc(KM_SLEEP) */
                mutex_exit(&p->p_lock);
                sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
                mutex_enter(&p->p_lock);

                /*
                 * Use the new sigqueue only if the lwp has no current
                 * siginfo; otherwise the existing one is overwritten
                 * and the fresh allocation is released.
                 */
                if (lwp->lwp_curinfo == NULL)
                        lwp->lwp_curinfo = sqp;
                else
                        kmem_free(sqp, sizeof (sigqueue_t));
                /*
                 * Copy contents of info to current siginfo_t.
                 */
                bcopy(sip, &lwp->lwp_curinfo->sq_info,
                    sizeof (lwp->lwp_curinfo->sq_info));
                /*
                 * Prevent contents published by si_zoneid-unaware /proc
                 * consumers from being incorrectly filtered.  Because
                 * an uninitialized si_zoneid is the same as
                 * GLOBAL_ZONEID, this means that you can't pr_setsig a
                 * process in a non-global zone with a siginfo which
                 * appears to come from the global zone.
                 */
                if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
                        lwp->lwp_curinfo->sq_info.si_zoneid =
                            p->p_zone->zone_id;
                /*
                 * Side-effects for SIGKILL and jobcontrol signals.
                 */
                if (sig == SIGKILL) {
                        p->p_flag |= SKILLED;
                        p->p_flag &= ~SEXTKILLED;
                } else if (sig == SIGCONT) {
                        /*
                         * SIGCONT: remove every pending stop signal,
                         * first process-wide and then from each thread.
                         */
                        p->p_flag |= SSCONT;
                        sigdelq(p, NULL, SIGSTOP);
                        sigdelq(p, NULL, SIGTSTP);
                        sigdelq(p, NULL, SIGTTOU);
                        sigdelq(p, NULL, SIGTTIN);
                        sigdiffset(&p->p_sig, &stopdefault);
                        sigdiffset(&p->p_extsig, &stopdefault);
                        if ((tx = p->p_tlist) != NULL) {
                                do {
                                        sigdelq(p, tx, SIGSTOP);
                                        sigdelq(p, tx, SIGTSTP);
                                        sigdelq(p, tx, SIGTTOU);
                                        sigdelq(p, tx, SIGTTIN);
                                        sigdiffset(&tx->t_sig, &stopdefault);
                                        sigdiffset(&tx->t_extsig, &stopdefault);
                                } while ((tx = tx->t_forw) != p->p_tlist);
                        }
                } else if (sigismember(&stopdefault, sig)) {
                        /*
                         * Stop signal: remove any pending SIGCONT,
                         * process-wide and from each thread.  SSCONT is
                         * cleared only when the disposition is SIG_DFL
                         * and either the signal is SIGSTOP or the
                         * process group is not orphaned.
                         */
                        if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
                            (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
                                p->p_flag &= ~SSCONT;
                        sigdelq(p, NULL, SIGCONT);
                        sigdelset(&p->p_sig, SIGCONT);
                        sigdelset(&p->p_extsig, SIGCONT);
                        if ((tx = p->p_tlist) != NULL) {
                                do {
                                        sigdelq(p, tx, SIGCONT);
                                        sigdelset(&tx->t_sig, SIGCONT);
                                        sigdelset(&tx->t_extsig, SIGCONT);
                                } while ((tx = tx->t_forw) != p->p_tlist);
                        }
                }
                thread_lock(t);
                if (ISWAKEABLE(t) || ISWAITING(t)) {
                        /* Set signaled sleeping/waiting lwp running */
                        setrun_locked(t);
                } else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
                        /* If SIGKILL, set stopped lwp running */
                        p->p_stopsig = 0;
                        t->t_schedflag |= TS_XSTART | TS_PSTART;
                        t->t_dtrace_stop = 0;
                        setrun_locked(t);
                }
                t->t_sig_check = 1;     /* so ISSIG will be done */
                thread_unlock(t);
                /*
                 * More jobcontrol side-effects.
                 */
                if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
                        /* restart every thread stopped for jobcontrol */
                        p->p_stopsig = 0;
                        do {
                                thread_lock(tx);
                                if (tx->t_state == TS_STOPPED &&
                                    tx->t_whystop == PR_JOBCONTROL) {
                                        tx->t_schedflag |= TS_XSTART;
                                        setrun_locked(tx);
                                }
                                thread_unlock(tx);
                        } while ((tx = tx->t_forw) != p->p_tlist);
                }
        }
        return (error);
}
1508 
1509 int
1510 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1511 {
1512         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1513         prcommon_t *pcp = pnp->pr_common;
1514         proc_t *p = pcp->prc_proc;
1515         k_siginfo_t info;
1516 
1517         if (sig <= 0 || sig >= nsig)
1518                 return (EINVAL);
1519 
1520         bzero(&info, sizeof (info));
1521         info.si_signo = sig;
1522         info.si_code = SI_USER;
1523         info.si_pid = curproc->p_pid;
1524         info.si_ctid = PRCTID(curproc);
1525         info.si_zoneid = getzoneid();
1526         info.si_uid = crgetruid(cr);
1527         sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1528             pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1529 
1530         return (0);
1531 }
1532 
1533 int
1534 pr_unkill(prnode_t *pnp, int sig)
1535 {
1536         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1537         prcommon_t *pcp = pnp->pr_common;
1538         proc_t *p = pcp->prc_proc;
1539         sigqueue_t *infop = NULL;
1540 
1541         if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1542                 return (EINVAL);
1543 
1544         if (pcp->prc_flags & PRC_LWP)
1545                 sigdeq(p, pcp->prc_thread, sig, &infop);
1546         else
1547                 sigdeq(p, NULL, sig, &infop);
1548 
1549         if (infop)
1550                 siginfofree(infop);
1551 
1552         return (0);
1553 }
1554 
1555 int
1556 pr_nice(proc_t *p, int nice, cred_t *cr)
1557 {
1558         kthread_t *t;
1559         int err;
1560         int error = 0;
1561 
1562         t = p->p_tlist;
1563         do {
1564                 ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1565                 err = CL_DONICE(t, cr, nice, (int *)NULL);
1566                 schedctl_set_cidpri(t);
1567                 if (error == 0)
1568                         error = err;
1569         } while ((t = t->t_forw) != p->p_tlist);
1570 
1571         return (error);
1572 }
1573 
1574 void
1575 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1576 {
1577         user_t *up = PTOU(p);
1578 
1579         if (entry) {
1580                 prassignset(&up->u_entrymask, sysset);
1581         } else {
1582                 prassignset(&up->u_exitmask, sysset);
1583         }
1584         if (!prisempty(&up->u_entrymask) ||
1585             !prisempty(&up->u_exitmask)) {
1586                 up->u_systrap = 1;
1587                 p->p_proc_flag |= P_PR_TRACE;
1588                 set_proc_sys(p);        /* set pre and post-sys flags */
1589         } else {
1590                 up->u_systrap = 0;
1591                 if (sigisempty(&p->p_sigmask) &&
1592                     prisempty(&p->p_fltmask))
1593                         p->p_proc_flag &= ~P_PR_TRACE;
1594         }
1595 }
1596 
/*
 * The complete set of mode flags understood by pr_set() and pr_unset().
 * Both functions reject a flags word containing any other bit with EINVAL.
 */
#define ALLFLAGS        \
        (PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1599 
1600 int
1601 pr_set(proc_t *p, long flags)
1602 {
1603         if ((p->p_flag & SSYS) || p->p_as == &kas)
1604                 return (EBUSY);
1605 
1606         if (flags & ~ALLFLAGS)
1607                 return (EINVAL);
1608 
1609         if (flags & PR_FORK)
1610                 p->p_proc_flag |= P_PR_FORK;
1611         if (flags & PR_RLC)
1612                 p->p_proc_flag |= P_PR_RUNLCL;
1613         if (flags & PR_KLC)
1614                 p->p_proc_flag |= P_PR_KILLCL;
1615         if (flags & PR_ASYNC)
1616                 p->p_proc_flag |= P_PR_ASYNC;
1617         if (flags & PR_BPTADJ)
1618                 p->p_proc_flag |= P_PR_BPTADJ;
1619         if (flags & PR_MSACCT)
1620                 if ((p->p_flag & SMSACCT) == 0)
1621                         estimate_msacct(p->p_tlist, gethrtime());
1622         if (flags & PR_MSFORK)
1623                 p->p_flag |= SMSFORK;
1624         if (flags & PR_PTRACE) {
1625                 p->p_proc_flag |= P_PR_PTRACE;
1626                 /* ptraced process must die if parent dead */
1627                 if (p->p_ppid == 1)
1628                         sigtoproc(p, NULL, SIGKILL);
1629         }
1630 
1631         return (0);
1632 }
1633 
1634 int
1635 pr_unset(proc_t *p, long flags)
1636 {
1637         if ((p->p_flag & SSYS) || p->p_as == &kas)
1638                 return (EBUSY);
1639 
1640         if (flags & ~ALLFLAGS)
1641                 return (EINVAL);
1642 
1643         if (flags & PR_FORK)
1644                 p->p_proc_flag &= ~P_PR_FORK;
1645         if (flags & PR_RLC)
1646                 p->p_proc_flag &= ~P_PR_RUNLCL;
1647         if (flags & PR_KLC)
1648                 p->p_proc_flag &= ~P_PR_KILLCL;
1649         if (flags & PR_ASYNC)
1650                 p->p_proc_flag &= ~P_PR_ASYNC;
1651         if (flags & PR_BPTADJ)
1652                 p->p_proc_flag &= ~P_PR_BPTADJ;
1653         if (flags & PR_MSACCT)
1654                 disable_msacct(p);
1655         if (flags & PR_MSFORK)
1656                 p->p_flag &= ~SMSFORK;
1657         if (flags & PR_PTRACE)
1658                 p->p_proc_flag &= ~P_PR_PTRACE;
1659 
1660         return (0);
1661 }
1662 
1663 static int
1664 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1665 {
1666         proc_t *p = pnp->pr_common->prc_proc;
1667         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1668 
1669         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1670                 thread_unlock(t);
1671                 return (EBUSY);
1672         }
1673         if (!prhasfp()) {
1674                 thread_unlock(t);
1675                 return (EINVAL);        /* No FP support */
1676         }
1677 
1678         /* drop p_lock while touching the lwp's stack */
1679         thread_unlock(t);
1680         mutex_exit(&p->p_lock);
1681         prsetprfpregs(ttolwp(t), prfpregset);
1682         mutex_enter(&p->p_lock);
1683 
1684         return (0);
1685 }
1686 
1687 #ifdef  _SYSCALL32_IMPL
1688 static int
1689 pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
1690 {
1691         proc_t *p = pnp->pr_common->prc_proc;
1692         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1693 
1694         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1695                 thread_unlock(t);
1696                 return (EBUSY);
1697         }
1698         if (!prhasfp()) {
1699                 thread_unlock(t);
1700                 return (EINVAL);        /* No FP support */
1701         }
1702 
1703         /* drop p_lock while touching the lwp's stack */
1704         thread_unlock(t);
1705         mutex_exit(&p->p_lock);
1706         prsetprfpregs32(ttolwp(t), prfpregset);
1707         mutex_enter(&p->p_lock);
1708 
1709         return (0);
1710 }
1711 #endif  /* _SYSCALL32_IMPL */
1712 
1713 #if defined(__sparc)
1714 /* ARGSUSED */
1715 static int
1716 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1717 {
1718         proc_t *p = pnp->pr_common->prc_proc;
1719         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1720 
1721         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1722                 thread_unlock(t);
1723                 return (EBUSY);
1724         }
1725         thread_unlock(t);
1726 
1727         if (!prhasx(p))
1728                 return (EINVAL);        /* No extra register support */
1729 
1730         /* drop p_lock while touching the lwp's stack */
1731         mutex_exit(&p->p_lock);
1732         prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1733         mutex_enter(&p->p_lock);
1734 
1735         return (0);
1736 }
1737 
1738 static int
1739 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1740 {
1741         proc_t *p = pnp->pr_common->prc_proc;
1742         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1743 
1744         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1745                 thread_unlock(t);
1746                 return (EBUSY);
1747         }
1748         thread_unlock(t);
1749 
1750         /* drop p_lock while touching the lwp's stack */
1751         mutex_exit(&p->p_lock);
1752         prsetasregs(ttolwp(t), asrset);
1753         mutex_enter(&p->p_lock);
1754 
1755         return (0);
1756 }
1757 #endif
1758 
1759 static int
1760 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1761 {
1762         proc_t *p = pnp->pr_common->prc_proc;
1763         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1764 
1765         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1766                 thread_unlock(t);
1767                 return (EBUSY);
1768         }
1769 
1770         /* drop p_lock while touching the lwp's stack */
1771         thread_unlock(t);
1772         mutex_exit(&p->p_lock);
1773         prsvaddr(ttolwp(t), vaddr);
1774         mutex_enter(&p->p_lock);
1775 
1776         return (0);
1777 }
1778 
1779 void
1780 pr_sethold(prnode_t *pnp, sigset_t *sp)
1781 {
1782         proc_t *p = pnp->pr_common->prc_proc;
1783         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1784 
1785         schedctl_finish_sigblock(t);
1786         sigutok(sp, &t->t_hold);
1787         if (ISWAKEABLE(t) &&
1788             (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1789                 setrun_locked(t);
1790         t->t_sig_check = 1;  /* so thread will see new holdmask */
1791         thread_unlock(t);
1792 }
1793 
1794 void
1795 pr_setfault(proc_t *p, fltset_t *fltp)
1796 {
1797         prassignset(&p->p_fltmask, fltp);
1798         if (!prisempty(&p->p_fltmask))
1799                 p->p_proc_flag |= P_PR_TRACE;
1800         else if (sigisempty(&p->p_sigmask)) {
1801                 user_t *up = PTOU(p);
1802                 if (up->u_systrap == 0)
1803                         p->p_proc_flag &= ~P_PR_TRACE;
1804         }
1805 }
1806 
1807 static int
1808 pr_clearsig(prnode_t *pnp)
1809 {
1810         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1811         klwp_t *lwp = ttolwp(t);
1812 
1813         thread_unlock(t);
1814         if (lwp->lwp_cursig == SIGKILL)
1815                 return (EBUSY);
1816 
1817         /*
1818          * Discard current siginfo_t, if any.
1819          */
1820         lwp->lwp_cursig = 0;
1821         lwp->lwp_extsig = 0;
1822         if (lwp->lwp_curinfo) {
1823                 siginfofree(lwp->lwp_curinfo);
1824                 lwp->lwp_curinfo = NULL;
1825         }
1826 
1827         return (0);
1828 }
1829 
1830 static int
1831 pr_clearflt(prnode_t *pnp)
1832 {
1833         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1834 
1835         thread_unlock(t);
1836         ttolwp(t)->lwp_curflt = 0;
1837 
1838         return (0);
1839 }
1840 
/*
 * Set or clear one watched area in the process behind pnp.
 *
 * pwp supplies the start address, the size and the WA_* flags.  When no
 * flag other than WA_TRAPAFTER is given the request is a "clear" and the
 * supplied size is ignored; otherwise the area is established through
 * set_watched_area().
 *
 * The function is entered with p->p_lock held and the process marked
 * P_PR_LOCK (it drops and retakes both along the way).  *unlocked is set
 * to 0 on entry and to 1 only on the paths where prlock() failed while
 * re-locking the process, i.e. where we return without the process locked.
 *
 * Returns:
 *      EBUSY   the target is a system process
 *      EINVAL  the range wraps, an unknown flag is set, or an access
 *              flag is given with a zero size
 *      0       the range starts at or above as->a_userlimit (nothing done)
 *      E2BIG   the area would span more than 2 * prnwatch pages
 *      other   the error from prlock(), or the result of
 *              clear_watched_area() / set_watched_area()
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
        proc_t *p = pnp->pr_common->prc_proc;
        struct as *as = p->p_as;
        uintptr_t vaddr = pwp->pr_vaddr;
        size_t size = pwp->pr_size;
        int wflags = pwp->pr_wflags;
        ulong_t newpage = 0;
        struct watched_area *pwa;
        int error;

        *unlocked = 0;

        /*
         * Can't apply to a system process.
         */
        if ((p->p_flag & SSYS) || p->p_as == &kas)
                return (EBUSY);

        /*
         * Verify that the address range does not wrap
         * and that only the proper flags were specified.
         */
        /* a request with no access flags is a clear; its size is ignored */
        if ((wflags & ~WA_TRAPAFTER) == 0)
                size = 0;
        if (vaddr + size < vaddr ||
            (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
            ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
                return (EINVAL);

        /*
         * Don't let the address range go above as->a_userlimit.
         * There is no error here, just a limitation.
         */
        if (vaddr >= (uintptr_t)as->a_userlimit)
                return (0);
        if (vaddr + size > (uintptr_t)as->a_userlimit)
                size = (uintptr_t)as->a_userlimit - vaddr;

        /*
         * Compute maximum number of pages this will add.
         */
        if ((wflags & ~WA_TRAPAFTER) != 0) {
                /* span measured from the start of the first page touched */
                ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
                newpage = btopr(pagespan);
                if (newpage > 2 * prnwatch)
                        return (E2BIG);
        }

        /*
         * Force the process to be fully stopped.
         */
        if (p == curproc) {
                /*
                 * Operating on ourselves: release the /proc lock, retry
                 * holdwatch() until it returns 0, then re-lock.
                 */
                prunlock(pnp);
                while (holdwatch() != 0)
                        continue;
                if ((error = prlock(pnp, ZNO)) != 0) {
                        continuelwps(p);
                        *unlocked = 1;
                        return (error);
                }
        } else {
                pauselwps(p);
                while (pr_allstopped(p, 0) > 0) {
                        /*
                         * This cv/mutex pair is persistent even
                         * if the process disappears after we
                         * unmark it and drop p->p_lock.
                         */
                        kcondvar_t *cv = &pr_pid_cv[p->p_slot];
                        kmutex_t *mp = &p->p_lock;

                        prunmark(p);
                        (void) cv_wait(cv, mp);
                        mutex_exit(mp);
                        if ((error = prlock(pnp, ZNO)) != 0) {
                                /*
                                 * Unpause the process if it exists.
                                 */
                                p = pr_p_lock(pnp);
                                mutex_exit(&pr_pidlock);
                                if (p != NULL) {
                                        unpauselwps(p);
                                        prunlock(pnp);
                                }
                                *unlocked = 1;
                                return (error);
                        }
                }
        }

        /*
         * Drop p->p_lock in order to perform the rest of this.
         * The process is still locked with the P_PR_LOCK flag.
         */
        mutex_exit(&p->p_lock);

        /*
         * NOTE(review): pwa is not freed in this function; presumably
         * clear_watched_area()/set_watched_area() take ownership of it --
         * confirm against those routines.
         */
        pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
        pwa->wa_vaddr = (caddr_t)vaddr;
        pwa->wa_eaddr = (caddr_t)vaddr + size;
        pwa->wa_flags = (ulong_t)wflags;

        error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
            clear_watched_area(p, pwa) : set_watched_area(p, pwa);

        /* retake p->p_lock and let the lwps run again */
        if (p == curproc) {
                setallwatch();
                mutex_enter(&p->p_lock);
                continuelwps(p);
        } else {
                mutex_enter(&p->p_lock);
                unpauselwps(p);
        }

        return (error);
}
1958 
/*
 * jobcontrol stopped, but with a /proc directed stop in effect:
 * true when thread t is in TS_STOPPED with t_whystop == PR_JOBCONTROL
 * and also has TP_PRSTOP set in t_proc_flag.
 */
#define JDSTOPPED(t)    \
        ((t)->t_state == TS_STOPPED && \
        (t)->t_whystop == PR_JOBCONTROL && \
        ((t)->t_proc_flag & TP_PRSTOP))
1964 
/*
 * pr_agent() creates the agent lwp. If the process is exiting while
 * we are creating an agent lwp, then exitlwps() waits until the
 * agent has been created using prbarrier().
 *
 * prgregset supplies the initial general registers for the new lwp.
 * The function is entered with p->p_lock held; *unlocked is set to 0 on
 * entry and is 1 on return whenever the process was left unlocked
 * (prunlock() was called and a subsequent prlock() did not succeed or
 * was never attempted).
 *
 * Returns:
 *      EBUSY   the process is not in a suitable stopped state, already
 *              has an agent, or is killed/exiting/a vfork parent
 *      ENOMEM  lwp_create() failed
 *      ENOENT  the agent disappeared while we waited for it to stop
 *      other   a non-zero error from pr_wait() (except EINTR, which is
 *              mapped to success) or from prlock()
 *      -1      on success.  NOTE(review): the meaning of -1 is decided
 *              by the caller, which is not visible here -- confirm.
 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
        proc_t *p = pnp->pr_common->prc_proc;
        prcommon_t *pcp;
        kthread_t *t;
        kthread_t *ct;
        klwp_t *clwp;
        k_sigset_t smask;
        int cid;
        void *bufp = NULL;
        int error;

        *unlocked = 0;

        /*
         * Cannot create the /proc agent lwp if :-
         * - the process is not fully stopped or directed to stop.
         * - there is an agent lwp already.
         * - the process has been killed.
         * - the process is exiting.
         * - it's a vfork(2) parent.
         */
        t = prchoose(p);        /* returns locked thread */
        ASSERT(t != NULL);

        if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
            p->p_agenttp != NULL ||
            (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
                thread_unlock(t);
                return (EBUSY);
        }

        thread_unlock(t);
        mutex_exit(&p->p_lock);

        /* the agent starts with every signal held except those in cantmask */
        sigfillset(&smask);
        sigdiffset(&smask, &cantmask);
        clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
            t->t_pri, &smask, NOCLASS, 0);
        if (clwp == NULL) {
                /* callers expect p->p_lock to be held again on return */
                mutex_enter(&p->p_lock);
                return (ENOMEM);
        }
        prsetprregs(clwp, prgregset, 1);

        /*
         * Because abandoning the agent inside the target process leads to
         * a state that is essentially undebuggable, we record the psinfo of
         * the process creating the agent and hang that off of the lwp.
         */
        clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
        mutex_enter(&curproc->p_lock);
        prgetpsinfo(curproc, clwp->lwp_spymaster);
        mutex_exit(&curproc->p_lock);

        /*
         * We overload pr_time in the spymaster to denote the time at which the
         * agent was created.
         */
        gethrestime(&clwp->lwp_spymaster->pr_time);

retry:
        /*
         * Allocate the class-specific buffer without p->p_lock held, then
         * re-check the class id under the lock.
         */
        cid = t->t_cid;
        (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
        mutex_enter(&p->p_lock);
        if (cid != t->t_cid) {
                /*
                 * Someone just changed this thread's scheduling class,
                 * so try pre-allocating the buffer again.  Hopefully we
                 * don't hit this often.
                 */
                mutex_exit(&p->p_lock);
                CL_FREE(cid, bufp);
                goto retry;
        }

        /* give the agent the same scheduling class as the chosen thread */
        clwp->lwp_ap = clwp->lwp_arg;
        clwp->lwp_eosys = NORMALRETURN;
        ct = lwptot(clwp);
        ct->t_clfuncs = t->t_clfuncs;
        CL_FORK(t, ct, bufp);
        ct->t_cid = t->t_cid;
        ct->t_proc_flag |= TP_PRSTOP;
        /*
         * Setting t_sysnum to zero causes post_syscall()
         * to bypass all syscall checks and go directly to
         *      if (issig()) psig();
         * so that the agent lwp will stop in issig_forreal()
         * showing PR_REQUESTED.
         */
        ct->t_sysnum = 0;
        ct->t_post_sys = 1;
        ct->t_sig_check = 1;
        p->p_agenttp = ct;
        ct->t_proc_flag &= ~TP_HOLDLWP;

        /*
         * NOTE(review): prc_mutex is taken here and is still held when
         * pr_wait() is called below; presumably pr_wait() expects it held
         * and releases it -- confirm against pr_wait().
         */
        pcp = pnp->pr_pcommon;
        mutex_enter(&pcp->prc_mutex);

        lwp_create_done(ct);

        /*
         * Don't return until the agent is stopped on PR_REQUESTED.
         */

        for (;;) {
                prunlock(pnp);
                *unlocked = 1;

                /*
                 * Wait for the agent to stop and notify us.
                 * If we've been interrupted, stop waiting; the interruption
                 * is not reported as an error.
                 */
                error = pr_wait(pcp, NULL, 0);
                if (error == EINTR) {
                        error = 0;
                        break;
                }

                /*
                 * Confirm that the agent LWP has stopped.
                 */

                if ((error = prlock(pnp, ZNO)) != 0)
                        break;
                *unlocked = 0;

                /*
                 * Since we dropped the lock on the process, the agent
                 * may have disappeared or changed. Grab the current
                 * agent and fail if it has disappeared.
                 */
                if ((ct = p->p_agenttp) == NULL) {
                        error = ENOENT;
                        break;
                }

                mutex_enter(&pcp->prc_mutex);
                thread_lock(ct);

                if (ISTOPPED(ct)) {
                        thread_unlock(ct);
                        mutex_exit(&pcp->prc_mutex);
                        break;
                }

                /* not stopped yet: keep prc_mutex and go wait again */
                thread_unlock(ct);
        }

        return (error ? error : -1);
}
2122 
2123 static int
2124 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2125 {
2126         caddr_t base = (caddr_t)pio->pio_base;
2127         size_t cnt = pio->pio_len;
2128         uintptr_t offset = (uintptr_t)pio->pio_offset;
2129         struct uio auio;
2130         struct iovec aiov;
2131         int error = 0;
2132 
2133         if ((p->p_flag & SSYS) || p->p_as == &kas)
2134                 error = EIO;
2135         else if ((base + cnt) < base || (offset + cnt) < offset)
2136                 error = EINVAL;
2137         else if (cnt != 0) {
2138                 aiov.iov_base = base;
2139                 aiov.iov_len = cnt;
2140 
2141                 auio.uio_loffset = offset;
2142                 auio.uio_iov = &aiov;
2143                 auio.uio_iovcnt = 1;
2144                 auio.uio_resid = cnt;
2145                 auio.uio_segflg = UIO_USERSPACE;
2146                 auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2147                 auio.uio_fmode = FREAD|FWRITE;
2148                 auio.uio_extflg = UIO_COPY_DEFAULT;
2149 
2150                 mutex_exit(&p->p_lock);
2151                 error = prusrio(p, rw, &auio, 0);
2152                 mutex_enter(&p->p_lock);
2153 
2154                 /*
2155                  * We have no way to return the i/o count,
2156                  * like read() or write() would do, so we
2157                  * return an error if the i/o was truncated.
2158                  */
2159                 if (auio.uio_resid != 0 && error == 0)
2160                         error = EIO;
2161         }
2162 
2163         return (error);
2164 }
2165 
2166 static int
2167 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2168 {
2169         kthread_t *t;
2170         cred_t *oldcred;
2171         cred_t *newcred;
2172         uid_t oldruid;
2173         int error;
2174         zone_t *zone = crgetzone(cr);
2175 
2176         if (!VALID_UID(prcred->pr_euid, zone) ||
2177             !VALID_UID(prcred->pr_ruid, zone) ||
2178             !VALID_UID(prcred->pr_suid, zone) ||
2179             !VALID_GID(prcred->pr_egid, zone) ||
2180             !VALID_GID(prcred->pr_rgid, zone) ||
2181             !VALID_GID(prcred->pr_sgid, zone))
2182                 return (EINVAL);
2183 
2184         if (dogrps) {
2185                 int ngrp = prcred->pr_ngroups;
2186                 int i;
2187 
2188                 if (ngrp < 0 || ngrp > ngroups_max)
2189                         return (EINVAL);
2190 
2191                 for (i = 0; i < ngrp; i++) {
2192                         if (!VALID_GID(prcred->pr_groups[i], zone))
2193                                 return (EINVAL);
2194                 }
2195         }
2196 
2197         error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2198 
2199         if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2200                 error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2201 
2202         if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2203             prcred->pr_suid != prcred->pr_ruid)
2204                 error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2205 
2206         if (error)
2207                 return (error);
2208 
2209         mutex_exit(&p->p_lock);
2210 
2211         /* hold old cred so it doesn't disappear while we dup it */
2212         mutex_enter(&p->p_crlock);
2213         crhold(oldcred = p->p_cred);
2214         mutex_exit(&p->p_crlock);
2215         newcred = crdup(oldcred);
2216         oldruid = crgetruid(oldcred);
2217         crfree(oldcred);
2218 
2219         /* Error checking done above */
2220         (void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2221             prcred->pr_suid);
2222         (void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2223             prcred->pr_sgid);
2224 
2225         if (dogrps) {
2226                 (void) crsetgroups(newcred, prcred->pr_ngroups,
2227                     prcred->pr_groups);
2228 
2229         }
2230 
2231         mutex_enter(&p->p_crlock);
2232         oldcred = p->p_cred;
2233         p->p_cred = newcred;
2234         mutex_exit(&p->p_crlock);
2235         crfree(oldcred);
2236 
2237         /*
2238          * Keep count of processes per uid consistent.
2239          */
2240         if (oldruid != prcred->pr_ruid) {
2241                 zoneid_t zoneid = crgetzoneid(newcred);
2242 
2243                 mutex_enter(&pidlock);
2244                 upcount_dec(oldruid, zoneid);
2245                 upcount_inc(prcred->pr_ruid, zoneid);
2246                 mutex_exit(&pidlock);
2247         }
2248 
2249         /*
2250          * Broadcast the cred change to the threads.
2251          */
2252         mutex_enter(&p->p_lock);
2253         t = p->p_tlist;
2254         do {
2255                 t->t_pre_sys = 1; /* so syscall will get new cred */
2256         } while ((t = t->t_forw) != p->p_tlist);
2257 
2258         return (0);
2259 }
2260 
2261 /*
2262  * Change process credentials to specified zone.  Used to temporarily
2263  * set a process to run in the global zone; only transitions between
2264  * the process's actual zone and the global zone are allowed.
2265  */
2266 static int
2267 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2268 {
2269         kthread_t *t;
2270         cred_t *oldcred;
2271         cred_t *newcred;
2272         zone_t *zptr;
2273         zoneid_t oldzoneid;
2274 
2275         if (secpolicy_zone_config(cr) != 0)
2276                 return (EPERM);
2277         if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2278                 return (EINVAL);
2279         if ((zptr = zone_find_by_id(zoneid)) == NULL)
2280                 return (EINVAL);
2281         mutex_exit(&p->p_lock);
2282         mutex_enter(&p->p_crlock);
2283         oldcred = p->p_cred;
2284         crhold(oldcred);
2285         mutex_exit(&p->p_crlock);
2286         newcred = crdup(oldcred);
2287         oldzoneid = crgetzoneid(oldcred);
2288         crfree(oldcred);
2289 
2290         crsetzone(newcred, zptr);
2291         zone_rele(zptr);
2292 
2293         mutex_enter(&p->p_crlock);
2294         oldcred = p->p_cred;
2295         p->p_cred = newcred;
2296         mutex_exit(&p->p_crlock);
2297         crfree(oldcred);
2298 
2299         /*
2300          * The target process is changing zones (according to its cred), so
2301          * update the per-zone upcounts, which are based on process creds.
2302          */
2303         if (oldzoneid != zoneid) {
2304                 uid_t ruid = crgetruid(newcred);
2305 
2306                 mutex_enter(&pidlock);
2307                 upcount_dec(ruid, oldzoneid);
2308                 upcount_inc(ruid, zoneid);
2309                 mutex_exit(&pidlock);
2310         }
2311         /*
2312          * Broadcast the cred change to the threads.
2313          */
2314         mutex_enter(&p->p_lock);
2315         t = p->p_tlist;
2316         do {
2317                 t->t_pre_sys = 1;    /* so syscall will get new cred */
2318         } while ((t = t->t_forw) != p->p_tlist);
2319 
2320         return (0);
2321 }
2322 
2323 static int
2324 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2325 {
2326         kthread_t *t;
2327         int err;
2328 
2329         ASSERT(MUTEX_HELD(&p->p_lock));
2330 
2331         if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2332                 /*
2333                  * Broadcast the cred change to the threads.
2334                  */
2335                 t = p->p_tlist;
2336                 do {
2337                         t->t_pre_sys = 1; /* so syscall will get new cred */
2338                 } while ((t = t->t_forw) != p->p_tlist);
2339         }
2340 
2341         return (err);
2342 }
2343 
2344 /*
2345  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2346  * terminate or perform an exec(2).
2347  *
2348  * Returns 0 if the process is fully stopped except for the current thread (if
2349  * we are operating on our own process), 1 otherwise.
2350  *
2351  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2352  * See holdwatch() for details.
2353  */
2354 int
2355 pr_allstopped(proc_t *p, int watchstop)
2356 {
2357         kthread_t *t;
2358         int rv = 0;
2359 
2360         ASSERT(MUTEX_HELD(&p->p_lock));
2361 
2362         if (p->p_flag & SVFWAIT) /* waiting for vfork'd child to exec */
2363                 return (-1);
2364 
2365         if ((t = p->p_tlist) != NULL) {
2366                 do {
2367                         if (t == curthread || VSTOPPED(t) ||
2368                             (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2369                                 continue;
2370                         thread_lock(t);
2371                         switch (t->t_state) {
2372                         case TS_ZOMB:
2373                         case TS_STOPPED:
2374                                 break;
2375                         case TS_SLEEP:
2376                                 if (!(t->t_flag & T_WAKEABLE) ||
2377                                     t->t_wchan0 == NULL)
2378                                         rv = 1;
2379                                 break;
2380                         default:
2381                                 rv = 1;
2382                                 break;
2383                         }
2384                         thread_unlock(t);
2385                 } while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2386         }
2387 
2388         return (rv);
2389 }
2390 
2391 /*
2392  * Cause all lwps in the process to pause (for watchpoint operations).
2393  */
2394 static void
2395 pauselwps(proc_t *p)
2396 {
2397         kthread_t *t;
2398 
2399         ASSERT(MUTEX_HELD(&p->p_lock));
2400         ASSERT(p != curproc);
2401 
2402         if ((t = p->p_tlist) != NULL) {
2403                 do {
2404                         thread_lock(t);
2405                         t->t_proc_flag |= TP_PAUSE;
2406                         aston(t);
2407                         if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2408                             ISWAITING(t)) {
2409                                 setrun_locked(t);
2410                         }
2411                         prpokethread(t);
2412                         thread_unlock(t);
2413                 } while ((t = t->t_forw) != p->p_tlist);
2414         }
2415 }
2416 
2417 /*
2418  * undo the effects of pauselwps()
2419  */
2420 static void
2421 unpauselwps(proc_t *p)
2422 {
2423         kthread_t *t;
2424 
2425         ASSERT(MUTEX_HELD(&p->p_lock));
2426         ASSERT(p != curproc);
2427 
2428         if ((t = p->p_tlist) != NULL) {
2429                 do {
2430                         thread_lock(t);
2431                         t->t_proc_flag &= ~TP_PAUSE;
2432                         if (t->t_state == TS_STOPPED) {
2433                                 t->t_schedflag |= TS_UNPAUSE;
2434                                 t->t_dtrace_stop = 0;
2435                                 setrun_locked(t);
2436                         }
2437                         thread_unlock(t);
2438                 } while ((t = t->t_forw) != p->p_tlist);
2439         }
2440 }
2441 
/*
 * Cancel all watched areas.  Called from prclose().
 *
 * Entered with p->p_lock held and the process marked P_PR_LOCK (both are
 * asserted).  If no watchpoints are active the process is returned
 * unchanged.  Otherwise the process is paused, its watchpoints are freed,
 * watching is disabled on every thread, the watched pages are queued to
 * have their original protections restored, and the process is resumed.
 *
 * Returns the (still locked) process, or NULL if the process disappeared
 * while we were waiting for it to stop.
 */
proc_t *
pr_cancel_watch(prnode_t *pnp)
{
        proc_t *p = pnp->pr_pcommon->prc_proc;
        struct as *as;
        kthread_t *t;

        ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

        if (!pr_watch_active(p))
                return (p);

        /*
         * Pause the process before dealing with the watchpoints.
         */
        if (p == curproc) {
                /*
                 * Operating on ourselves: release the /proc lock, retry
                 * holdwatch() until it returns 0, then re-lock.
                 */
                prunlock(pnp);
                while (holdwatch() != 0)
                        continue;
                p = pr_p_lock(pnp);
                mutex_exit(&pr_pidlock);
                ASSERT(p == curproc);
        } else {
                pauselwps(p);
                while (p != NULL && pr_allstopped(p, 0) > 0) {
                        /*
                         * This cv/mutex pair is persistent even
                         * if the process disappears after we
                         * unmark it and drop p->p_lock.
                         */
                        kcondvar_t *cv = &pr_pid_cv[p->p_slot];
                        kmutex_t *mp = &p->p_lock;

                        prunmark(p);
                        (void) cv_wait(cv, mp);
                        mutex_exit(mp);
                        p = pr_p_lock(pnp);  /* NULL if process disappeared */
                        mutex_exit(&pr_pidlock);
                }
        }

        if (p == NULL)          /* the process disappeared */
                return (NULL);

        ASSERT(p == pnp->pr_pcommon->prc_proc);
        ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

        /* re-check: the locks were dropped while we waited above */
        if (pr_watch_active(p)) {
                pr_free_watchpoints(p);
                if ((t = p->p_tlist) != NULL) {
                        do {
                                watch_disable(t);

                        } while ((t = t->t_forw) != p->p_tlist);
                }
        }

        if ((as = p->p_as) != NULL) {
                avl_tree_t *tree;
                struct watched_page *pwp;

                /*
                 * If this is the parent of a vfork, the watched page
                 * list has been moved temporarily to p->p_wpage.
                 */
                if (avl_numnodes(&p->p_wpage) != 0)
                        tree = &p->p_wpage;
                else
                        tree = &as->a_wpage;

                /* p->p_lock is dropped before taking the address space lock */
                mutex_exit(&p->p_lock);
                AS_LOCK_ENTER(as, RW_WRITER);

                /*
                 * Clear the watch counts on every watched page and, unless
                 * it is already marked WP_SETPROT, set its protection back
                 * to the saved original and push it onto the p->p_wprot list.
                 */
                for (pwp = avl_first(tree); pwp != NULL;
                    pwp = AVL_NEXT(tree, pwp)) {
                        pwp->wp_read = 0;
                        pwp->wp_write = 0;
                        pwp->wp_exec = 0;
                        if ((pwp->wp_flags & WP_SETPROT) == 0) {
                                pwp->wp_flags |= WP_SETPROT;
                                pwp->wp_prot = pwp->wp_oprot;
                                pwp->wp_list = p->p_wprot;
                                p->p_wprot = pwp;
                        }
                }

                AS_LOCK_EXIT(as);
                mutex_enter(&p->p_lock);
        }

        /*
         * Unpause the process now.
         */
        if (p == curproc)
                continuelwps(p);
        else
                unpauselwps(p);

        return (p);
}