1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * A CPR derivative specifically for starfire/starcat
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/systm.h>
  32 #include <sys/machparam.h>
  33 #include <sys/machsystm.h>
  34 #include <sys/ddi.h>
  35 #define SUNDDI_IMPL
  36 #include <sys/sunddi.h>
  37 #include <sys/sunndi.h>
  38 #include <sys/devctl.h>
  39 #include <sys/time.h>
  40 #include <sys/kmem.h>
  41 #include <nfs/lm.h>
  42 #include <sys/ddi_impldefs.h>
  43 #include <sys/ndi_impldefs.h>
  44 #include <sys/obpdefs.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/debug.h>
  47 #include <sys/errno.h>
  48 #include <sys/callb.h>
  49 #include <sys/clock.h>
  50 #include <sys/x_call.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/epm.h>
  53 #include <sys/vfs.h>
  54 
  55 #include <sys/cpu_sgnblk_defs.h>
  56 #include <sys/dr.h>
  57 #include <sys/dr_util.h>
  58 
  59 #include <sys/promif.h>
  60 #include <sys/conf.h>
  61 #include <sys/cyclic.h>
  62 
  63 extern void     e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
  64 extern void     e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
  65 extern int      is_pseudo_device(dev_info_t *dip);
  66 
  67 extern kmutex_t cpu_lock;
  68 extern dr_unsafe_devs_t dr_unsafe_devs;
  69 
  70 static int              dr_is_real_device(dev_info_t *dip);
  71 static int              dr_is_unsafe_major(major_t major);
  72 static int              dr_bypass_device(char *dname);
  73 static int              dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
  74 static int              dr_resolve_devname(dev_info_t *dip, char *buffer,
  75                                 char *alias);
  76 static sbd_error_t      *drerr_int(int e_code, uint64_t *arr, int idx,
  77                                 int majors);
  78 static int              dr_add_int(uint64_t *arr, int idx, int len,
  79                                 uint64_t val);
  80 
  81 int dr_pt_test_suspend(dr_handle_t *hp);
  82 
  83 /*
  84  * dr_quiesce.c interface
  85  * NOTE: states used internally by dr_suspend and dr_resume
  86  */
typedef enum dr_suspend_state {
	DR_SRSTATE_BEGIN = 0,	/* nothing suspended yet */
	DR_SRSTATE_USER,	/* user threads stopped */
	DR_SRSTATE_DRIVER,	/* drivers suspended */
	DR_SRSTATE_FULL		/* cpus paused, clock/intrs stopped */
} suspend_state_t;

/*
 * Suspend/resume handle; tracks how far a suspend progressed so that
 * dr_resume() can undo exactly the completed stages.
 */
struct dr_sr_handle {
	dr_handle_t		*sr_dr_handlep;	/* back pointer to DR op handle */
	dev_info_t		*sr_failed_dip;	/* held dip that failed DDI_SUSPEND */
	suspend_state_t		sr_suspend_state; /* last stage reached by dr_suspend() */
	uint_t			sr_flags;	/* SR_FLAG_* below */
	uint64_t		sr_err_ints[DR_MAX_ERR_INT]; /* majors/pids for error reporting */
	int			sr_err_idx;	/* next free slot in sr_err_ints */
};

/* hw watchdog was disabled during suspend; re-enable it on resume */
#define	SR_FLAG_WATCHDOG	0x1
 104 
 105 /*
 106  * XXX
 107  * This hack will go away before RTI.  Just for testing.
 108  * List of drivers to bypass when performing a suspend.
 109  */
 110 static char *dr_bypass_list[] = {
 111         ""
 112 };
 113 
 114 
 115 #define         SKIP_SYNC       /* bypass sync ops in dr_suspend */
 116 
 117 /*
 118  * dr_skip_user_threads is used to control if user threads should
 119  * be suspended.  If dr_skip_user_threads is true, the rest of the
 120  * flags are not used; if it is false, dr_check_user_stop_result
 121  * will be used to control whether or not we need to check suspend
 122  * result, and dr_allow_blocked_threads will be used to control
 123  * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to not allow blocked threads when we don't even check stop
 * result.
 128  */
 129 static int      dr_skip_user_threads = 0;       /* default to FALSE */
 130 static int      dr_check_user_stop_result = 1;  /* default to TRUE */
 131 static int      dr_allow_blocked_threads = 1;   /* default to TRUE */
 132 
 133 #define DR_CPU_LOOP_MSEC        1000
 134 
/*
 * Quiesce the cyclic subsystem (system clock et al.) on the way into
 * a full suspend.  Preemption is disabled first so this thread stays
 * put while cyclics are suspended.  Caller must hold cpu_lock.
 * Undone by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();
	cyclic_suspend();
}
 143 
/*
 * Mirror image of dr_stop_intr(): resume the cyclic subsystem and
 * re-enable preemption, in the reverse order of suspension.  Caller
 * must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	kpreempt_enable();
}
 152 
/*
 * Allocate a suspend/resume handle bound to the given DR handle.
 * Released via dr_release_sr_handle().
 * NOTE(review): assumes GETSTRUCT returns zeroed memory (all other
 * fields start 0/NULL); dr_release_sr_handle() asserts
 * sr_failed_dip == NULL — confirm against GETSTRUCT's definition.
 */
dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t *hp)
{
	dr_sr_handle_t *srh;

	srh = GETSTRUCT(dr_sr_handle_t, 1);
	srh->sr_dr_handlep = hp;

	return (srh);
}
 163 
/*
 * Free a suspend/resume handle.  Any hold taken on a failed dip must
 * already have been dropped (by dr_resume_devices()).
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
	ASSERT(srh->sr_failed_dip == NULL);
	FREESTRUCT(srh, dr_sr_handle_t, 1);
}
 170 
 171 static int
 172 dr_is_real_device(dev_info_t *dip)
 173 {
 174         struct regspec *regbuf = NULL;
 175         int length = 0;
 176         int rc;
 177 
 178         if (ddi_get_driver(dip) == NULL)
 179                 return (0);
 180 
 181         if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
 182                 return (1);
 183         if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
 184                 return (0);
 185 
 186         /*
 187          * now the general case
 188          */
 189         rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
 190             (caddr_t)&regbuf, &length);
 191         ASSERT(rc != DDI_PROP_NO_MEMORY);
 192         if (rc != DDI_PROP_SUCCESS) {
 193                 return (0);
 194         } else {
 195                 if ((length > 0) && (regbuf != NULL))
 196                         kmem_free(regbuf, length);
 197                 return (1);
 198         }
 199 }
 200 
 201 static int
 202 dr_is_unsafe_major(major_t major)
 203 {
 204         char    *dname, **cpp;
 205         int     i, ndevs;
 206 
 207         if ((dname = ddi_major_to_name(major)) == NULL) {
 208                 PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
 209                 return (0);
 210         }
 211 
 212         ndevs = dr_unsafe_devs.ndevs;
 213         for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
 214                 if (strcmp(dname, *cpp++) == 0)
 215                         return (1);
 216         }
 217         return (0);
 218 }
 219 
 220 static int
 221 dr_bypass_device(char *dname)
 222 {
 223         int i;
 224         char **lname;
 225 
 226         if (dname == NULL)
 227                 return (0);
 228 
 229         /* check the bypass list */
 230         for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
 231                 if (strcmp(dname, dr_bypass_list[i++]) == 0)
 232                         return (1);
 233         }
 234         return (0);
 235 }
 236 
 237 static int
 238 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
 239 {
 240         major_t devmajor;
 241         char    *aka, *name;
 242 
 243         *buffer = *alias = 0;
 244 
 245         if (dip == NULL)
 246                 return (-1);
 247 
 248         if ((name = ddi_get_name(dip)) == NULL)
 249                 name = "<null name>";
 250 
 251         aka = name;
 252 
 253         if ((devmajor = ddi_name_to_major(aka)) != -1)
 254                 aka = ddi_major_to_name(devmajor);
 255 
 256         (void) strcpy(buffer, name);
 257 
 258         if (strcmp(name, aka))
 259                 (void) strcpy(alias, aka);
 260         else
 261                 *alias = 0;
 262 
 263         return (0);
 264 }
 265 
/*
 * Accumulator passed (as the opaque walk argument) to dr_check_dip()
 * while walking a device branch.  Any pointer member may be NULL if
 * the caller is not interested in that result.
 */
struct dr_ref {
	int		*refcount;	/* accumulated device reference count */
	int		*refcount_non_gldv3;	/* refs on non-GLDv3 network drivers */
	uint64_t	*arr;		/* out: majors of unsafe attached drivers */
	int		*idx;		/* next free slot in arr */
	int		len;		/* capacity of arr */
};
 273 
 274 /* ARGSUSED */
 275 static int
 276 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
 277 {
 278         major_t         major;
 279         char            *dname;
 280         struct dr_ref   *rp = (struct dr_ref *)arg;
 281 
 282         if (dip == NULL)
 283                 return (DDI_WALK_CONTINUE);
 284 
 285         if (!dr_is_real_device(dip))
 286                 return (DDI_WALK_CONTINUE);
 287 
 288         dname = ddi_binding_name(dip);
 289 
 290         if (dr_bypass_device(dname))
 291                 return (DDI_WALK_CONTINUE);
 292 
 293         if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
 294                 if (ref && rp->refcount) {
 295                         *rp->refcount += ref;
 296                         PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
 297                             major, ref);
 298                 }
 299                 if (ref && rp->refcount_non_gldv3) {
 300                         if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
 301                                 *rp->refcount_non_gldv3 += ref;
 302                 }
 303                 if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
 304                         PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
 305                             major);
 306                         if (rp->arr != NULL && rp->idx != NULL)
 307                                 *rp->idx = dr_add_int(rp->arr, *rp->idx,
 308                                     rp->len, (uint64_t)major);
 309                 }
 310         }
 311         return (DDI_WALK_CONTINUE);
 312 }
 313 
/*
 * ddi_walk_devs() callback: run dr_check_dip() with a zero reference
 * count, so only the unsafe-major recording applies.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
	return (dr_check_dip(dip, arg, 0));
}
 319 
 320 
 321 /*ARGSUSED*/
 322 void
 323 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
 324     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
 325 {
 326         struct dr_ref bref = {0};
 327 
 328         if (dip == NULL)
 329                 return;
 330 
 331         bref.refcount = refcount;
 332         bref.refcount_non_gldv3 = refcount_non_gldv3;
 333         bref.arr = arr;
 334         bref.idx = idx;
 335         bref.len = len;
 336 
 337         ASSERT(e_ddi_branch_held(dip));
 338         (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
 339 }
 340 
 341 /*
 342  * The "dip" argument's parent (if it exists) must be held busy.
 343  */
 344 static int
 345 dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
 346 {
 347         dr_handle_t     *handle;
 348         major_t         major;
 349         char            *dname;
 350         int             circ;
 351 
 352         /*
 353          * If dip is the root node, it has no siblings and it is
 354          * always held. If dip is not the root node, dr_suspend_devices()
 355          * will be invoked with the parent held busy.
 356          */
 357         for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
 358                 char    d_name[40], d_alias[40], *d_info;
 359 
 360                 ndi_devi_enter(dip, &circ);
 361                 if (dr_suspend_devices(ddi_get_child(dip), srh)) {
 362                         ndi_devi_exit(dip, circ);
 363                         return (ENXIO);
 364                 }
 365                 ndi_devi_exit(dip, circ);
 366 
 367                 if (!dr_is_real_device(dip))
 368                         continue;
 369 
 370                 major = (major_t)-1;
 371                 if ((dname = ddi_binding_name(dip)) != NULL)
 372                         major = ddi_name_to_major(dname);
 373 
 374                 if (dr_bypass_device(dname)) {
 375                         PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
 376                             major);
 377                         continue;
 378                 }
 379 
 380                 if (drmach_verify_sr(dip, 1)) {
 381                         PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
 382                             major);
 383                         continue;
 384                 }
 385 
 386                 if ((d_info = ddi_get_name_addr(dip)) == NULL)
 387                         d_info = "<null>";
 388 
 389                 d_name[0] = 0;
 390                 if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
 391                         if (d_alias[0] != 0) {
 392                                 prom_printf("\tsuspending %s@%s (aka %s)\n",
 393                                     d_name, d_info, d_alias);
 394                         } else {
 395                                 prom_printf("\tsuspending %s@%s\n", d_name,
 396                                     d_info);
 397                         }
 398                 } else {
 399                         prom_printf("\tsuspending %s@%s\n", dname, d_info);
 400                 }
 401 
 402                 if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
 403                         prom_printf("\tFAILED to suspend %s@%s\n",
 404                             d_name[0] ? d_name : dname, d_info);
 405 
 406                         srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
 407                             srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
 408 
 409                         ndi_hold_devi(dip);
 410                         srh->sr_failed_dip = dip;
 411 
 412                         handle = srh->sr_dr_handlep;
 413                         dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
 414                             d_name[0] ? d_name : dname, d_info);
 415 
 416                         return (DDI_FAILURE);
 417                 }
 418         }
 419 
 420         return (DDI_SUCCESS);
 421 }
 422 
/*
 * DDI_RESUME devices in reverse device-tree order — the mirror image
 * of dr_suspend_devices().  Siblings are visited last-to-first; the
 * walk stops resuming at srh->sr_failed_dip (that node never
 * suspended, so its hold is simply released).  Children of each node
 * are resumed after the node itself.  Resume failures are logged and
 * recorded in srh->sr_err_ints but do not abort the walk.
 */
static void
dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	dev_info_t	*dip, *next, *last = NULL;
	major_t		major;
	char		*bn;
	int		circ;

	major = (major_t)-1;

	/* attach in reverse device tree order */
	while (last != start) {
		/*
		 * Find the last sibling not yet processed (or the
		 * failed dip, whichever comes first).
		 */
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != srh->sr_failed_dip) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == srh->sr_failed_dip) {
			/* release hold acquired in dr_suspend_devices() */
			srh->sr_failed_dip = NULL;
			ndi_rele_devi(dip);
		} else if (dr_is_real_device(dip) &&
		    srh->sr_failed_dip == NULL) {

			if ((bn = ddi_binding_name(dip)) != NULL) {
				major = ddi_name_to_major(bn);
			} else {
				bn = "<null>";
			}
			if (!dr_bypass_device(bn) &&
			    !drmach_verify_sr(dip, 0)) {
				char	d_name[40], d_alias[40], *d_info;

				d_name[0] = 0;
				d_info = ddi_get_name_addr(dip);
				if (d_info == NULL)
					d_info = "<null>";

				if (!dr_resolve_devname(dip, d_name, d_alias)) {
					if (d_alias[0] != 0) {
						prom_printf("\tresuming "
						    "%s@%s (aka %s)\n", d_name,
						    d_info, d_alias);
					} else {
						prom_printf("\tresuming "
						    "%s@%s\n", d_name, d_info);
					}
				} else {
					prom_printf("\tresuming %s@%s\n", bn,
					    d_info);
				}

				if (devi_attach(dip, DDI_RESUME) !=
				    DDI_SUCCESS) {
					/*
					 * Print a console warning,
					 * set an e_code of ESBD_RESUME,
					 * and save the driver major
					 * number in the e_rsc.
					 */
					prom_printf("\tFAILED to resume %s@%s",
					    d_name[0] ? d_name : bn, d_info);

					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)major);

					handle = srh->sr_dr_handlep;

					dr_op_err(CE_IGNORE, handle,
					    ESBD_RESUME, "%s@%s",
					    d_name[0] ? d_name : bn, d_info);
				}
			}
		}

		/* Hold parent busy while walking its children */
		ndi_devi_enter(dip, &circ);
		dr_resume_devices(ddi_get_child(dip), srh);
		ndi_devi_exit(dip, circ);
		last = dip;
	}
}
 509 
 510 /*
 511  * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 512  * but from DR point of view.  These user threads are waiting in
 513  * the kernel.  Once they complete in the kernel, they will process
 514  * the stop signal and stop.
 515  */
 516 #define DR_VSTOPPED(t)                  \
 517         ((t)->t_state == TS_SLEEP && \
 518         (t)->t_wchan != NULL &&              \
 519         (t)->t_astflag &&            \
 520         ((t)->t_proc_flag & TP_CHKPT))
 521 
/*
 * Stop all user threads via the TP_CHKPT checkpoint protocol,
 * retrying up to DR_UTSTOP_RETRY times with an increasing wait
 * between passes.  If dr_skip_user_threads is set this is a no-op.
 * Threads that fail to stop (optionally excusing virtually-stopped
 * blocked threads, per dr_allow_blocked_threads) cause failure;
 * their pids are recorded in srh->sr_err_ints and an ESBD_UTHREAD
 * error is attached to the DR handle.
 * Returns DDI_SUCCESS or ESRCH.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
	int		count;
	int		bailout;
	dr_handle_t	*handle = srh->sr_dr_handlep;
	static fn_t	f = "dr_stop_user_threads";
	kthread_id_t	tp;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define DR_UTSTOP_RETRY 4
#define DR_UTSTOP_WAIT	hz

	if (dr_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	srh->sr_err_idx = 0;
	for (count = 0; count < DR_UTSTOP_RETRY; count++) {
		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				/*
				 * Drop locks around add_one_utstop();
				 * then re-acquire in the same order.
				 */
				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}

			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);


			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);


		/* let everything catch up; wait grows quadratically per retry */
		utstop_timedwait(count * count * DR_UTSTOP_WAIT);


		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(dr_allow_blocked_threads &&
			    DR_VSTOPPED(tp))) {
				bailout = 1;
				if (count == DR_UTSTOP_RETRY - 1) {
					/*
					 * save the pid for later reporting
					 */
					srh->sr_err_idx =
					    dr_add_int(srh->sr_err_ints,
					    srh->sr_err_idx, DR_MAX_ERR_INT,
					    (uint64_t)p->p_pid);

					cmn_err(CE_WARN, "%s: "
					    "failed to stop thread: "
					    "process=%s, pid=%d",
					    f, p->p_user.u_psargs, p->p_pid);

					PR_QR("%s: failed to stop thread: "
					    "process=%s, pid=%d, t_id=0x%p, "
					    "t_state=0x%x, t_proc_flag=0x%x, "
					    "t_schedflag=0x%x\n",
					    f, p->p_user.u_psargs, p->p_pid,
					    (void *)tp, tp->t_state,
					    tp->t_proc_flag, tp->t_schedflag);
				}

			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
		    srh->sr_err_idx, 0);
		return (ESRCH);
	}

	return (DDI_SUCCESS);
}
 655 
 656 static void
 657 dr_start_user_threads(void)
 658 {
 659         kthread_id_t tp;
 660 
 661         mutex_enter(&pidlock);
 662 
 663         /* walk all threads and release them */
 664         for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
 665                 proc_t *p = ttoproc(tp);
 666 
 667                 /* skip kernel threads */
 668                 if (ttoproc(tp)->p_as == &kas)
 669                         continue;
 670 
 671                 mutex_enter(&p->p_lock);
 672                 tp->t_proc_flag &= ~TP_CHKPT;
 673                 mutex_exit(&p->p_lock);
 674 
 675                 thread_lock(tp);
 676                 if (CPR_ISTOPPED(tp)) {
 677                         /* back on the runq */
 678                         tp->t_schedflag |= TS_RESUME;
 679                         setrun_locked(tp);
 680                 }
 681                 thread_unlock(tp);
 682         }
 683 
 684         mutex_exit(&pidlock);
 685 }
 686 
 687 static void
 688 dr_signal_user(int sig)
 689 {
 690         struct proc *p;
 691 
 692         mutex_enter(&pidlock);
 693 
 694         for (p = practive; p != NULL; p = p->p_next) {
 695                 /* only user threads */
 696                 if (p->p_exec == NULL || p->p_stat == SZOMB ||
 697                     p == proc_init || p == ttoproc(curthread))
 698                         continue;
 699 
 700                 mutex_enter(&p->p_lock);
 701                 sigtoproc(p, NULL, sig);
 702                 mutex_exit(&p->p_lock);
 703         }
 704 
 705         mutex_exit(&pidlock);
 706 
 707         /* add a bit of delay */
 708         delay(hz);
 709 }
 710 
/*
 * Undo a (possibly partial) suspend.  srh->sr_suspend_state records
 * how far dr_suspend() progressed; each case below deliberately
 * falls through so that all earlier stages are undone as well.
 */
void
dr_resume(dr_sr_handle_t *srh)
{
	if (srh->sr_suspend_state < DR_SRSTATE_FULL) {
		/*
		 * Update the signature block.
		 * If cpus are not paused, this can be done now.
		 * See comments below.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);
	}

	switch (srh->sr_suspend_state) {
	case DR_SRSTATE_FULL:

		ASSERT(MUTEX_HELD(&cpu_lock));

		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		dr_enable_intr(); 	/* enable intr & clock */

		start_cpus();
		mutex_exit(&cpu_lock);

		/*
		 * Update the signature block.
		 * This must not be done while cpus are paused, since on
		 * Starcat the cpu signature update acquires an adaptive
		 * mutex in the iosram driver. Blocking with cpus paused
		 * can lead to deadlock.
		 */
		CPU_SIGNATURE(OS_SIG, SIGST_RESUME_INPROGRESS, SIGSUBST_NULL,
		    CPU->cpu_id);

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (srh->sr_flags & (SR_FLAG_WATCHDOG)) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * This should only be called if drmach_suspend_last()
		 * was called and state transitioned to DR_SRSTATE_FULL
		 * to prevent resume attempts on device instances that
		 * were not previously suspended.
		 */
		drmach_resume_first();

		/* FALLTHROUGH */

	case DR_SRSTATE_DRIVER:
		/*
		 * resume drivers
		 */
		srh->sr_err_idx = 0;

		/* no parent dip to hold busy */
		dr_resume_devices(ddi_root_node(), srh);

		/* report any resume failures recorded during the walk */
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case DR_SRSTATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!dr_skip_user_threads) {
			prom_printf("DR: resuming user threads...\n");
			dr_start_user_threads();
		}
		/* FALLTHROUGH */

	case DR_SRSTATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		PR_QR("sending SIGTHAW...\n");
		dr_signal_user(SIGTHAW);
		break;
	}

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_RUN, SIGSUBST_NULL, CPU->cpu_id);

	prom_printf("DR: resume COMPLETED\n");
}
 821 
/*
 * Quiesce the system for a DR operation.  The suspend proceeds in
 * stages, recorded in srh->sr_suspend_state so that dr_resume() can
 * unwind exactly as far as we got: stop user threads, reject unsafe
 * devices (unless SBD_FLAG_FORCE), sync filesystems, suspend the lock
 * manager and then all drivers, disable the hardware watchdog, and
 * finally pause the other cpus with interrupts stopped.  On any
 * failure the partially-suspended state is rolled back via dr_resume()
 * and the error is returned.  Returns DDI_SUCCESS once fully quiesced;
 * the caller is then responsible for the eventual dr_resume().
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
	dr_handle_t	*handle;
	int		force;
	int		dev_errs_idx;
	uint64_t	dev_errs[DR_MAX_ERR_INT];
	int		rc = DDI_SUCCESS;

	handle = srh->sr_dr_handlep;

	/* SBD_FLAG_FORCE skips the unsafe-device scan below */
	force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

	/*
	 * update the signature block
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCE_INPROGRESS, SIGSUBST_NULL,
	    CPU->cpu_id);

	prom_printf("\nDR: suspending user threads...\n");
	srh->sr_suspend_state = DR_SRSTATE_USER;
	if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
	    dr_check_user_stop_result) {
		/* roll back whatever was already suspended */
		dr_resume(srh);
		return (rc);
	}

	if (!force) {
		struct dr_ref drc = {0};

		prom_printf("\nDR: checking devices...\n");
		dev_errs_idx = 0;

		/* collect offending major numbers into dev_errs[] */
		drc.arr = dev_errs;
		drc.idx = &dev_errs_idx;
		drc.len = DR_MAX_ERR_INT;

		/*
		 * Since the root node can never go away, it
		 * doesn't have to be held.
		 */
		ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
		if (dev_errs_idx) {
			/* any unsafe device aborts a non-forced suspend */
			handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
			    dev_errs_idx, 1);
			dr_resume(srh);
			return (DDI_FAILURE);
		}
		PR_QR("done\n");
	} else {
		prom_printf("\nDR: dr_suspend invoked with force flag\n");
	}

#ifndef SKIP_SYNC
	/*
	 * This sync swap out all user pages
	 */
	vfs_sync(SYNC_ALL);
#endif

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

#ifndef SKIP_SYNC
	/*
	 * sync the file system in case we never make it back
	 */
	sync();
#endif

	/*
	 * now suspend drivers
	 */
	prom_printf("DR: suspending drivers...\n");
	srh->sr_suspend_state = DR_SRSTATE_DRIVER;
	srh->sr_err_idx = 0;
	/* No parent to hold busy */
	if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
		if (srh->sr_err_idx && srh->sr_dr_handlep) {
			/* report the majors that refused to suspend */
			(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
			    srh->sr_err_ints, srh->sr_err_idx, 1);
		}
		dr_resume(srh);
		return (rc);
	}

	drmach_suspend_last();

	/*
	 * finally, grab all cpus
	 */
	srh->sr_suspend_state = DR_SRSTATE_FULL;

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		/* remember to re-arm the watchdog on resume */
		srh->sr_flags |= SR_FLAG_WATCHDOG;
	} else {
		srh->sr_flags &= ~(SR_FLAG_WATCHDOG);
	}

	/*
	 * Update the signature block.
	 * This must be done before cpus are paused, since on Starcat the
	 * cpu signature update acquires an adaptive mutex in the iosram driver.
	 * Blocking with cpus paused can lead to deadlock.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_QUIESCED, SIGSUBST_NULL, CPU->cpu_id);

	/* cpu_lock is intentionally left held across the paused window */
	mutex_enter(&cpu_lock);
	pause_cpus(NULL, NULL);
	dr_stop_intr();

	return (rc);
}
 943 
 944 int
 945 dr_pt_test_suspend(dr_handle_t *hp)
 946 {
 947         dr_sr_handle_t *srh;
 948         int             err;
 949         uint_t          psmerr;
 950         static fn_t     f = "dr_pt_test_suspend";
 951 
 952         PR_QR("%s...\n", f);
 953 
 954         srh = dr_get_sr_handle(hp);
 955         if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
 956                 dr_resume(srh);
 957                 if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
 958                         PR_QR("%s: error on dr_resume()", f);
 959                         switch (psmerr) {
 960                         case ESBD_RESUME:
 961                                 PR_QR("Couldn't resume devices: %s\n",
 962                                     DR_GET_E_RSC(hp->h_err));
 963                                 break;
 964 
 965                         case ESBD_KTHREAD:
 966                                 PR_ALL("psmerr is ESBD_KTHREAD\n");
 967                                 break;
 968                         default:
 969                                 PR_ALL("Resume error unknown = %d\n", psmerr);
 970                                 break;
 971                         }
 972                 }
 973         } else {
 974                 PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
 975                 psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
 976                 switch (psmerr) {
 977                 case ESBD_UNSAFE:
 978                         PR_ALL("Unsafe devices (major #): %s\n",
 979                             DR_GET_E_RSC(hp->h_err));
 980                         break;
 981 
 982                 case ESBD_RTTHREAD:
 983                         PR_ALL("RT threads (PIDs): %s\n",
 984                             DR_GET_E_RSC(hp->h_err));
 985                         break;
 986 
 987                 case ESBD_UTHREAD:
 988                         PR_ALL("User threads (PIDs): %s\n",
 989                             DR_GET_E_RSC(hp->h_err));
 990                         break;
 991 
 992                 case ESBD_SUSPEND:
 993                         PR_ALL("Non-suspendable devices (major #): %s\n",
 994                             DR_GET_E_RSC(hp->h_err));
 995                         break;
 996 
 997                 case ESBD_RESUME:
 998                         PR_ALL("Could not resume devices (major #): %s\n",
 999                             DR_GET_E_RSC(hp->h_err));
1000                         break;
1001 
1002                 case ESBD_KTHREAD:
1003                         PR_ALL("psmerr is ESBD_KTHREAD\n");
1004                         break;
1005 
1006                 case ESBD_NOERROR:
1007                         PR_ALL("sbd_error_t error code not set\n");
1008                         break;
1009 
1010                 default:
1011                         PR_ALL("Unknown error psmerr = %d\n", psmerr);
1012                         break;
1013                 }
1014         }
1015         dr_release_sr_handle(srh);
1016 
1017         return (0);
1018 }
1019 
1020 /*
1021  * Add a new integer value to the end of an array.  Don't allow duplicates to
1022  * appear in the array, and don't allow the array to overflow.  Return the new
1023  * total number of entries in the array.
1024  */
1025 static int
1026 dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
1027 {
1028         int i;
1029 
1030         if (arr == NULL)
1031                 return (0);
1032 
1033         if (idx >= len)
1034                 return (idx);
1035 
1036         for (i = 0; i < idx; i++) {
1037                 if (arr[i] == val)
1038                         return (idx);
1039         }
1040 
1041         arr[idx++] = val;
1042 
1043         return (idx);
1044 }
1045 
1046 /*
1047  * Construct an sbd_error_t featuring a string representation of an array of
1048  * integers as its e_rsc.
1049  */
1050 static sbd_error_t *
1051 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
1052 {
1053         int             i, n, buf_len, buf_idx, buf_avail;
1054         char            *dname;
1055         char            *buf;
1056         sbd_error_t     *new_sbd_err;
1057         static char     s_ellipsis[] = "...";
1058 
1059         if (arr == NULL || idx <= 0)
1060                 return (NULL);
1061 
1062         /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1063         buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1064 
1065         /*
1066          * This is the total working area of the buffer.  It must be computed
1067          * as the size of 'buf', minus reserved space for the null terminator
1068          * and the ellipsis string.
1069          */
1070         buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1071 
1072         /* Construct a string representation of the array values */
1073         for (buf_idx = 0, i = 0; i < idx; i++) {
1074                 buf_avail = buf_len - buf_idx;
1075                 if (majors) {
1076                         dname = ddi_major_to_name(arr[i]);
1077                         if (dname) {
1078                                 n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1079                                     dname);
1080                         } else {
1081                                 n = snprintf(&buf[buf_idx], buf_avail,
1082                                     "major %lu, ", arr[i]);
1083                         }
1084                 } else {
1085                         n = snprintf(&buf[buf_idx], buf_avail, "%lu, ", arr[i]);
1086                 }
1087 
1088                 /* An ellipsis gets appended when no more values fit */
1089                 if (n >= buf_avail) {
1090                         (void) strcpy(&buf[buf_idx], s_ellipsis);
1091                         break;
1092                 }
1093 
1094                 buf_idx += n;
1095         }
1096 
1097         /* If all the contents fit, remove the trailing comma */
1098         if (n < buf_avail) {
1099                 buf[--buf_idx] = '\0';
1100                 buf[--buf_idx] = '\0';
1101         }
1102 
1103         /* Return an sbd_error_t with the buffer and e_code */
1104         new_sbd_err = drerr_new(1, e_code, buf);
1105         kmem_free(buf, MAXPATHLEN);
1106         return (new_sbd_err);
1107 }