1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
/*
 * A CPR derivative specifically for starfire/starcat.
 * x86 doesn't make use of the quiesce interfaces; the code is kept
 * for simplicity.
 */
  30 
  31 #include <sys/types.h>
  32 #include <sys/systm.h>
  33 #include <sys/machparam.h>
  34 #include <sys/machsystm.h>
  35 #include <sys/ddi.h>
  36 #define SUNDDI_IMPL
  37 #include <sys/sunddi.h>
  38 #include <sys/sunndi.h>
  39 #include <sys/devctl.h>
  40 #include <sys/time.h>
  41 #include <sys/kmem.h>
  42 #include <nfs/lm.h>
  43 #include <sys/ddi_impldefs.h>
  44 #include <sys/ndi_impldefs.h>
  45 #include <sys/obpdefs.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/errno.h>
  49 #include <sys/callb.h>
  50 #include <sys/clock.h>
  51 #include <sys/x_call.h>
  52 #include <sys/cpuvar.h>
  53 #include <sys/epm.h>
  54 #include <sys/vfs.h>
  55 #include <sys/promif.h>
  56 #include <sys/conf.h>
  57 #include <sys/cyclic.h>
  58 
  59 #include <sys/dr.h>
  60 #include <sys/dr_util.h>
  61 
  62 extern void     e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
  63 extern void     e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
  64 extern int      is_pseudo_device(dev_info_t *dip);
  65 
  66 extern kmutex_t cpu_lock;
  67 extern dr_unsafe_devs_t dr_unsafe_devs;
  68 
  69 static int              dr_is_real_device(dev_info_t *dip);
  70 static int              dr_is_unsafe_major(major_t major);
  71 static int              dr_bypass_device(char *dname);
  72 static int              dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
  73 static int              dr_resolve_devname(dev_info_t *dip, char *buffer,
  74                                 char *alias);
  75 static sbd_error_t      *drerr_int(int e_code, uint64_t *arr, int idx,
  76                                 int majors);
  77 static int              dr_add_int(uint64_t *arr, int idx, int len,
  78                                 uint64_t val);
  79 
  80 int dr_pt_test_suspend(dr_handle_t *hp);
  81 
/*
 * dr_quiesce.c interface
 * NOTE: states used internally by dr_suspend and dr_resume
 *
 * dr_suspend() advances sr_suspend_state as it progresses; dr_resume()
 * switches on the state reached and falls through the lower states so
 * that only the steps that were started are undone.
 */
typedef enum dr_suspend_state {
        DR_SRSTATE_BEGIN = 0,   /* initial; dr_resume() only sends SIGTHAW */
        DR_SRSTATE_USER,        /* set before user threads are stopped */
        DR_SRSTATE_DRIVER,      /* set before drivers are suspended */
        DR_SRSTATE_FULL         /* set before the cpus are paused */
} suspend_state_t;
  92 
/*
 * Per-operation suspend/resume bookkeeping, created by dr_get_sr_handle()
 * and freed by dr_release_sr_handle().
 */
struct dr_sr_handle {
        /* DR handle this suspend/resume belongs to; errors go to its h_err */
        dr_handle_t             *sr_dr_handlep;
        /* held dip whose DDI_SUSPEND failed; cleared by dr_resume_devices() */
        dev_info_t              *sr_failed_dip;
        /* how far dr_suspend() got; consulted by dr_resume() */
        suspend_state_t         sr_suspend_state;
        /* NOTE(review): not referenced in this part of the file */
        uint_t                  sr_flags;
        /* majors or pids collected for error reporting (see dr_add_int) */
        uint64_t                sr_err_ints[DR_MAX_ERR_INT];
        /* current index into sr_err_ints as returned by dr_add_int() */
        int                     sr_err_idx;
};
 101 
 102 #define SR_FLAG_WATCHDOG        0x1
 103 
/*
 * XXX
 * This hack will go away before RTI.  Just for testing.
 * List of drivers to bypass when performing a suspend.
 *
 * The list must end with an empty string: dr_bypass_device() walks it
 * until it reaches an entry whose first character is '\0'.
 */
static char *dr_bypass_list[] = {
        ""
};
 112 
 113 
 114 #define         SKIP_SYNC       /* bypass sync ops in dr_suspend */
 115 
/*
 * dr_skip_user_threads is used to control if user threads should
 * be suspended.  If dr_skip_user_threads is true, the rest of the
 * flags are not used; if it is false, dr_check_user_stop_result
 * will be used to control whether or not we need to check suspend
 * result, and dr_allow_blocked_threads will be used to control
 * whether or not we allow suspend to continue if there are blocked
 * threads.  We allow all combinations of dr_check_user_stop_result
 * and dr_allow_blocked_threads, even though it might not make much
 * sense to disallow blocked threads when we don't even check the
 * stop result.
 */
static int      dr_skip_user_threads = 0;       /* default to FALSE */
static int      dr_check_user_stop_result = 1;  /* default to TRUE */
static int      dr_allow_blocked_threads = 1;   /* default to TRUE */
 131 
 132 #define DR_CPU_LOOP_MSEC        1000
 133 
/*
 * Final quiesce step used by dr_suspend(): disable kernel preemption,
 * then suspend the cyclic subsystem.  The caller must hold cpu_lock.
 * Undone by dr_enable_intr().
 */
static void
dr_stop_intr(void)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        kpreempt_disable();
        cyclic_suspend();
}
 142 
/*
 * Inverse of dr_stop_intr(): resume the cyclic subsystem, then re-enable
 * kernel preemption (the reverse of the order used when stopping).
 * The caller must hold cpu_lock.
 */
static void
dr_enable_intr(void)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        cyclic_resume();
        kpreempt_enable();
}
 151 
/*
 * Allocate a suspend/resume handle bound to the DR handle "hp".
 * Only sr_dr_handlep is set here; the remaining fields keep whatever
 * GETSTRUCT provides (presumably zero-filled, since dr_release_sr_handle()
 * asserts sr_failed_dip == NULL -- confirm against the GETSTRUCT macro).
 * Release with dr_release_sr_handle().
 */
dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t *hp)
{
        dr_sr_handle_t *srh;

        srh = GETSTRUCT(dr_sr_handle_t, 1);
        srh->sr_dr_handlep = hp;

        return (srh);
}
 162 
/*
 * Free a handle obtained from dr_get_sr_handle().  By this point any
 * failed-device hold must already have been dropped (sr_failed_dip is
 * cleared in dr_resume_devices()), which the ASSERT checks.
 */
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
        ASSERT(srh->sr_failed_dip == NULL);
        FREESTRUCT(srh, dr_sr_handle_t, 1);
}
 169 
 170 static int
 171 dr_is_real_device(dev_info_t *dip)
 172 {
 173         struct regspec *regbuf = NULL;
 174         int length = 0;
 175         int rc;
 176 
 177         if (ddi_get_driver(dip) == NULL)
 178                 return (0);
 179 
 180         if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
 181                 return (1);
 182         if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
 183                 return (0);
 184 
 185         /*
 186          * now the general case
 187          */
 188         rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
 189             (caddr_t)&regbuf, &length);
 190         ASSERT(rc != DDI_PROP_NO_MEMORY);
 191         if (rc != DDI_PROP_SUCCESS) {
 192                 return (0);
 193         } else {
 194                 if ((length > 0) && (regbuf != NULL))
 195                         kmem_free(regbuf, length);
 196                 return (1);
 197         }
 198 }
 199 
 200 static int
 201 dr_is_unsafe_major(major_t major)
 202 {
 203         char    *dname, **cpp;
 204         int     i, ndevs;
 205 
 206         if ((dname = ddi_major_to_name(major)) == NULL) {
 207                 PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
 208                 return (0);
 209         }
 210 
 211         ndevs = dr_unsafe_devs.ndevs;
 212         for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
 213                 if (strcmp(dname, *cpp++) == 0)
 214                         return (1);
 215         }
 216         return (0);
 217 }
 218 
 219 static int
 220 dr_bypass_device(char *dname)
 221 {
 222         int i;
 223         char **lname;
 224 
 225         if (dname == NULL)
 226                 return (0);
 227 
 228         /* check the bypass list */
 229         for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
 230                 if (strcmp(dname, dr_bypass_list[i++]) == 0)
 231                         return (1);
 232         }
 233         return (0);
 234 }
 235 
 236 static int
 237 dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
 238 {
 239         major_t devmajor;
 240         char    *aka, *name;
 241 
 242         *buffer = *alias = 0;
 243 
 244         if (dip == NULL)
 245                 return (-1);
 246 
 247         if ((name = ddi_get_name(dip)) == NULL)
 248                 name = "<null name>";
 249 
 250         aka = name;
 251 
 252         if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
 253                 aka = ddi_major_to_name(devmajor);
 254 
 255         (void) strcpy(buffer, name);
 256 
 257         if (strcmp(name, aka))
 258                 (void) strcpy(alias, aka);
 259         else
 260                 *alias = 0;
 261 
 262         return (0);
 263 }
 264 
/*
 * Argument block handed to dr_check_dip() through the device tree walkers.
 * Any of the pointers may be NULL, in which case dr_check_dip() skips the
 * corresponding bookkeeping.
 */
struct dr_ref {
        int             *refcount;              /* running total of refs */
        int             *refcount_non_gldv3;    /* refs on non-GLDv3 NICs */
        uint64_t        *arr;                   /* unsafe majors found */
        int             *idx;                   /* current index into arr */
        int             len;                    /* capacity of arr */
};
 272 
 273 /* ARGSUSED */
 274 static int
 275 dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
 276 {
 277         major_t         major;
 278         char            *dname;
 279         struct dr_ref   *rp = (struct dr_ref *)arg;
 280 
 281         if (dip == NULL)
 282                 return (DDI_WALK_CONTINUE);
 283 
 284         if (!dr_is_real_device(dip))
 285                 return (DDI_WALK_CONTINUE);
 286 
 287         dname = ddi_binding_name(dip);
 288 
 289         if (dr_bypass_device(dname))
 290                 return (DDI_WALK_CONTINUE);
 291 
 292         if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
 293                 if (ref && rp->refcount) {
 294                         *rp->refcount += ref;
 295                         PR_QR("\n  %s (major# %d) is referenced(%u)\n", dname,
 296                             major, ref);
 297                 }
 298                 if (ref && rp->refcount_non_gldv3) {
 299                         if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
 300                                 *rp->refcount_non_gldv3 += ref;
 301                 }
 302                 if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
 303                         PR_QR("\n  %s (major# %d) not hotpluggable\n", dname,
 304                             major);
 305                         if (rp->arr != NULL && rp->idx != NULL)
 306                                 *rp->idx = dr_add_int(rp->arr, *rp->idx,
 307                                     rp->len, (uint64_t)major);
 308                 }
 309         }
 310         return (DDI_WALK_CONTINUE);
 311 }
 312 
/*
 * ddi_walk_devs() callback: run dr_check_dip() with a reference count of
 * zero, so only the unsafe-driver check can have an effect.  "arg" is the
 * struct dr_ref passed to the walk.
 */
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
        return (dr_check_dip(dip, arg, 0));
}
 318 
 319 
 320 /*ARGSUSED*/
 321 void
 322 dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
 323     uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
 324 {
 325         struct dr_ref bref = {0};
 326 
 327         if (dip == NULL)
 328                 return;
 329 
 330         bref.refcount = refcount;
 331         bref.refcount_non_gldv3 = refcount_non_gldv3;
 332         bref.arr = arr;
 333         bref.idx = idx;
 334         bref.len = len;
 335 
 336         ASSERT(e_ddi_branch_held(dip));
 337         (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
 338 }
 339 
/*
 * Suspend (DDI_SUSPEND) "dip", its following siblings and all of their
 * descendants.  For each node the children are suspended first, then the
 * node itself.
 *
 * The "dip" argument's parent (if it exists) must be held busy.
 *
 * Nodes that are not real devices, that are on the bypass list, or for
 * which drmach_verify_sr() returns non-zero are skipped.
 *
 * Returns DDI_SUCCESS when everything was suspended.  When devi_detach()
 * fails on a node at this level, the node is held and remembered in
 * srh->sr_failed_dip, its major is added to srh->sr_err_ints, an
 * ESBD_SUSPEND error is posted on the DR handle and DDI_FAILURE is
 * returned.  When the failure happened in a descendant, ENXIO is returned.
 * Callers should therefore only compare the result against DDI_SUCCESS.
 */
static int
dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
{
        dr_handle_t     *handle;
        major_t         major;
        char            *dname;
        int             circ;

        /*
         * If dip is the root node, it has no siblings and it is
         * always held. If dip is not the root node, dr_suspend_devices()
         * will be invoked with the parent held busy.
         */
        for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
                char    d_name[40], d_alias[40], *d_info;

                /* hold this node busy while its children are suspended */
                ndi_devi_enter(dip, &circ);
                if (dr_suspend_devices(ddi_get_child(dip), srh)) {
                        ndi_devi_exit(dip, circ);
                        return (ENXIO);
                }
                ndi_devi_exit(dip, circ);

                if (!dr_is_real_device(dip))
                        continue;

                /* reset per device so a stale major is never reported */
                major = (major_t)-1;
                if ((dname = ddi_binding_name(dip)) != NULL)
                        major = ddi_name_to_major(dname);

                if (dr_bypass_device(dname)) {
                        PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
                            major);
                        continue;
                }

                /* platform code may veto suspending this device */
                if (drmach_verify_sr(dip, 1)) {
                        PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
                            major);
                        continue;
                }

                if ((d_info = ddi_get_name_addr(dip)) == NULL)
                        d_info = "<null>";

                /* d_name stays empty if the name cannot be resolved */
                d_name[0] = 0;
                if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
                        if (d_alias[0] != 0) {
                                prom_printf("\tsuspending %s@%s (aka %s)\n",
                                    d_name, d_info, d_alias);
                        } else {
                                prom_printf("\tsuspending %s@%s\n", d_name,
                                    d_info);
                        }
                } else {
                        prom_printf("\tsuspending %s@%s\n", dname, d_info);
                }

                if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
                        prom_printf("\tFAILED to suspend %s@%s\n",
                            d_name[0] ? d_name : dname, d_info);

                        srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
                            srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);

                        /*
                         * Remember where we stopped; the hold is released
                         * in dr_resume_devices().
                         */
                        ndi_hold_devi(dip);
                        srh->sr_failed_dip = dip;

                        handle = srh->sr_dr_handlep;
                        dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
                            d_name[0] ? d_name : dname, d_info);

                        return (DDI_FAILURE);
                }
        }

        return (DDI_SUCCESS);
}
 421 
 422 static void
 423 dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
 424 {
 425         dr_handle_t     *handle;
 426         dev_info_t      *dip, *next, *last = NULL;
 427         major_t         major;
 428         char            *bn;
 429         int             circ;
 430 
 431         major = (major_t)-1;
 432 
 433         /* attach in reverse device tree order */
 434         while (last != start) {
 435                 dip = start;
 436                 next = ddi_get_next_sibling(dip);
 437                 while (next != last && dip != srh->sr_failed_dip) {
 438                         dip = next;
 439                         next = ddi_get_next_sibling(dip);
 440                 }
 441                 if (dip == srh->sr_failed_dip) {
 442                         /* release hold acquired in dr_suspend_devices() */
 443                         srh->sr_failed_dip = NULL;
 444                         ndi_rele_devi(dip);
 445                 } else if (dr_is_real_device(dip) &&
 446                     srh->sr_failed_dip == NULL) {
 447 
 448                         if ((bn = ddi_binding_name(dip)) != NULL) {
 449                                 major = ddi_name_to_major(bn);
 450                         } else {
 451                                 bn = "<null>";
 452                         }
 453                         if (!dr_bypass_device(bn) &&
 454                             !drmach_verify_sr(dip, 0)) {
 455                                 char    d_name[40], d_alias[40], *d_info;
 456 
 457                                 d_name[0] = 0;
 458                                 d_info = ddi_get_name_addr(dip);
 459                                 if (d_info == NULL)
 460                                         d_info = "<null>";
 461 
 462                                 if (!dr_resolve_devname(dip, d_name, d_alias)) {
 463                                         if (d_alias[0] != 0) {
 464                                                 prom_printf("\tresuming "
 465                                                     "%s@%s (aka %s)\n", d_name,
 466                                                     d_info, d_alias);
 467                                         } else {
 468                                                 prom_printf("\tresuming "
 469                                                     "%s@%s\n", d_name, d_info);
 470                                         }
 471                                 } else {
 472                                         prom_printf("\tresuming %s@%s\n", bn,
 473                                             d_info);
 474                                 }
 475 
 476                                 if (devi_attach(dip, DDI_RESUME) !=
 477                                     DDI_SUCCESS) {
 478                                         /*
 479                                          * Print a console warning,
 480                                          * set an e_code of ESBD_RESUME,
 481                                          * and save the driver major
 482                                          * number in the e_rsc.
 483                                          */
 484                                         prom_printf("\tFAILED to resume %s@%s",
 485                                             d_name[0] ? d_name : bn, d_info);
 486 
 487                                         srh->sr_err_idx =
 488                                             dr_add_int(srh->sr_err_ints,
 489                                             srh->sr_err_idx, DR_MAX_ERR_INT,
 490                                             (uint64_t)major);
 491 
 492                                         handle = srh->sr_dr_handlep;
 493 
 494                                         dr_op_err(CE_IGNORE, handle,
 495                                             ESBD_RESUME, "%s@%s",
 496                                             d_name[0] ? d_name : bn, d_info);
 497                                 }
 498                         }
 499                 }
 500 
 501                 /* Hold parent busy while walking its children */
 502                 ndi_devi_enter(dip, &circ);
 503                 dr_resume_devices(ddi_get_child(dip), srh);
 504                 ndi_devi_exit(dip, circ);
 505                 last = dip;
 506         }
 507 }
 508 
/*
 * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
 * but from DR point of view.  These user threads are waiting in
 * the kernel.  Once they complete in the kernel, they will process
 * the stop signal and stop.
 *
 * Concretely: the thread is asleep (TS_SLEEP) on a wait channel, has
 * its AST flag set and is marked TP_CHKPT.  dr_stop_user_threads()
 * evaluates this with the thread lock held.
 */
#define DR_VSTOPPED(t)                  \
        ((t)->t_state == TS_SLEEP && \
        (t)->t_wchan != NULL &&              \
        (t)->t_astflag &&            \
        ((t)->t_proc_flag & TP_CHKPT))
 520 
/*
 * Ask every user thread other than the caller to stop, retrying up to
 * DR_UTSTOP_RETRY times.
 *
 * Each attempt makes two passes over the thread list under pidlock:
 * the first marks threads TP_CHKPT and nudges them (AST, setrun, cpu
 * poke); the second, after a wait, checks whether every user thread is
 * stopped, or virtually stopped (DR_VSTOPPED) when
 * dr_allow_blocked_threads is set.
 *
 * Returns DDI_SUCCESS if dr_skip_user_threads is set or all threads
 * stopped.  Otherwise returns ESRCH after storing an ESBD_UTHREAD error,
 * built from the pids of the threads that would not stop on the final
 * attempt, in the DR handle's h_err.
 */
/* ARGSUSED */
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
        int             count;
        int             bailout;
        dr_handle_t     *handle = srh->sr_dr_handlep;
        static fn_t     f = "dr_stop_user_threads";
        kthread_id_t    tp;

        extern void add_one_utstop();
        extern void utstop_timedwait(clock_t);
        extern void utstop_init(void);

#define DR_UTSTOP_RETRY 4
#define DR_UTSTOP_WAIT  hz

        if (dr_skip_user_threads)
                return (DDI_SUCCESS);

        utstop_init();

        /* we need to try a few times to get past fork, etc. */
        srh->sr_err_idx = 0;
        for (count = 0; count < DR_UTSTOP_RETRY; count++) {
                /* walk the entire threadlist */
                mutex_enter(&pidlock);
                for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
                        proc_t *p = ttoproc(tp);

                        /* handle kernel threads separately */
                        if (p->p_as == &kas || p->p_stat == SZOMB)
                                continue;

                        mutex_enter(&p->p_lock);
                        thread_lock(tp);

                        if (tp->t_state == TS_STOPPED) {
                                /* add another reason to stop this thread */
                                tp->t_schedflag &= ~TS_RESUME;
                        } else {
                                tp->t_proc_flag |= TP_CHKPT;

                                /*
                                 * Both locks are dropped around
                                 * add_one_utstop() and re-taken after.
                                 */
                                thread_unlock(tp);
                                mutex_exit(&p->p_lock);
                                add_one_utstop();
                                mutex_enter(&p->p_lock);
                                thread_lock(tp);

                                aston(tp);

                                if (ISWAKEABLE(tp) || ISWAITING(tp)) {
                                        setrun_locked(tp);
                                }

                        }

                        /* grab thread if needed */
                        if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
                                poke_cpu(tp->t_cpu->cpu_id);


                        thread_unlock(tp);
                        mutex_exit(&p->p_lock);
                }
                mutex_exit(&pidlock);


                /*
                 * let everything catch up; the wait grows with the square
                 * of the attempt number (0, 1, 4, 9 times DR_UTSTOP_WAIT)
                 */
                utstop_timedwait(count * count * DR_UTSTOP_WAIT);


                /* now, walk the threadlist again to see if we are done */
                mutex_enter(&pidlock);
                for (tp = curthread->t_next, bailout = 0;
                    tp != curthread; tp = tp->t_next) {
                        proc_t *p = ttoproc(tp);

                        /* handle kernel threads separately */
                        if (p->p_as == &kas || p->p_stat == SZOMB)
                                continue;

                        /*
                         * If this thread didn't stop, and we don't allow
                         * unstopped blocked threads, bail.
                         */
                        thread_lock(tp);
                        if (!CPR_ISTOPPED(tp) &&
                            !(dr_allow_blocked_threads &&
                            DR_VSTOPPED(tp))) {
                                bailout = 1;
                                /* only report on the final attempt */
                                if (count == DR_UTSTOP_RETRY - 1) {
                                        /*
                                         * save the pid for later reporting
                                         */
                                        srh->sr_err_idx =
                                            dr_add_int(srh->sr_err_ints,
                                            srh->sr_err_idx, DR_MAX_ERR_INT,
                                            (uint64_t)p->p_pid);

                                        cmn_err(CE_WARN, "%s: "
                                            "failed to stop thread: "
                                            "process=%s, pid=%d",
                                            f, p->p_user.u_psargs, p->p_pid);

                                        PR_QR("%s: failed to stop thread: "
                                            "process=%s, pid=%d, t_id=0x%p, "
                                            "t_state=0x%x, t_proc_flag=0x%x, "
                                            "t_schedflag=0x%x\n",
                                            f, p->p_user.u_psargs, p->p_pid,
                                            (void *)tp, tp->t_state,
                                            tp->t_proc_flag, tp->t_schedflag);
                                }

                        }
                        thread_unlock(tp);
                }
                mutex_exit(&pidlock);

                /* were all the threads stopped? */
                if (!bailout)
                        break;
        }

        /* were we unable to stop all threads after a few tries? */
        if (bailout) {
                handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
                    srh->sr_err_idx, 0);
                return (ESRCH);
        }

        return (DDI_SUCCESS);
}
 654 
 655 static void
 656 dr_start_user_threads(void)
 657 {
 658         kthread_id_t tp;
 659 
 660         mutex_enter(&pidlock);
 661 
 662         /* walk all threads and release them */
 663         for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
 664                 proc_t *p = ttoproc(tp);
 665 
 666                 /* skip kernel threads */
 667                 if (ttoproc(tp)->p_as == &kas)
 668                         continue;
 669 
 670                 mutex_enter(&p->p_lock);
 671                 tp->t_proc_flag &= ~TP_CHKPT;
 672                 mutex_exit(&p->p_lock);
 673 
 674                 thread_lock(tp);
 675                 if (CPR_ISTOPPED(tp)) {
 676                         /* back on the runq */
 677                         tp->t_schedflag |= TS_RESUME;
 678                         setrun_locked(tp);
 679                 }
 680                 thread_unlock(tp);
 681         }
 682 
 683         mutex_exit(&pidlock);
 684 }
 685 
 686 static void
 687 dr_signal_user(int sig)
 688 {
 689         struct proc *p;
 690 
 691         mutex_enter(&pidlock);
 692 
 693         for (p = practive; p != NULL; p = p->p_next) {
 694                 /* only user threads */
 695                 if (p->p_exec == NULL || p->p_stat == SZOMB ||
 696                     p == proc_init || p == ttoproc(curthread))
 697                         continue;
 698 
 699                 mutex_enter(&p->p_lock);
 700                 sigtoproc(p, NULL, sig);
 701                 mutex_exit(&p->p_lock);
 702         }
 703 
 704         mutex_exit(&pidlock);
 705 
 706         /* add a bit of delay */
 707         delay(hz);
 708 }
 709 
/*
 * Undo whatever dr_suspend() managed to do, based on
 * srh->sr_suspend_state.  The switch enters at the state that was
 * reached and falls through all lower states, so the steps are undone in
 * the reverse order of dr_suspend():
 *
 *   DR_SRSTATE_FULL   - mark the TOD as resumed, re-enable interrupts and
 *                       the clock, restart the cpus, drop cpu_lock (taken
 *                       by dr_suspend()) and call drmach_resume_first()
 *   DR_SRSTATE_DRIVER - resume the devices and the lock manager; device
 *                       resume failures are posted as ESBD_RESUME in the
 *                       DR handle's h_err
 *   DR_SRSTATE_USER   - restart user threads (unless they were skipped)
 *   any state         - send SIGTHAW to user processes
 */
void
dr_resume(dr_sr_handle_t *srh)
{
        switch (srh->sr_suspend_state) {
        case DR_SRSTATE_FULL:

                /* cpu_lock is still held from the end of dr_suspend() */
                ASSERT(MUTEX_HELD(&cpu_lock));

                /*
                 * Prevent false alarm in tod_validate() due to tod
                 * value change between suspend and resume
                 */
                mutex_enter(&tod_lock);
                tod_status_set(TOD_DR_RESUME_DONE);
                mutex_exit(&tod_lock);

                dr_enable_intr();       /* enable intr & clock */

                start_cpus();
                mutex_exit(&cpu_lock);

                /*
                 * This should only be called if drmach_suspend_last()
                 * was called and state transitioned to DR_SRSTATE_FULL
                 * to prevent resume attempts on device instances that
                 * were not previously suspended.
                 */
                drmach_resume_first();

                /* FALLTHROUGH */

        case DR_SRSTATE_DRIVER:
                /*
                 * resume drivers
                 */
                srh->sr_err_idx = 0;

                /* no parent dip to hold busy */
                dr_resume_devices(ddi_root_node(), srh);

                /* report the majors of any drivers that failed to resume */
                if (srh->sr_err_idx && srh->sr_dr_handlep) {
                        (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
                            srh->sr_err_ints, srh->sr_err_idx, 1);
                }

                /*
                 * resume the lock manager
                 */
                lm_cprresume();

                /* FALLTHROUGH */

        case DR_SRSTATE_USER:
                /*
                 * finally, resume user threads
                 */
                if (!dr_skip_user_threads) {
                        prom_printf("DR: resuming user threads...\n");
                        dr_start_user_threads();
                }
                /* FALLTHROUGH */

        case DR_SRSTATE_BEGIN:
        default:
                /*
                 * let those who care know that we've just resumed
                 */
                PR_QR("sending SIGTHAW...\n");
                dr_signal_user(SIGTHAW);
                break;
        }

        prom_printf("DR: resume COMPLETED\n");
}
 784 
/*
 * Quiesce the system for a DR operation.  The steps, in order:
 *
 *   1. stop user threads (state DR_SRSTATE_USER);
 *   2. unless SBD_FLAG_FORCE is set on the command, walk the device tree
 *      and fail with ESBD_UNSAFE if any attached driver is on the unsafe
 *      list;
 *   3. suspend the lock manager;
 *   4. suspend all drivers (state DR_SRSTATE_DRIVER);
 *   5. call drmach_suspend_last(), pause the cpus and stop interrupts
 *      (state DR_SRSTATE_FULL).
 *
 * On any failure dr_resume() is called to undo the steps already taken
 * and a non-DDI_SUCCESS value is returned, with details in the DR
 * handle's h_err where available.
 *
 * On success DDI_SUCCESS is returned WITH cpu_lock HELD; the lock is
 * dropped by the matching dr_resume() call.
 */
int
dr_suspend(dr_sr_handle_t *srh)
{
        dr_handle_t     *handle;
        int             force;
        int             dev_errs_idx;
        uint64_t        dev_errs[DR_MAX_ERR_INT];
        int             rc = DDI_SUCCESS;

        handle = srh->sr_dr_handlep;

        force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;

        prom_printf("\nDR: suspending user threads...\n");
        srh->sr_suspend_state = DR_SRSTATE_USER;
        /* a stop failure is only fatal if dr_check_user_stop_result is set */
        if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
            dr_check_user_stop_result) {
                dr_resume(srh);
                return (rc);
        }

        if (!force) {
                struct dr_ref drc = {0};

                prom_printf("\nDR: checking devices...\n");
                dev_errs_idx = 0;

                /* only the unsafe-major list is collected here */
                drc.arr = dev_errs;
                drc.idx = &dev_errs_idx;
                drc.len = DR_MAX_ERR_INT;

                /*
                 * Since the root node can never go away, it
                 * doesn't have to be held.
                 */
                ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
                if (dev_errs_idx) {
                        handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
                            dev_errs_idx, 1);
                        dr_resume(srh);
                        return (DDI_FAILURE);
                }
                PR_QR("done\n");
        } else {
                prom_printf("\nDR: dr_suspend invoked with force flag\n");
        }

#ifndef SKIP_SYNC
        /*
         * This sync swaps out all user pages.
         * (Compiled out: SKIP_SYNC is defined in this file.)
         */
        vfs_sync(SYNC_ALL);
#endif

        /*
         * special treatment for lock manager
         */
        lm_cprsuspend();

#ifndef SKIP_SYNC
        /*
         * sync the file system in case we never make it back
         */
        sync();
#endif

        /*
         * now suspend drivers
         */
        prom_printf("DR: suspending drivers...\n");
        srh->sr_suspend_state = DR_SRSTATE_DRIVER;
        srh->sr_err_idx = 0;
        /* No parent to hold busy */
        if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
                /* report the majors of the drivers that failed to suspend */
                if (srh->sr_err_idx && srh->sr_dr_handlep) {
                        (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
                            srh->sr_err_ints, srh->sr_err_idx, 1);
                }
                dr_resume(srh);
                return (rc);
        }

        drmach_suspend_last();

        /*
         * finally, grab all cpus
         */
        srh->sr_suspend_state = DR_SRSTATE_FULL;

        /* cpu_lock stays held on return; dr_resume() releases it */
        mutex_enter(&cpu_lock);
        pause_cpus(NULL, NULL);
        dr_stop_intr();

        return (rc);
}
 880 
 881 int
 882 dr_pt_test_suspend(dr_handle_t *hp)
 883 {
 884         dr_sr_handle_t *srh;
 885         int             err;
 886         uint_t          psmerr;
 887         static fn_t     f = "dr_pt_test_suspend";
 888 
 889         PR_QR("%s...\n", f);
 890 
 891         srh = dr_get_sr_handle(hp);
 892         if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
 893                 dr_resume(srh);
 894                 if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
 895                         PR_QR("%s: error on dr_resume()", f);
 896                         switch (psmerr) {
 897                         case ESBD_RESUME:
 898                                 PR_QR("Couldn't resume devices: %s\n",
 899                                     DR_GET_E_RSC(hp->h_err));
 900                                 break;
 901 
 902                         case ESBD_KTHREAD:
 903                                 PR_ALL("psmerr is ESBD_KTHREAD\n");
 904                                 break;
 905                         default:
 906                                 PR_ALL("Resume error unknown = %d\n", psmerr);
 907                                 break;
 908                         }
 909                 }
 910         } else {
 911                 PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
 912                 psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
 913                 switch (psmerr) {
 914                 case ESBD_UNSAFE:
 915                         PR_ALL("Unsafe devices (major #): %s\n",
 916                             DR_GET_E_RSC(hp->h_err));
 917                         break;
 918 
 919                 case ESBD_RTTHREAD:
 920                         PR_ALL("RT threads (PIDs): %s\n",
 921                             DR_GET_E_RSC(hp->h_err));
 922                         break;
 923 
 924                 case ESBD_UTHREAD:
 925                         PR_ALL("User threads (PIDs): %s\n",
 926                             DR_GET_E_RSC(hp->h_err));
 927                         break;
 928 
 929                 case ESBD_SUSPEND:
 930                         PR_ALL("Non-suspendable devices (major #): %s\n",
 931                             DR_GET_E_RSC(hp->h_err));
 932                         break;
 933 
 934                 case ESBD_RESUME:
 935                         PR_ALL("Could not resume devices (major #): %s\n",
 936                             DR_GET_E_RSC(hp->h_err));
 937                         break;
 938 
 939                 case ESBD_KTHREAD:
 940                         PR_ALL("psmerr is ESBD_KTHREAD\n");
 941                         break;
 942 
 943                 case ESBD_NOERROR:
 944                         PR_ALL("sbd_error_t error code not set\n");
 945                         break;
 946 
 947                 default:
 948                         PR_ALL("Unknown error psmerr = %d\n", psmerr);
 949                         break;
 950                 }
 951         }
 952         dr_release_sr_handle(srh);
 953 
 954         return (0);
 955 }
 956 
 957 /*
 958  * Add a new integer value to the end of an array.  Don't allow duplicates to
 959  * appear in the array, and don't allow the array to overflow.  Return the new
 960  * total number of entries in the array.
 961  */
 962 static int
 963 dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
 964 {
 965         int i;
 966 
 967         if (arr == NULL)
 968                 return (0);
 969 
 970         if (idx >= len)
 971                 return (idx);
 972 
 973         for (i = 0; i < idx; i++) {
 974                 if (arr[i] == val)
 975                         return (idx);
 976         }
 977 
 978         arr[idx++] = val;
 979 
 980         return (idx);
 981 }
 982 
 983 /*
 984  * Construct an sbd_error_t featuring a string representation of an array of
 985  * integers as its e_rsc.
 986  */
 987 static sbd_error_t *
 988 drerr_int(int e_code, uint64_t *arr, int idx, int majors)
 989 {
 990         int             i, n, buf_len, buf_idx, buf_avail;
 991         char            *dname;
 992         char            *buf;
 993         sbd_error_t     *new_sbd_err;
 994         static char     s_ellipsis[] = "...";
 995 
 996         if (arr == NULL || idx <= 0)
 997                 return (NULL);
 998 
 999         /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
1000         buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1001 
1002         /*
1003          * This is the total working area of the buffer.  It must be computed
1004          * as the size of 'buf', minus reserved space for the null terminator
1005          * and the ellipsis string.
1006          */
1007         buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
1008 
1009         /* Construct a string representation of the array values */
1010         for (buf_idx = 0, i = 0; i < idx; i++) {
1011                 buf_avail = buf_len - buf_idx;
1012                 if (majors) {
1013                         dname = ddi_major_to_name(arr[i]);
1014                         if (dname) {
1015                                 n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
1016                                     dname);
1017                         } else {
1018                                 n = snprintf(&buf[buf_idx], buf_avail,
1019                                     "major %" PRIu64 ", ", arr[i]);
1020                         }
1021                 } else {
1022                         n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
1023                             arr[i]);
1024                 }
1025 
1026                 /* An ellipsis gets appended when no more values fit */
1027                 if (n >= buf_avail) {
1028                         (void) strcpy(&buf[buf_idx], s_ellipsis);
1029                         break;
1030                 }
1031 
1032                 buf_idx += n;
1033         }
1034 
1035         /* If all the contents fit, remove the trailing comma */
1036         if (n < buf_avail) {
1037                 buf[--buf_idx] = '\0';
1038                 buf[--buf_idx] = '\0';
1039         }
1040 
1041         /* Return an sbd_error_t with the buffer and e_code */
1042         new_sbd_err = drerr_new(1, e_code, buf);
1043         kmem_free(buf, MAXPATHLEN);
1044         return (new_sbd_err);
1045 }