core Wdiff usr/src/uts/sun4v/os/mach_cpu_states.c

Print this page

[mq]: core-v2

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/sun4v/os/mach_cpu_states.c
          +++ new/usr/src/uts/sun4v/os/mach_cpu_states.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   */
  24   24  
  25   25  #include <sys/types.h>
  26   26  #include <sys/systm.h>
  27   27  #include <sys/archsystm.h>
  28   28  #include <sys/t_lock.h>
  29   29  #include <sys/uadmin.h>
  30   30  #include <sys/panic.h>
  31   31  #include <sys/reboot.h>
  32   32  #include <sys/autoconf.h>
  33   33  #include <sys/machsystm.h>
  34   34  #include <sys/promif.h>
  35   35  #include <sys/membar.h>
  36   36  #include <vm/hat_sfmmu.h>
  37   37  #include <sys/cpu_module.h>
  38   38  #include <sys/cpu_sgnblk_defs.h>
  39   39  #include <sys/intreg.h>
  40   40  #include <sys/consdev.h>
  41   41  #include <sys/kdi_impl.h>
  42   42  #include <sys/traptrace.h>
  43   43  #include <sys/hypervisor_api.h>
  44   44  #include <sys/vmsystm.h>
  45   45  #include <sys/dtrace.h>
  46   46  #include <sys/xc_impl.h>
  47   47  #include <sys/callb.h>
  48   48  #include <sys/mdesc.h>
  49   49  #include <sys/mach_descrip.h>
  50   50  #include <sys/wdt.h>
  51   51  #include <sys/soft_state.h>
  52   52  #include <sys/promimpl.h>
  53   53  #include <sys/hsvc.h>
  54   54  #include <sys/ldoms.h>
  55   55  #include <sys/kldc.h>
  56   56  #include <sys/clock_impl.h>
  57   57  #include <sys/suspend.h>
  58   58  #include <sys/dumphdr.h>
  59   59  
  60   60  /*
  61   61   * hvdump_buf_va is a pointer to the currently-configured hvdump_buf.
  62   62   * A value of NULL indicates that this area is not configured.
  63   63   * hvdump_buf_sz is tunable but will be clamped to HVDUMP_SIZE_MAX.
  64   64   */
  65   65  
  66   66  caddr_t hvdump_buf_va;
  67   67  uint64_t hvdump_buf_sz = HVDUMP_SIZE_DEFAULT;
           /* Physical address of hvdump_buf_va; set in mach_dump_buffer_init(). */
  68   68  static uint64_t hvdump_buf_pa;
  69   69  
           /*
            * Value of gettick() recorded the first time panic_enter_hw() or
            * panic_quiesce_hw() runs; zero until then.
            */
  70   70  u_longlong_t panic_tick;
  71   71  
  72   72  extern u_longlong_t gettick();
  73   73  static void reboot_machine(char *);
  74   74  static void update_hvdump_buffer(void);
  75   75  
  76   76  /*
  77   77   * For xt_sync synchronization.
  78   78   */
  79   79  extern uint64_t xc_tick_limit;
  80   80  extern uint64_t xc_tick_jump_limit;
  81   81  extern uint64_t xc_sync_tick_limit;
  82   82  
  83   83  /*
  84   84   * Bring in the cpc PIL_15 handler for panic_enter_hw.
  85   85   */
  86   86  extern uint64_t cpc_level15_inum;
  87   87  
  88   88  /*
  89   89   * We keep our own copies, used for cache flushing, because we can be called
  90   90   * before cpu_fiximpl().
  91   91   */
  92   92  static int kdi_dcache_size;
  93   93  static int kdi_dcache_linesize;
  94   94  static int kdi_icache_size;
  95   95  static int kdi_icache_linesize;
  96   96  
  97   97  /*
  98   98   * Assembly support for generic modules in sun4v/ml/mach_xc.s
  99   99   */
 100  100  extern void init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2);
 101  101  extern void kdi_flush_idcache(int, int, int, int);
 102  102  extern uint64_t get_cpuaddr(uint64_t, uint64_t);
 103  103  
 104  104  
           /* Size in bytes of the buffer store_boot_cmd() builds the command in. */
 105  105  #define BOOT_CMD_MAX_LEN        256     /* power of 2 & 16-byte aligned */
           /* Prefix store_boot_cmd() copies in first when add_boot_str is set. */
 106  106  #define BOOT_CMD_BASE           "boot "
 107  107  
 108  108  /*
 109  109   * In an LDoms system we do not save the user's boot args in NVRAM
 110  110   * as is done on legacy systems.  Instead, we format and send a
 111  111   * 'reboot-command' variable to the variable service.  The contents
 112  112   * of the variable are retrieved by OBP and used verbatim for
 113  113   * the next boot.
            *
            * args         - argument string to store; may be NULL.
            * add_boot_str - when set, BOOT_CMD_BASE is placed in front of args.
            * invoke_cb    - B_TRUE only in a normal shutdown sequence; when it is
            *                B_FALSE the command is stored only if the reboot-data
            *                hypervisor group has been negotiated.
            *
            * The stored length always includes the terminating NUL, and the
            * command is truncated to fit in BOOT_CMD_MAX_LEN bytes.  Failures are
            * reported with cmn_err(CE_WARN) and are otherwise ignored.
 114  114   */
 115  115  static void
 116  116  store_boot_cmd(char *args, boolean_t add_boot_str, boolean_t invoke_cb)
 117  117  {
 118  118          static char     *cmd_buf;
 119  119          size_t          len = 1;
 120  120          pnode_t         node;
 121  121          size_t          base_len = 0;
 122  122          size_t          args_len;
 123  123          size_t          args_max;
 124  124          uint64_t        majornum;
 125  125          uint64_t        minornum;
 126  126          uint64_t        buf_pa;
 127  127          uint64_t        status;
 128  128  
 129  129          status = hsvc_version(HSVC_GROUP_REBOOT_DATA, &majornum, &minornum);
 130  130  
 131  131          /*
 132  132           * invoke_cb is set to true when we are in a normal shutdown sequence
 133  133           * (interrupts are not blocked, the system is not panicking or being
 134  134           * suspended). In that case, we can use any method to store the boot
 135  135           * command. Otherwise storing the boot command can not be done using
 136  136           * a domain service because it can not be safely used in that context.
 137  137           */
 138  138          if ((status != H_EOK) && (invoke_cb == B_FALSE))
 139  139                  return;
 140  140  
 141  141          cmd_buf = contig_mem_alloc(BOOT_CMD_MAX_LEN);
 142  142          if (cmd_buf == NULL)
 143  143                  return;
 144  144  
 145  145          if (add_boot_str) {
 146  146                  (void) strcpy(cmd_buf, BOOT_CMD_BASE);
 147  147  
 148  148                  base_len = strlen(BOOT_CMD_BASE);
 149  149                  len = base_len + 1;
 150  150          }
 151  151  
 152  152          if (args != NULL) {
 153  153                  args_len = strlen(args);
 154  154                  args_max = BOOT_CMD_MAX_LEN - len;
 155  155  
 156  156                  if (args_len > args_max) {
 157  157                          cmn_err(CE_WARN, "Reboot command too long (%ld), "
 158  158                              "truncating command arguments", len + args_len);
 159  159  
 160  160                          args_len = args_max;
 161  161                  }
 162  162  
 163  163                  len += args_len;
 164  164                  (void) strncpy(&cmd_buf[base_len], args, args_len);
 165  165          }
 166  166  
                   /*
                    * strncpy() above copies exactly args_len bytes and therefore never
                    * writes a terminator, and when neither a prefix nor args were
                    * copied nothing has been written at all.  The buffer is not known
                    * to be zero-filled, so terminate explicitly: len always counts one
                    * byte for the NUL and never exceeds BOOT_CMD_MAX_LEN.
                    */
                   cmd_buf[len - 1] = '\0';
           
 167  167          /*
 168  168           * Save the reboot-command with HV, if reboot data group is
 169  169           * negotiated. Else save the reboot-command via vars-config domain
 170  170           * services on the SP.
 171  171           */
 172  172          if (status == H_EOK) {
 173  173                  buf_pa = va_to_pa(cmd_buf);
 174  174                  status = hv_reboot_data_set(buf_pa, len);
 175  175                  if (status != H_EOK) {
 176  176                          cmn_err(CE_WARN, "Unable to store boot command for "
 177  177                              "use on reboot with HV: error = 0x%lx", status);
 178  178                  }
 179  179          } else {
 180  180                  node = prom_optionsnode();
 181  181                  if ((node == OBP_NONODE) || (node == OBP_BADNODE) ||
 182  182                      prom_setprop(node, "reboot-command", cmd_buf, len) == -1)
 183  183                          cmn_err(CE_WARN, "Unable to store boot command for "
 184  184                              "use on reboot");
 185  185          }
 186  186  }
 187  187  
 188  188  
 189  189  /*
 190  190   * Machine dependent code to reboot.
 191  191   *
 192  192   * "bootstr", when non-null, points to a string to be used as the
 193  193   * argument string when rebooting.
 194  194   *
 195  195   * "invoke_cb" is a boolean. It is set to true when mdboot() can safely
 196  196   * invoke CB_CL_MDBOOT callbacks before shutting the system down, i.e. when
 197  197   * we are in a normal shutdown sequence (interrupts are not blocked, the
 198  198   * system is not panic'ing or being suspended).
 199  199   */
 200  200  /*ARGSUSED*/
 201  201  void
 202  202  mdboot(int cmd, int fcn, char *bootstr, boolean_t invoke_cb)
 203  203  {
 204  204          extern void pm_cfb_check_and_powerup(void);
 205  205  
 206  206          /*
 207  207           * XXX - rconsvp is set to NULL to ensure that output messages
 208  208           * are sent to the underlying "hardware" device using the
 209  209           * monitor's printf routine since we are in the process of
 210  210           * either rebooting or halting the machine.
 211  211           */
 212  212          rconsvp = NULL;
 213  213  
                   /*
                    * First decide what, if anything, to record as the reboot command
                    * (only done when domaining_enabled() is true).
                    */
 214  214          switch (fcn) {
 215  215          case AD_HALT:
 216  216                  /*
 217  217                   * LDoms: By storing a no-op command
 218  218                   * in the 'reboot-command' variable we cause OBP
 219  219                   * to ignore the setting of 'auto-boot?' after
 220  220                   * it completes the reset.  This causes the system
 221  221                   * to stop at the ok prompt.
 222  222                   */
 223  223                  if (domaining_enabled())
 224  224                          store_boot_cmd("noop", B_FALSE, invoke_cb);
 225  225                  break;
 226  226  
 227  227          case AD_POWEROFF:
                           /* No reboot command is stored for a power-off. */
 228  228                  break;
 229  229  
 230  230          default:
                           /*
                            * Every other function is treated as a reboot.  When the
                            * caller gave no boot string, derive one from fcn.
                            */
 231  231                  if (bootstr == NULL) {
 232  232                          switch (fcn) {
 233  233  
 234  234                          case AD_FASTREBOOT:
 235  235                          case AD_BOOT:
 236  236                                  bootstr = "";
 237  237                                  break;
 238  238  
 239  239                          case AD_IBOOT:
 240  240                                  bootstr = "-a";
 241  241                                  break;
 242  242  
 243  243                          case AD_SBOOT:
 244  244                                  bootstr = "-s";
 245  245                                  break;
 246  246  
 247  247                          case AD_SIBOOT:
 248  248                                  bootstr = "-sa";
 249  249                                  break;
 250  250                          default:
 251  251                                  cmn_err(CE_WARN,
 252  252                                      "mdboot: invalid function %d", fcn);
 253  253                                  bootstr = "";
 254  254                                  break;
 255  255                          }
 256  256                  }
 257  257  
 258  258                  /*
 259  259                   * If LDoms is running, we must save the boot string
 260  260                   * before we enter restricted mode.  This is possible
 261  261                   * only if we are not being called from panic.
 262  262                   */
 263  263                  if (domaining_enabled())
 264  264                          store_boot_cmd(bootstr, B_TRUE, invoke_cb);
 265  265          }
 266  266  
 267  267          /*
 268  268           * At a high interrupt level we can't:
 269  269           *      1) bring up the console
 270  270           * or
 271  271           *      2) wait for pending interrupts prior to redistribution
 272  272           *         to the current CPU
 273  273           *
 274  274           * so we do them now.
 275  275           */
 276  276          pm_cfb_check_and_powerup();
 277  277  
 278  278          /* make sure there are no more changes to the device tree */
 279  279          devtree_freeze();
 280  280  
                   /* CB_CL_MDBOOT callbacks run only in a normal shutdown sequence. */
 281  281          if (invoke_cb)
 282  282                  (void) callb_execute_class(CB_CL_MDBOOT, NULL);
 283  283  
 284  284          /*
 285  285           * Clear any unresolved UEs from memory.
 286  286           */
 287  287          page_retire_mdboot();
 288  288  
 289  289          /*
 290  290           * stop other cpus which also raise our priority. since there is only
 291  291           * one active cpu after this, and our priority will be too high
 292  292           * for us to be preempted, we're essentially single threaded
 293  293           * from here on out.
 294  294           */
 295  295          stop_other_cpus();
 296  296  
 297  297          /*
 298  298           * try and reset leaf devices.  reset_leaves() should only
 299  299           * be called when there are no other threads that could be
 300  300           * accessing devices
 301  301           */
 302  302          reset_leaves();
 303  303  
 304  304          watchdog_clear();
 305  305  
                   /*
                    * Publish the matching transition soft state, then hand off to
                    * the halt, power-down or reboot path selected by fcn.
                    */
 306  306          if (fcn == AD_HALT) {
 307  307                  mach_set_soft_state(SIS_TRANSITION,
 308  308                      &SOLARIS_SOFT_STATE_HALT_MSG);
 309  309                  halt((char *)NULL);
 310  310          } else if (fcn == AD_POWEROFF) {
 311  311                  mach_set_soft_state(SIS_TRANSITION,
 312  312                      &SOLARIS_SOFT_STATE_POWER_MSG);
 313  313                  power_down(NULL);
 314  314          } else {
 315  315                  mach_set_soft_state(SIS_TRANSITION,
 316  316                      &SOLARIS_SOFT_STATE_REBOOT_MSG);
 317  317                  reboot_machine(bootstr);
 318  318          }
 319  319          /* MAYBE REACHED */
 320  320  }
 321  321  
 322  322  /* mdpreboot - may be called prior to mdboot while root fs still mounted */
 323  323  /*ARGSUSED*/
 324  324  void
 325  325  mdpreboot(int cmd, int fcn, char *bootstr)
 326  326  {
                   /* Intentionally empty: none of the arguments are used here. */
 327  327  }
 328  328  
 329  329  /*
 330  330   * Halt the machine and then reboot with the device
 331  331   * and arguments specified in bootstr.
 332  332   */
 333  333  static void
 334  334  reboot_machine(char *bootstr)
 335  335  {
                   /* Flush register windows and stop the other CPUs before the PROM call. */
 336  336          flush_windows();
 337  337          stop_other_cpus();              /* send stop signal to other CPUs */
 338  338          prom_printf("rebooting...\n");
 339  339          /*
 340  340           * For platforms that use CPU signatures, we
 341  341           * need to set the signature block to OS and
 342  342           * the state to exiting for all the processors.
 343  343           */
 344  344          CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_REBOOT, -1);
                   /* Hand bootstr to the PROM; control is not expected to come back. */
 345  345          prom_reboot(bootstr);
 346  346          /*NOTREACHED*/
 347  347  }
 348  348  
 349  349  /*
 350  350   * We use the x-trap mechanism and idle_stop_xcall() to stop the other CPUs.
 351  351   * Once in panic_idle() they raise spl, record their location, and spin.
 352  352   */
 353  353  static void

↓ open down ↓

353 lines elided

↑ open up ↑

 354  354  panic_idle(void)
 355  355  {
 356  356          (void) spl7();
 357  357  
 358  358          debug_flush_windows();
 359  359          (void) setjmp(&curthread->t_pcb);
 360  360  
                   /*
                    * Announce that this CPU has stopped: panic_stopcpus() polls
                    * cpu_m.in_prom while waiting for each CPU.
                    */
 361  361          CPU->cpu_m.in_prom = 1;
 362  362          membar_stld();
 363  363  
 364      -        dumpsys_helper();
 365      -
                   /* Spin here forever. */
 366  364          for (;;)
 367  365                  ;
 368  366  }
 369  367  
 370  368  /*
 371  369   * Force the other CPUs to trap into panic_idle(), and then remove them
 372  370   * from the cpu_ready_set so they will no longer receive cross-calls.
 373  371   */
 374  372  /*ARGSUSED*/
 375  373  void

 376  374  panic_stopcpus(cpu_t *cp, kthread_t *t, int spl)
 377  375  {
 378  376          cpuset_t cps;
 379  377          int i;
 380  378  
 381  379          (void) splzs();
                   /* Cross-trap every CPU except cp into panic_idle(). */
 382  380          CPUSET_ALL_BUT(cps, cp->cpu_id);
 383  381          xt_some(cps, (xcfunc_t *)idle_stop_xcall, (uint64_t)&panic_idle, NULL);
 384  382  
 385  383          for (i = 0; i < NCPU; i++) {
 386  384                  if (i != cp->cpu_id && CPU_XCALL_READY(i)) {
                                   /* Poll in_prom at most 0x10000 times, DELAY(50) apart. */
 387  385                          int ntries = 0x10000;
 388  386  
 389  387                          while (!cpu[i]->cpu_m.in_prom && ntries) {
 390  388                                  DELAY(50);
 391  389                                  ntries--;
 392  390                          }
 393  391  
 394  392                          if (!cpu[i]->cpu_m.in_prom)
 395  393                                  printf("panic: failed to stop cpu%d\n", i);
 396  394  
                                   /*
                                    * Whether or not the CPU reported in, mark it not
                                    * ready and quiesced and drop it from cpu_ready_set.
                                    */
 397  395                          cpu[i]->cpu_flags &= ~CPU_READY;
 398  396                          cpu[i]->cpu_flags |= CPU_QUIESCED;
 399  397                          CPUSET_DEL(cpu_ready_set, cpu[i]->cpu_id);
 400  398                  }
 401  399          }
 402  400  }
 403  401  
 404  402  /*
 405  403   * Platform callback following each entry to panicsys().  If we've panicked at
 406  404   * level 14, we examine t_panic_trap to see if a fatal trap occurred.  If so,
 407  405   * we disable further %tick_cmpr interrupts.  If not, an explicit call to panic
 408  406   * was made and so we re-enqueue an interrupt request structure to allow
 409  407   * further level 14 interrupts to be processed once we lower PIL.  This allows
 410  408   * us to handle panics from the deadman() CY_HIGH_LEVEL cyclic.
 411  409   *
 412  410   * In case we panic at level 15, ensure that the cpc handler has been
 413  411   * reinstalled otherwise we could run the risk of hitting a missing interrupt
 414  412   * handler when this thread drops PIL and the cpc counter overflows.
 415  413   */
 416  414  void
 417  415  panic_enter_hw(int spl)
 418  416  {
 419  417          uint_t opstate;
 420  418  
                   /*
                    * Only while panic_tick is still zero: record the current tick and
                    * freeze trap tracing.
                    */
 421  419          if (!panic_tick) {
 422  420                  panic_tick = gettick();
 423  421                  if (mach_htraptrace_enable) {
 424  422                          uint64_t prev_freeze;
 425  423  
 426  424                          /*  there are no possible error codes for this hcall */
 427  425                          (void) hv_ttrace_freeze((uint64_t)TRAP_TFREEZE_ALL,
 428  426                              &prev_freeze);
 429  427                  }
 430  428  #ifdef TRAPTRACE
 431  429                  TRAPTRACE_FREEZE;
 432  430  #endif
 433  431          }
 434  432  
 435  433          mach_set_soft_state(SIS_TRANSITION, &SOLARIS_SOFT_STATE_PANIC_MSG);
 436  434  
                   /*
                    * The level 14 and level 15 fix-ups below each run between
                    * disable_vec_intr() and enable_vec_intr(opstate).
                    */
 437  435          if (spl == ipltospl(PIL_14)) {
 438  436                  opstate = disable_vec_intr();
 439  437  
 440  438                  if (curthread->t_panic_trap != NULL) {
                                   /* Fatal trap: no further %tick_cmpr interrupts. */
 441  439                          tickcmpr_disable();
 442  440                          intr_dequeue_req(PIL_14, cbe_level14_inum);
 443  441                  } else {
                                   /* Explicit panic(): keep level 14 serviceable. */
 444  442                          if (!tickcmpr_disabled())
 445  443                                  intr_enqueue_req(PIL_14, cbe_level14_inum);
 446  444                          /*
 447  445                           * Clear SOFTINT<14>, SOFTINT<0> (TICK_INT)
 448  446                           * and SOFTINT<16> (STICK_INT) to indicate
 449  447                           * that the current level 14 has been serviced.
 450  448                           */
 451  449                          wr_clr_softint((1 << PIL_14) |
 452  450                              TICK_INT_MASK | STICK_INT_MASK);
 453  451                  }
 454  452  
 455  453                  enable_vec_intr(opstate);
 456  454          } else if (spl == ipltospl(PIL_15)) {
                           /* Re-enqueue the cpc level 15 request and clear SOFTINT<15>. */
 457  455                  opstate = disable_vec_intr();
 458  456                  intr_enqueue_req(PIL_15, cpc_level15_inum);
 459  457                  wr_clr_softint(1 << PIL_15);
 460  458                  enable_vec_intr(opstate);
 461  459          }
 462  460  }
 463  461  
 464  462  /*
 465  463   * Miscellaneous hardware-specific code to execute after panicstr is set
 466  464   * by the panic code: we also print and record PTL1 panic information here.
 467  465   */
 468  466  /*ARGSUSED*/
 469  467  void
 470  468  panic_quiesce_hw(panic_data_t *pdp)
 471  469  {
 472  470          extern uint_t getpstate(void);
 473  471          extern void setpstate(uint_t);
 474  472  
 475  473          /*
 476  474           * Turn off TRAPTRACE and save the current %tick value in panic_tick.
 477  475           */
 478  476          if (!panic_tick) {
 479  477                  panic_tick = gettick();
 480  478                  if (mach_htraptrace_enable) {
 481  479                          uint64_t prev_freeze;
 482  480  
 483  481                          /*  there are no possible error codes for this hcall */
 484  482                          (void) hv_ttrace_freeze((uint64_t)TRAP_TFREEZE_ALL,
 485  483                              &prev_freeze);
 486  484                  }
 487  485  #ifdef TRAPTRACE
 488  486                  TRAPTRACE_FREEZE;
 489  487  #endif
 490  488          }
 491  489          /*
 492  490           * For Platforms that use CPU signatures, we
 493  491           * need to set the signature block to OS, the state to
 494  492           * exiting, and the substate to panic for all the processors.
 495  493           */
 496  494          CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_PANIC, -1);
 497  495  
                   /* Re-register the hypervisor dump buffer, if one was configured. */
 498  496          update_hvdump_buffer();
 499  497  
 500  498          /*
 501  499           * Disable further ECC errors from the bus nexus.
 502  500           */
 503  501          (void) bus_func_invoke(BF_TYPE_ERRDIS);
 504  502  
 505  503          /*
 506  504           * Redirect all interrupts to the current CPU.
 507  505           */
 508  506          intr_redist_all_cpus_shutdown();
 509  507  
 510  508          /*
 511  509           * This call exists solely to support dumps to network
 512  510           * devices after sync from OBP.
 513  511           *
 514  512           * If we came here via the sync callback, then on some
 515  513           * platforms, interrupts may have arrived while we were
 516  514           * stopped in OBP.  OBP will arrange for those interrupts to
 517  515           * be redelivered if you say "go", but not if you invoke a
 518  516           * client callback like 'sync'.  For some dump devices
 519  517           * (network swap devices), we need interrupts to be
 520  518           * delivered in order to dump, so we have to call the bus
 521  519           * nexus driver to reset the interrupt state machines.
 522  520           */
 523  521          (void) bus_func_invoke(BF_TYPE_RESINTR);
 524  522  
                   /*
                    * OR PSTATE_IE into the current pstate.
                    * NOTE(review): presumably this enables interrupt delivery for
                    * the dump path described above -- confirm.
                    */
 525  523          setpstate(getpstate() | PSTATE_IE);
 526  524  }
 527  525  
 528  526  /*
 529  527   * Platforms that use CPU signatures need to set the signature block to OS and
 530  528   * the state to exiting for all CPUs. PANIC_CONT indicates that we're about to
 531  529   * write the crash dump, which tells the SSP/SMS to begin a timeout routine to
 532  530   * reboot the machine if the dump never completes.
 533  531   */
 534  532  /*ARGSUSED*/
 535  533  void
 536  534  panic_dump_hw(int spl)
 537  535  {
                   /* The only action: publish the SIGSUBST_DUMP substate; spl is unused. */
 538  536          CPU_SIGNATURE(OS_SIG, SIGST_EXIT, SIGSUBST_DUMP, -1);
 539  537  }
 540  538  
 541  539  /*
 542  540   * for ptl1_panic
 543  541   */
 544  542  void
 545  543  ptl1_init_cpu(struct cpu *cpu)
 546  544  {
 547  545          ptl1_state_t *pstate = &cpu->cpu_m.ptl1_state;
 548  546  
                   /*
                    * struct cpu plus PTL1_SSIZE bytes of ptl1_panic stack must fit
                    * in CPU_ALLOC_SIZE; panic if they do not.
                    */
 549  547          /*CONSTCOND*/
 550  548          if (sizeof (struct cpu) + PTL1_SSIZE > CPU_ALLOC_SIZE) {
 551  549                  panic("ptl1_init_cpu: not enough space left for ptl1_panic "
 552  550                      "stack, sizeof (struct cpu) = %lu",
 553  551                      (unsigned long)sizeof (struct cpu));
 554  552          }
 555  553  
                   /* Stack top is the end of the CPU_ALLOC_SIZE region starting at cpu. */
 556  554          pstate->ptl1_stktop = (uintptr_t)cpu + CPU_ALLOC_SIZE;
                   /* Record this cpu structure's physical address, indexed by cpu_id. */
 557  555          cpu_pa[cpu->cpu_id] = va_to_pa(cpu);
 558  556  }
 559  557  
           /*
            * Panic with a message describing a ptl1_panic.  The reason code is read
            * from the saved %g1 of the first global register set and the trap level
            * from the first saved trap register entry in pstate.  Reason codes that
            * fall outside the table below are reported numerically.
            */
 560  558  void
 561  559  ptl1_panic_handler(ptl1_state_t *pstate)
 562  560  {
                   /*
                    * Indexed directly by reason code; each entry is annotated with
                    * the PTL1_* constant it is meant to correspond to.
                    * NOTE(review): the order must match the values of those
                    * constants, which are defined outside this file -- confirm.
                    */
 563  561          static const char *ptl1_reasons[] = {
 564  562  #ifdef  PTL1_PANIC_DEBUG
 565  563                  "trap for debug purpose",       /* PTL1_BAD_DEBUG */
 566  564  #else
 567  565                  "unknown trap",                 /* PTL1_BAD_DEBUG */
 568  566  #endif
 569  567                  "register window trap",         /* PTL1_BAD_WTRAP */
 570  568                  "kernel MMU miss",              /* PTL1_BAD_KMISS */
 571  569                  "kernel protection fault",      /* PTL1_BAD_KPROT_FAULT */
 572  570                  "ISM MMU miss",                 /* PTL1_BAD_ISM */
 573  571                  "kernel MMU trap",              /* PTL1_BAD_MMUTRAP */
 574  572                  "kernel trap handler state",    /* PTL1_BAD_TRAP */
 575  573                  "floating point trap",          /* PTL1_BAD_FPTRAP */
 576  574  #ifdef  DEBUG
 577  575                  "pointer to intr_vec",          /* PTL1_BAD_INTR_VEC */
 578  576  #else
 579  577                  "unknown trap",                 /* PTL1_BAD_INTR_VEC */
 580  578  #endif
 581  579  #ifdef  TRAPTRACE
 582  580                  "TRACE_PTR state",              /* PTL1_BAD_TRACE_PTR */
 583  581  #else
 584  582                  "unknown trap",                 /* PTL1_BAD_TRACE_PTR */
 585  583  #endif
 586  584                  "stack overflow",               /* PTL1_BAD_STACK */
 587  585                  "DTrace flags",                 /* PTL1_BAD_DTRACE_FLAGS */
 588  586                  "attempt to steal locked ctx",  /* PTL1_BAD_CTX_STEAL */
 589  587                  "CPU ECC error loop",           /* PTL1_BAD_ECC */
 590  588                  "unexpected error from hypervisor call", /* PTL1_BAD_HCALL */
 591  589                  "unexpected global level(%gl)", /* PTL1_BAD_GL */
 592  590                  "Watchdog Reset",               /* PTL1_BAD_WATCHDOG */
 593  591                  "unexpected RED mode trap",     /* PTL1_BAD_RED */
 594  592                  "return value EINVAL from hcall: "\
 595  593                      "UNMAP_PERM_ADDR",  /* PTL1_BAD_HCALL_UNMAP_PERM_EINVAL */
 596  594                  "return value ENOMAP from hcall: "\
 597  595                      "UNMAP_PERM_ADDR", /* PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP */
 598  596                  "error raising a TSB exception", /* PTL1_BAD_RAISE_TSBEXCP */
 599  597                  "missing shared TSB"    /* PTL1_NO_SCDTSB8K */
 600  598          };
 601  599  
 602  600          uint_t reason = pstate->ptl1_regs.ptl1_gregs[0].ptl1_g1;
 603  601          uint_t tl = pstate->ptl1_regs.ptl1_trap_regs[0].ptl1_tl;
 604  602          struct panic_trap_info ti = { 0 };
 605  603  
 606  604          /*
 607  605           * Use trap_info for a place holder to call panic_savetrap() and
 608  606           * panic_showtrap() to save and print out ptl1_panic information.
 609  607           */
 610  608          if (curthread->t_panic_trap == NULL)
 611  609                  curthread->t_panic_trap = &ti;
 612  610  
                   /* Bounds-check reason against the table before indexing it. */
 613  611          if (reason < sizeof (ptl1_reasons) / sizeof (ptl1_reasons[0]))
 614  612                  panic("bad %s at TL %u", ptl1_reasons[reason], tl);
 615  613          else
 616  614                  panic("ptl1_panic reason 0x%x at TL %u", reason, tl);
 617  615  }
 618  616  
           /*
            * If the watchdog is both enabled and activated, announce on the PROM
            * console that it is being suspended for debugging and suspend it.
            * Otherwise do nothing.
            */
 619  617  void
 620  618  clear_watchdog_on_exit(void)
 621  619  {
 622  620          if (watchdog_enabled && watchdog_activated) {
 623  621                  prom_printf("Debugging requested; hardware watchdog "
 624  622                      "suspended.\n");
                           /* The result of the suspend request is deliberately ignored. */
 625  623                  (void) watchdog_suspend();
 626  624          }
 627  625  }
 628  626  
 629  627  /*
 630  628   * Restore the watchdog timer when returning from a debugger
 631  629   * after a panic or L1-A and resume watchdog pat.
 632  630   */
 633  631  void
 634  632  restore_watchdog_on_entry(void)
 635  633  {
                   /* Counterpart of clear_watchdog_on_exit(): resume the watchdog. */
 636  634          watchdog_resume();
 637  635  }
 638  636  
 639  637  int
 640  638  kdi_watchdog_disable(void)
 641  639  {
 642  640          watchdog_suspend();
 643  641  
 644  642          return (0);
 645  643  }
 646  644  
           /* Counterpart of kdi_watchdog_disable(): resume the watchdog. */
 647  645  void
 648  646  kdi_watchdog_restore(void)
 649  647  {
 650  648          watchdog_resume();
 651  649  }
 652  650  
 653  651  void
 654  652  mach_dump_buffer_init(void)
 655  653  {
 656  654          uint64_t  ret, minsize = 0;
 657  655  
 658  656          if (hvdump_buf_sz > HVDUMP_SIZE_MAX)
 659  657                  hvdump_buf_sz = HVDUMP_SIZE_MAX;
 660  658  
 661  659          hvdump_buf_va = contig_mem_alloc_align(hvdump_buf_sz, PAGESIZE);
 662  660          if (hvdump_buf_va == NULL)
 663  661                  return;
 664  662  
 665  663          hvdump_buf_pa = va_to_pa(hvdump_buf_va);
 666  664  
 667  665          ret = hv_dump_buf_update(hvdump_buf_pa, hvdump_buf_sz,
 668  666              &minsize);
 669  667  
 670  668          if (ret != H_EOK) {
 671  669                  contig_mem_free(hvdump_buf_va, hvdump_buf_sz);
 672  670                  hvdump_buf_va = NULL;
 673  671                  cmn_err(CE_NOTE, "!Error in setting up hvstate"
 674  672                      "dump buffer. Error = 0x%lx, size = 0x%lx,"
 675  673                      "buf_pa = 0x%lx", ret, hvdump_buf_sz,
 676  674                      hvdump_buf_pa);
 677  675  
 678  676                  if (ret == H_EINVAL) {
 679  677                          cmn_err(CE_NOTE, "!Buffer size too small."
 680  678                              "Available buffer size = 0x%lx,"
 681  679                              "Minimum buffer size required = 0x%lx",
 682  680                              hvdump_buf_sz, minsize);
 683  681                  }
 684  682          }
 685  683  }
 686  684  
 687  685  
 688  686  static void
 689  687  update_hvdump_buffer(void)
 690  688  {
 691  689          uint64_t ret, dummy_val;
 692  690  
 693  691          if (hvdump_buf_va == NULL)
 694  692                  return;
 695  693  
 696  694          ret = hv_dump_buf_update(hvdump_buf_pa, hvdump_buf_sz,
 697  695              &dummy_val);
 698  696          if (ret != H_EOK) {
 699  697                  cmn_err(CE_NOTE, "!Cannot update hvstate dump"
 700  698                      "buffer. Error = 0x%lx", ret);
 701  699          }
 702  700  }
 703  701  
 704  702  
 705  703  static int
 706  704  getintprop(pnode_t node, char *name, int deflt)
 707  705  {
 708  706          int     value;
 709  707  
 710  708          switch (prom_getproplen(node, name)) {
 711  709          case 0:
 712  710                  value = 1;      /* boolean properties */
 713  711                  break;
 714  712  
 715  713          case sizeof (int):
 716  714                  (void) prom_getprop(node, name, (caddr_t)&value);
 717  715                  break;
 718  716  
 719  717          default:
 720  718                  value = deflt;
 721  719                  break;
 722  720          }
 723  721  
 724  722          return (value);
 725  723  }
 726  724  
 727  725  /*
 728  726   * Called by setcpudelay
 729  727   */
 730  728  void
 731  729  cpu_init_tick_freq(void)
 732  730  {
 733  731          md_t *mdp;
 734  732          mde_cookie_t rootnode;
 735  733          int             listsz;
 736  734          mde_cookie_t    *listp = NULL;
 737  735          int     num_nodes;
 738  736          uint64_t stick_prop;
 739  737  
                   /*
                    * When broken_md_flag is set, skip the MD lookup and take the
                    * frequency from this CPU's cpunodes[] entry instead.
                    */
 740  738          if (broken_md_flag) {
 741  739                  sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
 742  740                  return;
 743  741          }
 744  742  
                   /*
                    * NOTE(review): this panic fires when no MD handle could be
                    * obtained, but its text is the same as the missing-property
                    * panic further down.
                    */
 745  743          if ((mdp = md_get_handle()) == NULL)
 746  744                  panic("stick_frequency property not found in MD");
 747  745  
 748  746          rootnode = md_root_node(mdp);
 749  747          ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
 750  748  
 751  749          num_nodes = md_node_count(mdp);
 752  750  
 753  751          ASSERT(num_nodes > 0);
                   /* Scratch list sized for every node in the MD, taken from the PROM. */
 754  752          listsz = num_nodes * sizeof (mde_cookie_t);
 755  753          listp = (mde_cookie_t *)prom_alloc((caddr_t)0, listsz, 0);
 756  754  
 757  755          if (listp == NULL)
 758  756                  panic("cannot allocate list for MD properties");
 759  757  
                   /* Collect the "platform" nodes reachable from the root via "fwd". */
 760  758          num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "platform"),
 761  759              md_find_name(mdp, "fwd"), listp);
 762  760  
                   /*
                    * NOTE(review): exactly one match is only asserted; *listp is read
                    * below regardless of the count -- confirm this is safe when
                    * assertions are compiled out.
                    */
 763  761          ASSERT(num_nodes == 1);
 764  762  
 765  763          if (md_get_prop_val(mdp, *listp, "stick-frequency", &stick_prop) != 0)
 766  764                  panic("stick_frequency property not found in MD");
 767  765  
 768  766          sys_tick_freq = stick_prop;
 769  767  
                   /* Release the scratch list and the MD handle. */
 770  768          prom_free((caddr_t)listp, listsz);
 771  769          (void) md_fini_handle(mdp);
 772  770  }
 773  771  
           /*
            * shipit() is defined elsewhere.  Callers in this file pass the number
            * of entries in the per-CPU cpu_list and that list's cpu_list_ra, and
            * compare the result against H_EOK / H_EWOULDBLOCK / H_ECPUERROR.
            */
 774  772  int shipit(int n, uint64_t cpu_list_ra);
 775  773  
           /* Mondo send timing statistics are collected on DEBUG kernels only. */
 776  774  #ifdef DEBUG
 777  775  #define SEND_MONDO_STATS        1
 778  776  #endif
 779  777  
 780  778  #ifdef SEND_MONDO_STATS
           /*
            * Histograms of elapsed ticks per send, filled in by send_one_mondo()
            * (x_one_*) and send_mondo_set() (x_set_*):
            *   *_stimes[]  elapsed < 8192 ticks, bucketed by elapsed >> 7
            *   *_ltimes[]  longer sends, bucketed by elapsed >> 13; slot 0xf
            *               collects everything >= 15*8192 ticks
            * x_set_cpus[] is indexed by the number of mondos shipped in one
            * send_mondo_set() call.
            */
 781  779  uint32_t x_one_stimes[64];
 782  780  uint32_t x_one_ltimes[16];
 783  781  uint32_t x_set_stimes[64];
 784  782  uint32_t x_set_ltimes[16];
 785  783  uint32_t x_set_cpus[NCPU];
 786  784  #endif
 787  785  
 788  786  void
 789  787  send_one_mondo(int cpuid)
 790  788  {
 791  789          int retries, stat;
 792  790          uint64_t starttick, endtick, tick, lasttick;
 793  791          struct machcpu  *mcpup = &(CPU->cpu_m);
 794  792  
 795  793          CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
 796  794          starttick = lasttick = gettick();
 797  795          mcpup->cpu_list[0] = (uint16_t)cpuid;
 798  796          stat = shipit(1, mcpup->cpu_list_ra);
 799  797          endtick = starttick + xc_tick_limit;
 800  798          retries = 0;
 801  799          while (stat != H_EOK) {
 802  800                  if (stat != H_EWOULDBLOCK) {
 803  801                          if (panic_quiesce)
 804  802                                  return;
 805  803                          if (stat == H_ECPUERROR)
 806  804                                  cmn_err(CE_PANIC, "send_one_mondo: "
 807  805                                      "cpuid: 0x%x has been marked in "
 808  806                                      "error", cpuid);
 809  807                          else
 810  808                                  cmn_err(CE_PANIC, "send_one_mondo: "
 811  809                                      "unexpected hypervisor error 0x%x "
 812  810                                      "while sending a mondo to cpuid: "
 813  811                                      "0x%x", stat, cpuid);
 814  812                  }
 815  813                  tick = gettick();
 816  814                  /*
 817  815                   * If there is a big jump between the current tick
 818  816                   * count and lasttick, we have probably hit a break
 819  817                   * point.  Adjust endtick accordingly to avoid panic.
 820  818                   */
 821  819                  if (tick > (lasttick + xc_tick_jump_limit))
 822  820                          endtick += (tick - lasttick);
 823  821                  lasttick = tick;
 824  822                  if (tick > endtick) {
 825  823                          if (panic_quiesce)
 826  824                                  return;
 827  825                          cmn_err(CE_PANIC, "send mondo timeout "
 828  826                              "(target 0x%x) [retries: 0x%x hvstat: 0x%x]",
 829  827                              cpuid, retries, stat);
 830  828                  }
 831  829                  drv_usecwait(1);
 832  830                  stat = shipit(1, mcpup->cpu_list_ra);
 833  831                  retries++;
 834  832          }
 835  833  #ifdef SEND_MONDO_STATS
 836  834          {
 837  835                  uint64_t n = gettick() - starttick;
 838  836                  if (n < 8192)
 839  837                          x_one_stimes[n >> 7]++;
 840  838                  else if (n < 15*8192)
 841  839                          x_one_ltimes[n >> 13]++;
 842  840                  else
 843  841                          x_one_ltimes[0xf]++;
 844  842          }
 845  843  #endif
 846  844  }
 847  845  
           /*
            * Send a mondo to every CPU in 'set'.
            *
            * The CPU IDs are gathered into this CPU's cpu_list and handed to
            * shipit().  After a partial send the list is compacted down to the IDs
            * that are still outstanding and the send is retried until the list is
            * empty.
            *
            * Failure handling:
            *   - H_ECPUERROR: CPUs reported in CPU_STATE_ERROR are dropped from
            *     the list; unless panic_quiesce is set this ends in CE_PANIC.
            *   - any status other than H_EWOULDBLOCK, or passing the
            *     xc_tick_limit deadline: CE_PANIC, or a plain return when
            *     panic_quiesce is set (the xcalls statistic is then not updated).
            */
 848  846  void
 849  847  send_mondo_set(cpuset_t set)
 850  848  {
 851  849          uint64_t starttick, endtick, tick, lasttick;
 852  850          uint_t largestid, smallestid;
 853  851          int i, j;
 854  852          int ncpuids = 0;
 855  853          int shipped = 0;
 856  854          int retries = 0;
 857  855          struct machcpu  *mcpup = &(CPU->cpu_m);
 858  856  
 859  857          ASSERT(!CPUSET_ISNULL(set));
 860  858          CPUSET_BOUNDS(set, smallestid, largestid);
                   /* Release-build guard for the empty set asserted against above. */
 861  859          if (smallestid == CPUSET_NOTINSET) {
 862  860                  return;
 863  861          }
 864  862  
 865  863          starttick = lasttick = gettick();
 866  864          endtick = starttick + xc_tick_limit;
 867  865  
 868  866          /*
 869  867           * Assemble CPU list for HV argument. We already know
 870  868           * smallestid and largestid are members of set.
 871  869           */
 872  870          mcpup->cpu_list[ncpuids++] = (uint16_t)smallestid;
 873  871          if (largestid != smallestid) {
 874  872                  for (i = smallestid+1; i <= largestid-1; i++) {
 875  873                          if (CPU_IN_SET(set, i)) {
 876  874                                  mcpup->cpu_list[ncpuids++] = (uint16_t)i;
 877  875                          }
 878  876                  }
 879  877                  mcpup->cpu_list[ncpuids++] = (uint16_t)largestid;
 880  878          }
 881  879  
 882  880          do {
 883  881                  int stat;
 884  882  
 885  883                  stat = shipit(ncpuids, mcpup->cpu_list_ra);
                           /* Whole list accepted in one call: nothing left to retry. */
 886  884                  if (stat == H_EOK) {
 887  885                          shipped += ncpuids;
 888  886                          break;
 889  887                  }
 890  888  
 891  889                  /*
 892  890                   * Either not all CPU mondos were sent, or an
 893  891                   * error occurred. CPUs that were sent mondos
 894  892                   * have their CPU IDs overwritten in cpu_list.
 895  893                   * Reset cpu_list so that it only holds those
 896  894                   * CPU IDs that still need to be sent.
 897  895                   */
 898  896                  for (i = 0, j = 0; i < ncpuids; i++) {
 899  897                          if (mcpup->cpu_list[i] == HV_SEND_MONDO_ENTRYDONE) {
 900  898                                  shipped++;
 901  899                          } else {
 902  900                                  mcpup->cpu_list[j++] = mcpup->cpu_list[i];
 903  901                          }
 904  902                  }
 905  903                  ncpuids = j;
 906  904  
 907  905                  /*
 908  906                   * Now handle possible errors returned
 909  907                   * from hypervisor.
 910  908                   */
 911  909                  if (stat == H_ECPUERROR) {
 912  910                          int errorcpus;
 913  911  
 914  912                          if (!panic_quiesce)
 915  913                                  cmn_err(CE_CONT, "send_mondo_set: cpuid(s) ");
 916  914  
 917  915                          /*
 918  916                           * Remove any CPUs in the error state from
 919  917                           * cpu_list. At this point cpu_list only
 920  918                           * contains the CPU IDs for mondos not
 921  919                           * successfully sent.
 922  920                           */
 923  921                          for (i = 0, errorcpus = 0; i < ncpuids; i++) {
 924  922                                  uint64_t state = CPU_STATE_INVALID;
 925  923                                  uint16_t id = mcpup->cpu_list[i];
 926  924  
 927  925                                  (void) hv_cpu_state(id, &state);
 928  926                                  if (state == CPU_STATE_ERROR) {
 929  927                                          if (!panic_quiesce)
 930  928                                                  cmn_err(CE_CONT, "0x%x ", id);
 931  929                                          errorcpus++;
 932  930                                  } else if (errorcpus > 0) {
                                                   /* Slide survivors down over removed slots. */
 933  931                                          mcpup->cpu_list[i - errorcpus] =
 934  932                                              mcpup->cpu_list[i];
 935  933                                  }
 936  934                          }
 937  935                          ncpuids -= errorcpus;
 938  936  
                                   /*
                                    * With panic_quiesce set nothing is reported and the
                                    * loop carries on with the pruned list.
                                    */
 939  937                          if (!panic_quiesce) {
 940  938                                  if (errorcpus == 0) {
 941  939                                          cmn_err(CE_CONT, "<none> have been "
 942  940                                              "marked in error\n");
 943  941                                          cmn_err(CE_PANIC, "send_mondo_set: "
 944  942                                              "hypervisor returned "
 945  943                                              "H_ECPUERROR but no CPU in "
 946  944                                              "cpu_list in error state");
 947  945                                  } else {
 948  946                                          cmn_err(CE_CONT, "have been marked in "
 949  947                                              "error\n");
 950  948                                          cmn_err(CE_PANIC, "send_mondo_set: "
 951  949                                              "CPU(s) in error state");
 952  950                                  }
 953  951                          }
 954  952                  } else if (stat != H_EWOULDBLOCK) {
 955  953                          if (panic_quiesce)
 956  954                                  return;
 957  955                          /*
 958  956                           * For all other errors, panic.
 959  957                           */
 960  958                          cmn_err(CE_CONT, "send_mondo_set: unexpected "
 961  959                              "hypervisor error 0x%x while sending a "
 962  960                              "mondo to cpuid(s):", stat);
 963  961                          for (i = 0; i < ncpuids; i++) {
 964  962                                  cmn_err(CE_CONT, " 0x%x", mcpup->cpu_list[i]);
 965  963                          }
 966  964                          cmn_err(CE_CONT, "\n");
 967  965                          cmn_err(CE_PANIC, "send_mondo_set: unexpected "
 968  966                              "hypervisor error");
 969  967                  }
 970  968  
 971  969                  tick = gettick();
 972  970                  /*
 973  971                   * If there is a big jump between the current tick
 974  972                   * count and lasttick, we have probably hit a break
 975  973                   * point.  Adjust endtick accordingly to avoid panic.
 976  974                   */
 977  975                  if (tick > (lasttick + xc_tick_jump_limit))
 978  976                          endtick += (tick - lasttick);
 979  977                  lasttick = tick;
 980  978                  if (tick > endtick) {
 981  979                          if (panic_quiesce)
 982  980                                  return;
 983  981                          cmn_err(CE_CONT, "send mondo timeout "
 984  982                              "[retries: 0x%x]  cpuids: ", retries);
 985  983                          for (i = 0; i < ncpuids; i++)
 986  984                                  cmn_err(CE_CONT, " 0x%x", mcpup->cpu_list[i]);
 987  985                          cmn_err(CE_CONT, "\n");
 988  986                          cmn_err(CE_PANIC, "send_mondo_set: timeout");
 989  987                  }
 990  988  
                           /*
                            * Spin for sys_clock_mhz ticks before the next attempt
                            * (presumably about one microsecond -- TODO confirm).
                            */
 991  989                  while (gettick() < (tick + sys_clock_mhz))
 992  990                          ;
 993  991                  retries++;
 994  992          } while (ncpuids > 0);
 995  993  
                   /* Count only the entries the hypervisor marked as sent. */
 996  994          CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
 997  995  
 998  996  #ifdef SEND_MONDO_STATS
 999  997          {
1000  998                  uint64_t n = gettick() - starttick;
1001  999                  if (n < 8192)
1002 1000                          x_set_stimes[n >> 7]++;
1003 1001                  else if (n < 15*8192)
1004 1002                          x_set_ltimes[n >> 13]++;
1005 1003                  else
1006 1004                          x_set_ltimes[0xf]++;
1007 1005          }
1008 1006          x_set_cpus[shipped]++;
1009 1007  #endif
1010 1008  }
1011 1009  
           /*
            * syncfpu - empty in this file; the body performs no work.
            */
1012 1010  void
1013 1011  syncfpu(void)
1014 1012  {
1015 1013  }
1016 1014  
           /*
            * sticksync_slave - delegates entirely to
            * suspend_sync_tick_stick_npt().
            */
1017 1015  void
1018 1016  sticksync_slave(void)
1019 1017  {
1020 1018          suspend_sync_tick_stick_npt();
1021 1019  }
1022 1020  
           /*
            * sticksync_master - empty in this file; unlike sticksync_slave() it
            * performs no work.
            */
1023 1021  void
1024 1022  sticksync_master(void)
1025 1023  {}
1026 1024  
           /*
            * cpu_init_cache_scrub - the only action taken here is to set the soft
            * state to SIS_NORMAL with the SOLARIS_SOFT_STATE_RUN_MSG message; no
            * scrubbing is set up in this function.
            */
1027 1025  void
1028 1026  cpu_init_cache_scrub(void)
1029 1027  {
1030 1028          mach_set_soft_state(SIS_NORMAL, &SOLARIS_SOFT_STATE_RUN_MSG);
1031 1029  }
1032 1030  
1033 1031  int
1034 1032  dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1035 1033  {
1036 1034          int ret, watched;
1037 1035  
1038 1036          watched = watch_disable_addr((void *)addr, 4, S_WRITE);
1039 1037          ret = dtrace_blksuword32(addr, data, 0);
1040 1038          if (watched)
1041 1039                  watch_enable_addr((void *)addr, 4, S_WRITE);
1042 1040  
1043 1041          return (ret);
1044 1042  }
1045 1043  
1046 1044  int
1047 1045  dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
1048 1046  {
1049 1047          if (suword32((void *)addr, *data) == -1)
1050 1048                  return (tryagain ? dtrace_blksuword32_err(addr, data) : -1);
1051 1049          dtrace_flush_sec(addr);
1052 1050  
1053 1051          return (0);
1054 1052  }
1055 1053  
           /*
            * cpu_faulted_enter - empty in this file; cp is not used.
            */
1056 1054  /*ARGSUSED*/
1057 1055  void
1058 1056  cpu_faulted_enter(struct cpu *cp)
1059 1057  {
1060 1058  }
1061 1059  
           /*
            * cpu_faulted_exit - empty in this file; cp is not used.
            */
1062 1060  /*ARGSUSED*/
1063 1061  void
1064 1062  cpu_faulted_exit(struct cpu *cp)
1065 1063  {
1066 1064  }
1067 1065  
1068 1066  static int
1069 1067  kdi_cpu_ready_iter(int (*cb)(int, void *), void *arg)
1070 1068  {
1071 1069          int rc, i;
1072 1070  
1073 1071          for (rc = 0, i = 0; i < NCPU; i++) {
1074 1072                  if (CPU_IN_SET(cpu_ready_set, i))
1075 1073                          rc += cb(i, arg);
1076 1074          }
1077 1075  
1078 1076          return (rc);
1079 1077  }
1080 1078  
1081 1079  /*
1082 1080   * Sends a cross-call to a specified processor.  The caller assumes
1083 1081   * responsibility for repetition of cross-calls, as appropriate (MARSA for
1084 1082   * debugging).
1085 1083   */
1086 1084  static int
1087 1085  kdi_xc_one(int cpuid, void (*func)(uintptr_t, uintptr_t), uintptr_t arg1,
1088 1086      uintptr_t arg2)
1089 1087  {
1090 1088          int stat;
1091 1089          struct machcpu  *mcpup;
1092 1090          uint64_t cpuaddr_reg = 0, cpuaddr_scr = 0;
1093 1091  
1094 1092          mcpup = &(((cpu_t *)get_cpuaddr(cpuaddr_reg, cpuaddr_scr))->cpu_m);
1095 1093  
1096 1094          /*
1097 1095           * if (idsr_busy())
1098 1096           *      return (KDI_XC_RES_ERR);
1099 1097           */
1100 1098  
1101 1099          init_mondo_nocheck((xcfunc_t *)func, arg1, arg2);
1102 1100  
1103 1101          mcpup->cpu_list[0] = (uint16_t)cpuid;
1104 1102          stat = shipit(1, mcpup->cpu_list_ra);
1105 1103  
1106 1104          if (stat == 0)
1107 1105                  return (KDI_XC_RES_OK);
1108 1106          else
1109 1107                  return (KDI_XC_RES_NACK);
1110 1108  }
1111 1109  
1112 1110  static void
1113 1111  kdi_tickwait(clock_t nticks)
1114 1112  {
1115 1113          clock_t endtick = gettick() + nticks;
1116 1114  
1117 1115          while (gettick() < endtick)
1118 1116                  ;
1119 1117  }
1120 1118  
1121 1119  static void
1122 1120  kdi_cpu_init(int dcache_size, int dcache_linesize, int icache_size,
1123 1121      int icache_linesize)
1124 1122  {
1125 1123          kdi_dcache_size = dcache_size;
1126 1124          kdi_dcache_linesize = dcache_linesize;
1127 1125          kdi_icache_size = icache_size;
1128 1126          kdi_icache_linesize = icache_linesize;
1129 1127  }
1130 1128  
1131 1129  /* used directly by kdi_read/write_phys */
           /* Also installed as kdi->kdi_flush_caches by cpu_kdi_init(). */
1132 1130  void
1133 1131  kdi_flush_caches(void)
1134 1132  {
1135 1133          /* Not required on sun4v architecture. */
1136 1134  }
1137 1135  
           /*
            * kdi_get_stick - always returns -1 and never writes through stickp.
            */
1138 1136  /*ARGSUSED*/
1139 1137  int
1140 1138  kdi_get_stick(uint64_t *stickp)
1141 1139  {
1142 1140          return (-1);
1143 1141  }
1144 1142  
1145 1143  void
1146 1144  cpu_kdi_init(kdi_t *kdi)
1147 1145  {
1148 1146          kdi->kdi_flush_caches = kdi_flush_caches;
1149 1147          kdi->mkdi_cpu_init = kdi_cpu_init;
1150 1148          kdi->mkdi_cpu_ready_iter = kdi_cpu_ready_iter;
1151 1149          kdi->mkdi_xc_one = kdi_xc_one;
1152 1150          kdi->mkdi_tickwait = kdi_tickwait;
1153 1151          kdi->mkdi_get_stick = kdi_get_stick;
1154 1152  }
1155 1153  
           /*
            * Soft state bookkeeping.
            *
            * soft_state_saved_state holds the state captured by
            * sun4v_system_claim() and restored by sun4v_system_release();
            * (uint64_t)-1 means that nothing is currently saved.
            *
            * NOTE(review): the remaining variables are not referenced in this part
            * of the file; their roles are presumed from their names -- confirm
            * against the code that uses them.
            */
1156 1154  uint64_t        soft_state_message_ra[SOLARIS_SOFT_STATE_MSG_CNT];
1157 1155  static uint64_t soft_state_saved_state = (uint64_t)-1;
1158 1156  static int      soft_state_initialized = 0;
1159 1157  static uint64_t soft_state_sup_minor;           /* Supported minor number */
1160 1158  static hsvc_info_t soft_state_hsvc = {
1161 1159                          HSVC_REV_1, NULL, HSVC_GROUP_SOFT_STATE, 1, 0, NULL };
1162 1160  
1163 1161  
1164 1162  static void
1165 1163  sun4v_system_claim(void)
1166 1164  {
1167 1165          lbolt_debug_entry();
1168 1166  
1169 1167          watchdog_suspend();
1170 1168          kldc_debug_enter();
1171 1169          /*
1172 1170           * For "mdb -K", set soft state to debugging
1173 1171           */
1174 1172          if (soft_state_saved_state == -1) {
1175 1173                  mach_get_soft_state(&soft_state_saved_state,
1176 1174                      &SOLARIS_SOFT_STATE_SAVED_MSG);
1177 1175          }
1178 1176          /*
1179 1177           * check again as the read above may or may not have worked and if
1180 1178           * it didn't then soft state will still be -1
1181 1179           */
1182 1180          if (soft_state_saved_state != -1) {
1183 1181                  mach_set_soft_state(SIS_TRANSITION,
1184 1182                      &SOLARIS_SOFT_STATE_DEBUG_MSG);
1185 1183          }
1186 1184  }
1187 1185  
1188 1186  static void
1189 1187  sun4v_system_release(void)
1190 1188  {
1191 1189          watchdog_resume();
1192 1190          /*
1193 1191           * For "mdb -K", set soft_state state back to original state on exit
1194 1192           */
1195 1193          if (soft_state_saved_state != -1) {
1196 1194                  mach_set_soft_state(soft_state_saved_state,
1197 1195                      &SOLARIS_SOFT_STATE_SAVED_MSG);
1198 1196                  soft_state_saved_state = -1;
1199 1197          }
1200 1198  
1201 1199          lbolt_debug_return();
1202 1200  }
1203 1201  
1204 1202  void
1205 1203  plat_kdi_init(kdi_t *kdi)
1206 1204  {
1207 1205          kdi->pkdi_system_claim = sun4v_system_claim;
1208 1206          kdi->pkdi_system_release = sun4v_system_release;
1209 1207  }
1210 1208  
1211 1209  /*
1212 1210   * Routine to return memory information associated
1213 1211   * with a physical address and syndrome.
           *
           * Stub in this file: always returns ENOTSUP and writes none of the
           * output parameters.
1214 1212   */
1215 1213  /* ARGSUSED */
1216 1214  int
1217 1215  cpu_get_mem_info(uint64_t synd, uint64_t afar,
1218 1216      uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1219 1217      int *segsp, int *banksp, int *mcidp)
1220 1218  {
1221 1219          return (ENOTSUP);
1222 1220  }
1223 1221  
1224 1222  /*
1225 1223   * This routine returns the size of the kernel's FRU name buffer.
1226 1224   */
1227 1225  size_t
1228 1226  cpu_get_name_bufsize()
1229 1227  {
1230 1228          return (UNUM_NAMLEN);
1231 1229  }
1232 1230  
1233 1231  /*
1234 1232   * This routine is a more generic interface to cpu_get_mem_unum(),
1235 1233   * that may be used by other modules (e.g. mm).
           *
           * Stub in this file: always returns ENOTSUP; buf and *lenp are left
           * untouched.
1236 1234   */
1237 1235  /* ARGSUSED */
1238 1236  int
1239 1237  cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1240 1238      char *buf, int buflen, int *lenp)
1241 1239  {
1242 1240          return (ENOTSUP);
1243 1241  }
1244 1242  
           /*
            * cpu_get_mem_sid - stub in this file: always returns ENOTSUP; buf and
            * *lenp are left untouched.
            */
1245 1243  /* ARGSUSED */
1246 1244  int
1247 1245  cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1248 1246  {
1249 1247          return (ENOTSUP);
1250 1248  }
1251 1249  
           /*
            * cpu_get_mem_addr - stub in this file: always returns ENOTSUP; *addrp
            * is left untouched.
            */
1252 1250  /* ARGSUSED */
1253 1251  int
1254 1252  cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1255 1253  {
1256 1254          return (ENOTSUP);
1257 1255  }
1258 1256  
1259 1257  /*
1260 1258   * xt_sync - wait for previous x-traps to finish
           *
           * The calling CPU is removed from cpuset and the result is intersected
           * with cpu_ready_set; if nothing remains the function returns at once.
           * Otherwise xt_sync_tl1 is cross-trapped to the remaining CPUs and this
           * CPU polls until each of them has cleared its flag byte, with
           * preemption disabled for the duration.
           *
           * If the flags are not all cleared within xc_sync_tick_limit ticks the
           * system panics, unless panic_quiesce is set, in which case the wait is
           * abandoned.
1261 1259   */
1262 1260  void
1263 1261  xt_sync(cpuset_t cpuset)
1264 1262  {
           /*
            * One flag byte per CPU.  The xword view overlays the same storage
            * so the wait loop can test eight flags with a single load.
            */
1265 1263          union {
1266 1264                  uint8_t volatile byte[NCPU];
1267 1265                  uint64_t volatile xword[NCPU / 8];
1268 1266          } cpu_sync;
1269 1267          uint64_t starttick, endtick, tick, lasttick, traptrace_id;
1270 1268          uint_t largestid, smallestid;
1271 1269          int i, j;
1272 1270  
1273 1271          kpreempt_disable();
1274 1272          CPUSET_DEL(cpuset, CPU->cpu_id);
1275 1273          CPUSET_AND(cpuset, cpu_ready_set);
1276 1274  
1277 1275          CPUSET_BOUNDS(cpuset, smallestid, largestid);
1278 1276          if (smallestid == CPUSET_NOTINSET)
1279 1277                  goto out;
1280 1278  
1281 1279          /*
1282 1280           * Sun4v uses a queue for receiving mondos. Successful
1283 1281           * transmission of a mondo only indicates that the mondo
1284 1282           * has been written into the queue.
1285 1283           *
1286 1284           * We use an array of bytes to let each cpu to signal back
1287 1285           * to the cross trap sender that the cross trap has been
1288 1286           * executed. Set the byte to 1 before sending the cross trap
1289 1287           * and wait until other cpus reset it to 0.
1290 1288           */
1291 1289          bzero((void *)&cpu_sync, NCPU);
1292 1290          cpu_sync.byte[smallestid] = 1;
1293 1291          if (largestid != smallestid) {
1294 1292                  for (i = (smallestid + 1); i <= (largestid - 1); i++)
1295 1293                          if (CPU_IN_SET(cpuset, i))
1296 1294                                  cpu_sync.byte[i] = 1;
1297 1295                  cpu_sync.byte[largestid] = 1;
1298 1296          }
1299 1297  
1300 1298          /*
1301 1299           * To help debug xt_sync panic, each mondo is uniquely identified
1302 1300           * by passing the tick value, traptrace_id as the second mondo
1303 1301           * argument to xt_some which is logged in CPU's mondo queue,
1304 1302           * traptrace buffer and the panic message.
1305 1303           */
1306 1304          traptrace_id = gettick();
1307 1305          xt_some(cpuset, (xcfunc_t *)xt_sync_tl1,
1308 1306              (uint64_t)cpu_sync.byte, traptrace_id);
1309 1307  
1310 1308          starttick = lasttick = gettick();
1311 1309          endtick = starttick + xc_sync_tick_limit;
1312 1310  
                   /*
                    * Poll one 64-bit word (eight CPU flags) at a time, covering only
                    * the words that span smallestid..largestid.
                    */
1313 1311          for (i = (smallestid / 8); i <= (largestid / 8); i++) {
1314 1312                  while (cpu_sync.xword[i] != 0) {
1315 1313                          tick = gettick();
1316 1314                          /*
1317 1315                           * If there is a big jump between the current tick
1318 1316                           * count and lasttick, we have probably hit a break
1319 1317                           * point. Adjust endtick accordingly to avoid panic.
1320 1318                           */
1321 1319                          if (tick > (lasttick + xc_tick_jump_limit)) {
1322 1320                                  endtick += (tick - lasttick);
1323 1321                          }
1324 1322                          lasttick = tick;
1325 1323                          if (tick > endtick) {
1326 1324                                  if (panic_quiesce)
1327 1325                                          goto out;
1328 1326                                  cmn_err(CE_CONT, "Cross trap sync timeout:  "
1329 1327                                      "at cpu_sync.xword[%d]: 0x%lx "
1330 1328                                      "cpu_sync.byte: 0x%lx "
1331 1329                                      "starttick: 0x%lx endtick: 0x%lx "
1332 1330                                      "traptrace_id = 0x%lx\n",
1333 1331                                      i, cpu_sync.xword[i],
1334 1332                                      (uint64_t)cpu_sync.byte,
1335 1333                                      starttick, endtick, traptrace_id);
1336 1334                                  cmn_err(CE_CONT, "CPUIDs:");
                                           /*
                                            * List every CPU from the stalled word
                                            * onwards whose flag is still set.
                                            */
1337 1335                                  for (j = (i * 8); j <= largestid; j++) {
1338 1336                                          if (cpu_sync.byte[j] != 0)
1339 1337                                                  cmn_err(CE_CONT, " 0x%x", j);
1340 1338                                  }
1341 1339                                  cmn_err(CE_PANIC, "xt_sync: timeout");
1342 1340                          }
1343 1341                  }
1344 1342          }
1345 1343  
1346 1344  out:
1347 1345          kpreempt_enable();
1348 1346  }
1349 1347  
1350 1348  #define QFACTOR         200
1351 1349  /*
1352 1350   * Recalculate the values of the cross-call timeout variables based
1353 1351   * on the value of the 'inter-cpu-latency' property of the platform node.
1354 1352   * The property sets the number of nanosec to wait for a cross-call
1355 1353   * to be acknowledged.  Other timeout variables are derived from it.
1356 1354   *
1357 1355   * N.B. This implementation is aware of the internals of xc_init()
1358 1356   * and updates many of the same variables.
1359 1357   */
1360 1358  void
1361 1359  recalc_xc_timeouts(void)
1362 1360  {
1363 1361          typedef union {
1364 1362                  uint64_t whole;
1365 1363                  struct {
1366 1364                          uint_t high;
1367 1365                          uint_t low;
1368 1366                  } half;
1369 1367          } u_number;
1370 1368  
1371 1369          /* See x_call.c for descriptions of these extern variables. */
1372 1370          extern uint64_t xc_tick_limit_scale;
1373 1371          extern uint64_t xc_mondo_time_limit;
1374 1372          extern uint64_t xc_func_time_limit;
1375 1373          extern uint64_t xc_scale;
1376 1374          extern uint64_t xc_mondo_multiplier;
1377 1375          extern uint_t   nsec_shift;
1378 1376  
1379 1377          /* Temp versions of the target variables */
1380 1378          uint64_t tick_limit;
1381 1379          uint64_t tick_jump_limit;
1382 1380          uint64_t mondo_time_limit;
1383 1381          uint64_t func_time_limit;
1384 1382          uint64_t scale;
1385 1383  
1386 1384          uint64_t latency;       /* nanoseconds */
1387 1385          uint64_t maxfreq;
1388 1386          uint64_t tick_limit_save = xc_tick_limit;
1389 1387          uint64_t sync_tick_limit_save = xc_sync_tick_limit;
1390 1388          uint_t   tick_scale;
1391 1389          uint64_t top;
1392 1390          uint64_t bottom;
1393 1391          u_number tk;
1394 1392  
1395 1393          md_t *mdp;
1396 1394          int nrnode;
1397 1395          mde_cookie_t *platlist;
1398 1396  
1399 1397          /*
1400 1398           * Look up the 'inter-cpu-latency' (optional) property in the
1401 1399           * platform node of the MD.  The units are nanoseconds.
1402 1400           */
1403 1401          if ((mdp = md_get_handle()) == NULL) {
1404 1402                  cmn_err(CE_WARN, "recalc_xc_timeouts: "
1405 1403                      "Unable to initialize machine description");
1406 1404                  return;
1407 1405          }
1408 1406  
1409 1407          nrnode = md_alloc_scan_dag(mdp,
1410 1408              md_root_node(mdp), "platform", "fwd", &platlist);
1411 1409  
1412 1410          ASSERT(nrnode == 1);
1413 1411          if (nrnode < 1) {
1414 1412                  cmn_err(CE_WARN, "recalc_xc_timeouts: platform node missing");
1415 1413                  goto done;
1416 1414          }
1417 1415          if (md_get_prop_val(mdp, platlist[0],
1418 1416              "inter-cpu-latency", &latency) == -1)
1419 1417                  goto done;
1420 1418  
1421 1419          /*
1422 1420           * clock.h defines an assembly-language macro
1423 1421           * (NATIVE_TIME_TO_NSEC_SCALE) to convert from %stick
1424 1422           * units to nanoseconds.  Since the inter-cpu-latency
1425 1423           * units are nanoseconds and the xc_* variables require
1426 1424           * %stick units, we need the inverse of that function.
1427 1425           * The trick is to perform the calculation without
1428 1426           * floating point, but also without integer truncation
1429 1427           * or overflow.  To understand the calculation below,
1430 1428           * please read the discussion of the macro in clock.h.
1431 1429           * Since this new code will be invoked infrequently,
1432 1430           * we can afford to implement it in C.
1433 1431           *
1434 1432           * tick_scale is the reciprocal of nsec_scale which is
1435 1433           * calculated at startup in setcpudelay().  The calc
1436 1434           * of tick_limit parallels that of NATIVE_TIME_TO_NSEC_SCALE
1437 1435           * except we use tick_scale instead of nsec_scale and
1438 1436           * C instead of assembler.
1439 1437           */
1440 1438          tick_scale = (uint_t)(((u_longlong_t)sys_tick_freq
1441 1439              << (32 - nsec_shift)) / NANOSEC);
1442 1440  
1443 1441          tk.whole = latency;
1444 1442          top = ((uint64_t)tk.half.high << 4) * tick_scale;
1445 1443          bottom = (((uint64_t)tk.half.low << 4) * (uint64_t)tick_scale) >> 32;
1446 1444          tick_limit = top + bottom;
1447 1445  
1448 1446          /*
1449 1447           * xc_init() calculated 'maxfreq' by looking at all the cpus,
1450 1448           * and used it to derive some of the timeout variables that we
1451 1449           * recalculate below.  We can back into the original value by
1452 1450           * using the inverse of one of those calculations.
1453 1451           */
1454 1452          maxfreq = xc_mondo_time_limit / xc_scale;
1455 1453  
1456 1454          /*
1457 1455           * Don't allow the new timeout (xc_tick_limit) to fall below
1458 1456           * the system tick frequency (stick).  Allowing the timeout
1459 1457           * to be set more tightly than this empirically determined
1460 1458           * value may cause panics.
1461 1459           */
1462 1460          tick_limit = tick_limit < sys_tick_freq ? sys_tick_freq : tick_limit;
1463 1461  
1464 1462          tick_jump_limit = tick_limit / 32;
1465 1463          tick_limit *= xc_tick_limit_scale;
1466 1464  
1467 1465          /*
1468 1466           * Recalculate xc_scale since it is used in a callback function
1469 1467           * (xc_func_timeout_adj) to adjust two of the timeouts dynamically.
1470 1468           * Make the change in xc_scale proportional to the change in
1471 1469           * xc_tick_limit.
1472 1470           */
1473 1471          scale = (xc_scale * tick_limit + sys_tick_freq / 2) / tick_limit_save;
1474 1472          if (scale == 0)
1475 1473                  scale = 1;
1476 1474  
1477 1475          mondo_time_limit = maxfreq * scale;
1478 1476          func_time_limit = mondo_time_limit * xc_mondo_multiplier;
1479 1477  
1480 1478          /*
1481 1479           * Don't modify the timeouts if nothing has changed.  Else,
1482 1480           * stuff the variables with the freshly calculated (temp)
1483 1481           * variables.  This minimizes the window where the set of
1484 1482           * values could be inconsistent.
1485 1483           */
1486 1484          if (tick_limit != xc_tick_limit) {
1487 1485                  xc_tick_limit = tick_limit;
1488 1486                  xc_tick_jump_limit = tick_jump_limit;
1489 1487                  xc_scale = scale;
1490 1488                  xc_mondo_time_limit = mondo_time_limit;
1491 1489                  xc_func_time_limit = func_time_limit;
1492 1490          }
1493 1491  
1494 1492  done:
1495 1493          /*
1496 1494           * Increase the timeout limit for xt_sync() cross calls.
1497 1495           */
1498 1496          xc_sync_tick_limit = xc_tick_limit * (cpu_q_entries / QFACTOR);
1499 1497          xc_sync_tick_limit = xc_sync_tick_limit < xc_tick_limit ?
1500 1498              xc_tick_limit : xc_sync_tick_limit;
1501 1499  
1502 1500          /*
1503 1501           * Force the new values to be used for future cross calls.
1504 1502           * This is necessary only when we increase the timeouts.
1505 1503           */
1506 1504          if ((xc_tick_limit > tick_limit_save) || (xc_sync_tick_limit >
1507 1505              sync_tick_limit_save)) {
1508 1506                  cpuset_t cpuset = cpu_ready_set;
1509 1507                  xt_sync(cpuset);
1510 1508          }
1511 1509  
1512 1510          if (nrnode > 0)
1513 1511                  md_free_scan_dag(mdp, &platlist);
1514 1512          (void) md_fini_handle(mdp);
1515 1513  }
1516 1514  
1517 1515  void
1518 1516  mach_soft_state_init(void)
1519 1517  {
1520 1518          int             i;
1521 1519          uint64_t        ra;
1522 1520  
1523 1521          /*
1524 1522           * Try to register soft_state api. If it fails, soft_state api has not
1525 1523           * been implemented in the firmware, so do not bother to setup
1526 1524           * soft_state in the kernel.
1527 1525           */
1528 1526          if ((i = hsvc_register(&soft_state_hsvc, &soft_state_sup_minor)) != 0) {
1529 1527                  return;
1530 1528          }
1531 1529          for (i = 0; i < SOLARIS_SOFT_STATE_MSG_CNT; i++) {
1532 1530                  ASSERT(strlen((const char *)(void *)
1533 1531                      soft_state_message_strings + i) < SSM_SIZE);
1534 1532                  if ((ra = va_to_pa(
1535 1533                      (void *)(soft_state_message_strings + i))) == -1ll) {
1536 1534                          return;
1537 1535                  }
1538 1536                  soft_state_message_ra[i] = ra;
1539 1537          }
1540 1538          /*
1541 1539           * Tell OBP that we are supporting Guest State
1542 1540           */
1543 1541          prom_sun4v_soft_state_supported();
1544 1542          soft_state_initialized = 1;
1545 1543  }
1546 1544  
1547 1545  void
1548 1546  mach_set_soft_state(uint64_t state, uint64_t *string_ra)
1549 1547  {
1550 1548          uint64_t        rc;
1551 1549  
1552 1550          if (soft_state_initialized && *string_ra) {
1553 1551                  rc = hv_soft_state_set(state, *string_ra);
1554 1552                  if (rc != H_EOK) {
1555 1553                          cmn_err(CE_WARN,
1556 1554                              "hv_soft_state_set returned %ld\n", rc);
1557 1555                  }
1558 1556          }
1559 1557  }
1560 1558  
1561 1559  void
1562 1560  mach_get_soft_state(uint64_t *state, uint64_t *string_ra)
1563 1561  {
1564 1562          uint64_t        rc;
1565 1563  
1566 1564          if (soft_state_initialized && *string_ra) {
1567 1565                  rc = hv_soft_state_get(*string_ra, state);
1568 1566                  if (rc != H_EOK) {
1569 1567                          cmn_err(CE_WARN,
1570 1568                              "hv_soft_state_get returned %ld\n", rc);
1571 1569                          *state = -1;
1572 1570                  }
1573 1571          }
1574 1572  }

↓ open down ↓

1199 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX