/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kstat.h>
#include <sys/param.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/procfs_isa.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/machpcb.h>
#include <sys/machasi.h>
#include <sys/vis.h>
#include <sys/fpu/fpusystm.h>
#include <sys/cpu_module.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/cmp.h>
#include <sys/platform_module.h>
#include <sys/bl.h>
#include <sys/nvpair.h>
#include <sys/kdi_impl.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/pool_pset.h>
#include <sys/mem.h>
#include <sys/dumphdr.h>
#include <vm/seg_kmem.h>
#include <sys/hold_page.h>
#include <sys/cpu.h>
#include <sys/ivintr.h>
#include <sys/clock_impl.h>
#include <sys/machclock.h>

int maxphys = MMU_PAGESIZE * 16;        /* 128k */
int klustsize = MMU_PAGESIZE * 16;      /* 128k */

/*
 * Initialize a kernel thread's stack.
 */
caddr_t
thread_stk_init(caddr_t stk)
{
        kfpu_t *fp;
        ulong_t align;

        /* allocate extra space for floating point state */
        stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
        align = (uintptr_t)stk & 0x3f;
        stk -= align;           /* force v9_fpu to be 64 byte aligned */
        fp = (kfpu_t *)stk;
        fp->fpu_fprs = 0;

        stk -= SA(MINFRAME);
        return (stk);
}
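
/*
 * Illustrative sketch of the resulting kernel thread stack layout
 * (the stack grows downward; addresses decrease top to bottom):
 *
 *      +-------------------------------+  <- original stk
 *      | kfpu_t + GSR save area        |  64-byte aligned so block
 *      +-------------------------------+  load/stores can be used
 *      | MINFRAME register save area   |
 *      +-------------------------------+  <- returned stk
 */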

#define WIN32_SIZE      (MAXWIN * sizeof (struct rwindow32))
#define WIN64_SIZE      (MAXWIN * sizeof (struct rwindow64))

kmem_cache_t    *wbuf32_cache;
kmem_cache_t    *wbuf64_cache;

void
lwp_stk_cache_init(void)
{
        /*
         * Window buffers are allocated from the static arena
         * because they are accessed at TL>0. We must also use
         * KMC_NOHASH to prevent them from straddling page
         * boundaries, since they are accessed by physical address.
         */
        wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
            0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
        wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
            0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
}

/*
 * Initialize an lwp's kernel stack.
 * Now that the floating point register save area (kfpu_t) has been
 * broken out of machpcb and aligned on a 64 byte boundary so that we
 * can do block loads/stores to and from it, there are a couple of
 * potential optimizations to save stack space. 1. The floating point
 * register save area could be aligned on a 16 byte boundary, and the
 * floating point code changed to (a) check the alignment and (b) use
 * different save/restore macros depending upon the alignment.
 * 2. The lwp_stk_init code below could be changed to calculate whether
 * less space would be wasted if machpcb were first instead of second.
 * However, there is a REGOFF macro used in locore, syscall_trap,
 * machdep and mlsetup that assumes that the saved register area is a
 * fixed distance from the %sp, and it would have to be changed to a
 * pointer or something...JJ said later.
 */
caddr_t
lwp_stk_init(klwp_t *lwp, caddr_t stk)
{
        struct machpcb *mpcb;
        kfpu_t *fp;
        uintptr_t aln;

        stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
        aln = (uintptr_t)stk & 0x3F;
        stk -= aln;
        fp = (kfpu_t *)stk;
        stk -= SA(sizeof (struct machpcb));
        mpcb = (struct machpcb *)stk;
        bzero(mpcb, sizeof (struct machpcb));
        bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
        lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
        lwp->lwp_fpu = (void *)fp;
        mpcb->mpcb_fpu = fp;
        mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
        mpcb->mpcb_thread = lwp->lwp_thread;
        mpcb->mpcb_wbcnt = 0;
        if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
                mpcb->mpcb_wstate = WSTATE_USER32;
                mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
        } else {
                mpcb->mpcb_wstate = WSTATE_USER64;
                mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
        }
        ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
        mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
        mpcb->mpcb_pa = va_to_pa(mpcb);
        return (stk);
}
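
/*
 * Illustrative sketch of the resulting lwp kernel stack layout
 * (grows downward):
 *
 *      +-------------------------------+  <- original stk
 *      | kfpu_t + GSR save area        |  64-byte aligned (mpcb_fpu)
 *      +-------------------------------+
 *      | struct machpcb                |  <- returned stk, lwp_regs
 *      +-------------------------------+
 *
 * The window buffer (mpcb_wbuf) lives outside the stack, in the
 * static arena, and is tracked by both VA and PA since trap-level
 * code accesses it by physical address.
 */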

void
lwp_stk_fini(klwp_t *lwp)
{
        struct machpcb *mpcb = lwptompcb(lwp);

        /*
         * There might be windows still in the wbuf due to an unmapped
         * stack, a misaligned stack pointer, etc.  We just free it.
         */
        mpcb->mpcb_wbcnt = 0;
        if (mpcb->mpcb_wstate == WSTATE_USER32)
                kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
        else
                kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
        mpcb->mpcb_wbuf = NULL;
        mpcb->mpcb_wbuf_pa = -1;
}


/*
 * Copy regs from parent to child.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
        kthread_t *t, *pt = lwptot(lwp);
        struct machpcb *mpcb = lwptompcb(clwp);
        struct machpcb *pmpcb = lwptompcb(lwp);
        kfpu_t *fp, *pfp = lwptofpu(lwp);
        caddr_t wbuf;
        uint_t wstate;

        t = mpcb->mpcb_thread;
        /*
         * Remember the child's fp and wbuf since they will get erased
         * during the bcopy.
         */
        fp = mpcb->mpcb_fpu;
        wbuf = mpcb->mpcb_wbuf;
        wstate = mpcb->mpcb_wstate;
        /*
         * Don't copy mpcb_frame since we hand-crafted it
         * in thread_load().
         */
        bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
        mpcb->mpcb_thread = t;
        mpcb->mpcb_fpu = fp;
        fp->fpu_q = mpcb->mpcb_fpu_q;

        /*
         * It is theoretically possible for the lwp's wstate to
         * be different from its value assigned in lwp_stk_init,
         * since lwp_stk_init assumed the data model of the process.
         * Here we take on the data model of the cloned lwp.
         */
        if (mpcb->mpcb_wstate != wstate) {
                if (wstate == WSTATE_USER32) {
                        kmem_cache_free(wbuf32_cache, wbuf);
                        wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
                        wstate = WSTATE_USER64;
                } else {
                        kmem_cache_free(wbuf64_cache, wbuf);
                        wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
                        wstate = WSTATE_USER32;
                }
        }

        mpcb->mpcb_pa = va_to_pa(mpcb);
        mpcb->mpcb_wbuf = wbuf;
        mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);

        ASSERT(mpcb->mpcb_wstate == wstate);

        if (mpcb->mpcb_wbcnt != 0) {
                bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
                    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
                    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
        }

        if (pt == curthread)
                pfp->fpu_fprs = _fp_read_fprs();
        if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
                if (pt == curthread && fpu_exists) {
                        save_gsr(clwp->lwp_fpu);
                } else {
                        uint64_t gsr;
                        gsr = get_gsr(lwp->lwp_fpu);
                        set_gsr(gsr, clwp->lwp_fpu);
                }
                fp_fork(lwp, clwp);
        }
}

/*
 * Free lwp fpu regs.
 */
void
lwp_freeregs(klwp_t *lwp, int isexec)
{
        kfpu_t *fp = lwptofpu(lwp);

        if (lwptot(lwp) == curthread)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
                fp_free(fp, isexec);
}

/*
 * These functions are currently unused on sparc.
 */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}

/*ARGSUSED*/
void
lwp_detach_brand_hdlrs(klwp_t *lwp)
{}

/*
 * Fill in the specified extra register state area with the lwp's
 * platform-dependent non-floating-point extra register state
 * information.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * Fill in the specified extra register state area with the lwp's
 * platform-dependent floating-point extra register state information.
 * NOTE:  'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
        prxregset_t *xregs = (prxregset_t *)xrp;
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
        uint64_t gsr;

        /*
         * fp_fksave() does not flush the GSR register into
         * the lwp area, so do it now.
         */
        kpreempt_disable();
        if (ttolwp(curthread) == lwp && fpu_exists) {
                fp->fpu_fprs = _fp_read_fprs();
                if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                        _fp_write_fprs(fprs);
                        fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                }
                save_gsr(fp);
        }
        gsr = get_gsr(fp);
        kpreempt_enable();
        PRXREG_GSR(xregs) = gsr;
}

/*
 * Set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input.
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * Set the specified lwp's platform-dependent floating-point
 * extra register state based on the specified input.
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
        prxregset_t *xregs = (prxregset_t *)xrp;
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
        uint64_t gsr = PRXREG_GSR(xregs);

        kpreempt_disable();
        set_gsr(gsr, lwptofpu(lwp));

        if ((lwp == ttolwp(curthread)) && fpu_exists) {
                fp->fpu_fprs = _fp_read_fprs();
                if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                        _fp_write_fprs(fprs);
                        fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                }
                restore_gsr(lwptofpu(lwp));
        }
        kpreempt_enable();
}

/*
 * Fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information.
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * Fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * floating-point extra register state information.
 */
void
getfpasrs(klwp_t *lwp, asrset_t asr)
{
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

        kpreempt_disable();
        if (ttolwp(curthread) == lwp)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
                if (fpu_exists && ttolwp(curthread) == lwp) {
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                _fp_write_fprs(fprs);
                                fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                        }
                        save_gsr(fp);
                }
                asr[ASR_GSR] = (int64_t)get_gsr(fp);
        }
        kpreempt_enable();
}

/*
 * Set the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information.
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
        /* for sun4u nothing to do here, added for symmetry */
}

void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

        kpreempt_disable();
        if (ttolwp(curthread) == lwp)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
                set_gsr(asr[ASR_GSR], fp);
                if (fpu_exists && ttolwp(curthread) == lwp) {
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                _fp_write_fprs(fprs);
                                fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                        }
                        restore_gsr(fp);
                }
        }
        kpreempt_enable();
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
        int             i;
        kstat_t         *intr_ksp;
        kstat_named_t   *knp;
        char            name[KSTAT_STRLEN];
        zoneid_t        zoneid;

        ASSERT(MUTEX_HELD(&cpu_lock));

        if (pool_pset_enabled())
                zoneid = GLOBAL_ZONEID;
        else
                zoneid = ALL_ZONES;

        intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
            KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

        /*
         * Initialize each PIL's named kstat.
         */
        if (intr_ksp != NULL) {
                intr_ksp->ks_update = cpu_kstat_intrstat_update;
                knp = (kstat_named_t *)intr_ksp->ks_data;
                intr_ksp->ks_private = cp;
                for (i = 0; i < PIL_MAX; i++) {
                        (void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
                            i + 1);
                        kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
                        (void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
                            i + 1);
                        kstat_named_init(&knp[(i * 2) + 1], name,
                            KSTAT_DATA_UINT64);
                }
                kstat_install(intr_ksp);
        }
}
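
/*
 * For each PIL the loop above yields a time/count pair of named
 * kstats, e.g. "level-1-time"/"level-1-count" through
 * "level-<PIL_MAX>-time"/"level-<PIL_MAX>-count".
 */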

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
        kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
        kstat_named_t   *knp = ksp->ks_data;
        cpu_t           *cpup = (cpu_t *)ksp->ks_private;
        int             i;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        /*
         * We use separate passes to copy and convert the statistics to
         * nanoseconds. This assures that the snapshot of the data is as
         * self-consistent as possible.
         */

        for (i = 0; i < PIL_MAX; i++) {
                knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
                knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
        }

        for (i = 0; i < PIL_MAX; i++) {
                knp[i * 2].value.ui64 =
                    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
                    cpup->cpu_id);
        }

        return (0);
}
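
/*
 * These statistics can be observed from userland with kstat(1M);
 * an illustrative invocation for CPU 0:
 *
 *      $ kstat -p cpu:0:intrstat
 */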

/*
 * Called by common/os/cpu.c for psrinfo(1M) kstats.
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
        return (cpunodes[cp->cpu_id].fru_fmri);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
        uint64_t        interval;
        uint64_t        start;
        cpu_t           *cpu;

        ASSERT((t->t_flag & T_INTR_THREAD) != 0);
        ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

        /*
         * We could be here with a zero timestamp. This can happen if an
         * interrupt thread which no longer has a pinned thread underneath
         * it (i.e. it blocked at some point in its past) has finished
         * running its handler. intr_thread() updated the interrupt
         * statistic for its PIL and zeroed its timestamp. Since there was
         * no pinned thread to return to, swtch() gets called and we end
         * up here.
         *
         * It can also happen if an interrupt thread in intr_thread()
         * calls preempt. It will have already taken care of updating
         * stats. In this event, the interrupt thread will be runnable.
         */
        if (t->t_intr_start) {
                do {
                        start = t->t_intr_start;
                        interval = CLOCK_TICK_COUNTER() - start;
                } while (atomic_cas_64(&t->t_intr_start, start, 0) != start);
                cpu = CPU;
                if (cpu->cpu_m.divisor > 1)
                        interval *= cpu->cpu_m.divisor;
                cpu->cpu_m.intrstat[t->t_pil][0] += interval;

                atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
                    interval);
        } else
                ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
}


/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
        uint64_t ts;

        ASSERT((t->t_flag & T_INTR_THREAD) != 0);
        ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

        do {
                ts = t->t_intr_start;
        } while (atomic_cas_64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) !=
            ts);
}


int
blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
{
        /*
         * plat_blacklist() is optional for the platform; taking its
         * address tests whether this platform provides it.
         */
        if (&plat_blacklist)
                return (plat_blacklist(cmd, scheme, fmri, class));

        return (ENOTSUP);
}

int
kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
        extern void kdi_flush_caches(void);
        size_t nread = 0;
        uint32_t word;
        int slop, i;

        kdi_flush_caches();
        membar_enter();

        /* We might not begin on a word boundary. */
        if ((slop = addr & 3) != 0) {
                word = ldphys(addr & ~3);
                for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
                        *buf++ = ((uchar_t *)&word)[i];
                addr = roundup(addr, 4);
        }

        while (nbytes > 0) {
                word = ldphys(addr);
                for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
                        *buf++ = ((uchar_t *)&word)[i];
        }

        kdi_flush_caches();

        *ncopiedp = nread;
        return (0);
}
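
/*
 * Illustrative example of the slop handling above: reading 3 bytes
 * from physical address 0x1002 first loads the word at 0x1000 and
 * copies bytes 2 and 3 (the slop), then loads the word at 0x1004 and
 * copies byte 0.
 */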

int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
        extern void kdi_flush_caches(void);
        size_t nwritten = 0;
        uint32_t word;
        int slop, i;

        kdi_flush_caches();

        /* We might not begin on a word boundary. */
        if ((slop = addr & 3) != 0) {
                word = ldphys(addr & ~3);
                for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr & ~3, word);
                addr = roundup(addr, 4);
        }

        while (nbytes > 3) {
                for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr, word);
                addr += 4;
        }

        /* We might not end with a whole word. */
        if (nbytes > 0) {
                word = ldphys(addr);
                for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr, word);
        }

        membar_enter();
        kdi_flush_caches();

        *ncopiedp = nwritten;
        return (0);
}

static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
        sync_reg_buf = *regs;
        sync_tt = tt;

        sync_handler();
}

static void
kdi_plat_call(void (*platfn)(void))
{
        if (platfn != NULL) {
                prom_suspend_prepost();
                platfn();
                prom_resume_prepost();
        }
}

/*
 * kdi_system_claim and release are defined here for all sun4 platforms and
 * pointed to by mach_kdi_init() to provide default callbacks for such
 * systems. Specific sun4u or sun4v platforms may implement their own claim
 * and release routines, at which point their respective callbacks will be
 * updated.
 */
static void
kdi_system_claim(void)
{
        lbolt_debug_entry();
}

static void
kdi_system_release(void)
{
        lbolt_debug_return();
}

void
mach_kdi_init(kdi_t *kdi)
{
        kdi->kdi_plat_call = kdi_plat_call;
        kdi->kdi_kmdb_enter = kmdb_enter;
        kdi->pkdi_system_claim = kdi_system_claim;
        kdi->pkdi_system_release = kdi_system_release;
        kdi->mkdi_cpu_index = kdi_cpu_index;
        kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
        kdi->mkdi_kernpanic = kdi_kernpanic;
}


/*
 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 * long, and it fills in the array with the time spent on cpu in
 * each of the mstates, where time is returned in nsec.
 *
 * No guarantee is made that the returned values in times[] will
 * monotonically increase on sequential calls, although this will
 * be true in the long run. Any such guarantee must be handled by
 * the caller, if needed. This can happen if we fail to account
 * for elapsed time due to a generation counter conflict, yet we
 * did account for it on a prior call (see below).
 *
 * The complication is that the cpu in question may be updating
 * its microstate at the same time that we are reading it.
 * Because the microstate is only updated when the CPU's state
 * changes, the values in cpu_intracct[] can be indefinitely out
 * of date. To determine true current values, it is necessary to
 * compare the current time with cpu_mstate_start, and add the
 * difference to times[cpu_mstate].
 *
 * This can be a problem if those values are changing out from
 * under us. Because the code path in new_cpu_mstate() is
 * performance critical, we have not added a lock to it. Instead,
 * we have added a generation counter. Before beginning
 * modifications, the counter is set to 0. After modifications,
 * it is set to the old value plus one.
 *
 * get_cpu_mstate() will not consider the values of cpu_mstate
 * and cpu_mstate_start to be usable unless the value of
 * cpu_mstate_gen is both non-zero and unchanged, both before and
 * after reading the mstate information. Note that we must
 * protect against out-of-order loads around accesses to the
 * generation counter. Also, this is a best effort approach in
 * that we do not retry should the counter be found to have
 * changed.
 *
 * cpu_intracct[] is used to identify time spent in each CPU
 * mstate while handling interrupts. Such time should be reported
 * against system time, and so is subtracted out from its
 * corresponding cpu_acct[] time and added to
 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 * %ticks, but acct time may be stored as %sticks, thus requiring
 * different conversions before they can be compared.
 */
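
/*
 * Illustrative sketch of the writer-side generation protocol
 * described above (see new_cpu_mstate() for the actual code):
 *
 *      gen = cpu->cpu_mstate_gen;
 *      cpu->cpu_mstate_gen = 0;        // readers now ignore the state
 *      membar_producer();
 *      ... update cpu_mstate, cpu_mstate_start, cpu_acct[] ...
 *      membar_producer();
 *      cpu->cpu_mstate_gen = gen + 1;  // state is consistent again
 */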

void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
        int i;
        hrtime_t now, start;
        uint16_t gen;
        uint16_t state;
        hrtime_t intracct[NCMSTATES];

        /*
         * Load all volatile state under the protection of membar.
         * cpu_acct[cpu_mstate] must be loaded to avoid double counting
         * of (now - cpu_mstate_start) by a change in CPU mstate that
         * arrives after we make our last check of cpu_mstate_gen.
         */

        now = gethrtime_unscaled();
        gen = cpu->cpu_mstate_gen;

        membar_consumer();      /* guarantee load ordering */
        start = cpu->cpu_mstate_start;
        state = cpu->cpu_mstate;
        for (i = 0; i < NCMSTATES; i++) {
                intracct[i] = cpu->cpu_intracct[i];
                times[i] = cpu->cpu_acct[i];
        }
        membar_consumer();      /* guarantee load ordering */

        if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
                times[state] += now - start;

        for (i = 0; i < NCMSTATES; i++) {
                scalehrtime(&times[i]);
                intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
        }

        for (i = 0; i < NCMSTATES; i++) {
                if (i == CMS_SYSTEM)
                        continue;
                times[i] -= intracct[i];
                if (times[i] < 0) {
                        intracct[i] += times[i];
                        times[i] = 0;
                }
                times[CMS_SYSTEM] += intracct[i];
        }
}

void
mach_cpu_pause(volatile char *safe)
{
        /*
         * This cpu is now safe.
         */
        *safe = PAUSE_WAIT;
        membar_enter(); /* make sure stores are flushed */

        /*
         * Now we wait.  When we are allowed to continue, safe
         * will be set to PAUSE_IDLE.
         */
        while (*safe != PAUSE_IDLE)
                SMT_PAUSE();
}

/*ARGSUSED*/
int
plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
{
        return (ENOTSUP);
}

/* cpu threshold for compressed dumps */
#ifdef sun4v
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4V_MINCPU;
#else
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4U_MINCPU;
#endif

int
dump_plat_addr()
{
        return (0);
}

void
dump_plat_pfn()
{
}

/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
        return (0);
}

/* ARGSUSED */
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
        return (PLAT_HOLD_OK);
}

/* ARGSUSED */
void
plat_release_page(page_t *pp)
{
}

/* ARGSUSED */
void
progressbar_key_abort(ldi_ident_t li)
{
}

/*
 * We need to post a soft interrupt to reprogram the lbolt cyclic when
 * switching from event to cyclic driven lbolt. The following code adds
 * and posts the softint for sun4 platforms.
 */
static uint64_t lbolt_softint_inum;

void
lbolt_softint_add(void)
{
        lbolt_softint_inum = add_softintr(LOCK_LEVEL,
            (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
}

void
lbolt_softint_post(void)
{
        setsoftint(lbolt_softint_inum);
}