1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* common code with bug fixes from original version in trap.c */
  27 
  28 #include <sys/param.h>
  29 #include <sys/types.h>
  30 #include <sys/systm.h>
  31 #include <sys/archsystm.h>
  32 #include <sys/vmsystm.h>
  33 #include <sys/fpu/fpusystm.h>
  34 #include <sys/fpu/fpu_simulator.h>
  35 #include <sys/inline.h>
  36 #include <sys/debug.h>
  37 #include <sys/privregs.h>
  38 #include <sys/machpcb.h>
  39 #include <sys/simulate.h>
  40 #include <sys/proc.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/stack.h>
  43 #include <sys/watchpoint.h>
  44 #include <sys/trap.h>
  45 #include <sys/machtrap.h>
  46 #include <sys/mman.h>
  47 #include <sys/asi.h>
  48 #include <sys/copyops.h>
  49 #include <vm/as.h>
  50 #include <vm/page.h>
  51 #include <sys/model.h>
  52 #include <vm/seg_vn.h>
  53 #include <sys/byteorder.h>
  54 #include <sys/time.h>
  55 
  56 #define IS_IBIT_SET(x)  (x & 0x2000)
  57 #define IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
  58 #define IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 ||    \
  59                 op3 == 0x35))
  60 #define IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)           \
  61                 (op == 3 && (op3 == IOP_V8_LDDFA ||             \
  62                 op3 == IOP_V8_STDFA) && asi > ASI_SNFL)
  63 
  64 static int aligndebug = 0;
  65 
  66 /*
  67  * For the sake of those who must be compatible with unaligned
  68  * architectures, users can link their programs to use a
  69  * corrective trap handler that will fix unaligned references
  70  * a special trap #6 (T_FIX_ALIGN) enables this 'feature'.
  71  * Returns 1 for success, 0 for failure.
  72  */
  73 
  74 int
  75 do_unaligned(struct regs *rp, caddr_t *badaddr)
  76 {
  77         uint_t  inst, op3, asi = 0;
  78         uint_t  rd, rs1, rs2;
  79         int     sz, nf = 0, ltlend = 0;
  80         int     floatflg;
  81         int     fsrflg;
  82         int     immflg;
  83         int     lddstdflg;
  84         caddr_t addr;
  85         uint64_t val;
  86         union {
  87                 uint64_t        l[2];
  88                 uint32_t        i[4];
  89                 uint16_t        s[8];
  90                 uint8_t         c[16];
  91         } data;
  92 
  93         ASSERT(USERMODE(rp->r_tstate));
  94         inst = fetch_user_instr((caddr_t)rp->r_pc);
  95 
  96         op3 = (inst >> 19) & 0x3f;
  97         rd = (inst >> 25) & 0x1f;
  98         rs1 = (inst >> 14) & 0x1f;
  99         rs2 = inst & 0x1f;
 100         floatflg = (inst >> 24) & 1;
 101         immflg = (inst >> 13) & 1;
 102         lddstdflg = fsrflg = 0;
 103 
 104         /* if not load or store do nothing */
 105         if ((inst >> 30) != 3)
 106                 return (0);
 107 
 108         /* if ldstub or swap, do nothing */
 109         if ((inst & 0xc1680000) == 0xc0680000)
 110                 return (0);
 111 
 112         /* if cas/casx, do nothing */
 113         if ((inst & 0xc1e00000) == 0xc1e00000)
 114                 return (0);
 115 
 116         if (floatflg) {
 117                 switch ((inst >> 19) & 3) {   /* map size bits to a number */
 118                 case 0: sz = 4;
 119                         break;                  /* ldf{a}/stf{a} */
 120                 case 1: fsrflg = 1;
 121                         if (rd == 0)
 122                                 sz = 4;         /* ldfsr/stfsr */
 123                         else  if (rd == 1)
 124                                 sz = 8;         /* ldxfsr/stxfsr */
 125                         else
 126                                 return (SIMU_ILLEGAL);
 127                         break;
 128                 case 2: sz = 16;
 129                         break;          /* ldqf{a}/stqf{a} */
 130                 case 3: sz = 8;
 131                         break;          /* lddf{a}/stdf{a} */
 132                 }
 133                 /*
 134                  * Fix to access extra double register encoding plus
 135                  * compensate to access the correct fpu_dreg.
 136                  */
 137                 if ((sz > 4) && (fsrflg == 0)) {
 138                         if ((rd & 1) == 1)
 139                                 rd = (rd & 0x1e) | 0x20;
 140                         rd = rd >> 1;
 141                         if ((sz == 16) && ((rd & 0x1) != 0))
 142                                 return (SIMU_ILLEGAL);
 143                 }
 144         } else {
 145                 int sz_bits = (inst >> 19) & 0xf;
 146                 switch (sz_bits) {              /* map size bits to a number */
 147                 case 0:                         /* lduw{a} */
 148                 case 4:                         /* stw{a} */
 149                 case 8:                         /* ldsw{a} */
 150                 case 0xf:                       /* swap */
 151                         sz = 4; break;
 152                 case 1:                         /* ldub{a} */
 153                 case 5:                         /* stb{a} */
 154                 case 9:                         /* ldsb{a} */
 155                 case 0xd:                       /* ldstub */
 156                         sz = 1; break;
 157                 case 2:                         /* lduh{a} */
 158                 case 6:                         /* sth{a} */
 159                 case 0xa:                       /* ldsh{a} */
 160                         sz = 2; break;
 161                 case 3:                         /* ldd{a} */
 162                 case 7:                         /* std{a} */
 163                         lddstdflg = 1;
 164                         sz = 8; break;
 165                 case 0xb:                       /* ldx{a} */
 166                 case 0xe:                       /* stx{a} */
 167                         sz = 8; break;
 168                 }
 169         }
 170 
 171 
 172         /* only support primary and secondary asi's */
 173         if ((op3 >> 4) & 1) {
 174                 if (immflg) {
 175                         asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
 176                             TSTATE_ASI_MASK;
 177                 } else {
 178                         asi = (inst >> 5) & 0xff;
 179                 }
 180                 switch (asi) {
 181                 case ASI_P:
 182                 case ASI_S:
 183                         break;
 184                 case ASI_PNF:
 185                 case ASI_SNF:
 186                         nf = 1;
 187                         break;
 188                 case ASI_PL:
 189                 case ASI_SL:
 190                         ltlend = 1;
 191                         break;
 192                 case ASI_PNFL:
 193                 case ASI_SNFL:
 194                         ltlend = 1;
 195                         nf = 1;
 196                         break;
 197                 default:
 198                         return (0);
 199                 }
 200                 /*
 201                  * Non-faulting stores generate a data_access_exception trap,
 202                  * according to the Spitfire manual, which should be signaled
 203                  * as an illegal instruction trap, because it can't be fixed.
 204                  */
 205                 if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
 206                         return (SIMU_ILLEGAL);
 207         }
 208 
 209         if (aligndebug) {
 210                 printf("unaligned access at %p, instruction: 0x%x\n",
 211                     (void *)rp->r_pc, inst);
 212                 printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
 213                 if (((inst >> 21) & 1) == 0)
 214                         printf(" %s", (((inst >> 22) & 1) ?
 215                             "signed" : "unsigned"));
 216                 printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
 217                 printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
 218                     rd, op3, rs1, rs2, (inst & 0x1fff));
 219         }
 220 
 221         (void) flush_user_windows_to_stack(NULL);
 222         if (getreg(rp, rs1, &val, badaddr))
 223                 return (SIMU_FAULT);
 224         addr = (caddr_t)val;            /* convert to 32/64 bit address */
 225         if (aligndebug)
 226                 printf("addr 1 = %p\n", (void *)addr);
 227 
 228         /* check immediate bit and use immediate field or reg (rs2) */
 229         if (immflg) {
 230                 int imm;
 231                 imm  = inst & 0x1fff;               /* mask out immediate field */
 232                 imm <<= 19;                       /* sign extend it */
 233                 imm >>= 19;
 234                 addr += imm;                    /* compute address */
 235         } else {
 236                 if (getreg(rp, rs2, &val, badaddr))
 237                         return (SIMU_FAULT);
 238                 addr += val;
 239         }
 240 
 241         /*
 242          * If this is a 32-bit program, chop the address accordingly.  The
 243          * intermediate uintptr_t casts prevent warnings under a certain
 244          * compiler, and the temporary 32 bit storage is intended to force
 245          * proper code generation and break up what would otherwise be a
 246          * quadruple cast.
 247          */
 248         if (curproc->p_model == DATAMODEL_ILP32) {
 249                 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
 250                 addr = (caddr_t)(uintptr_t)addr32;
 251         }
 252 
 253         if (aligndebug)
 254                 printf("addr 2 = %p\n", (void *)addr);
 255 
 256         if (addr >= curproc->p_as->a_userlimit) {
 257                 *badaddr = addr;
 258                 goto badret;
 259         }
 260 
 261         /* a single bit differentiates ld and st */
 262         if ((inst >> 21) & 1) {                       /* store */
 263                 if (floatflg) {
 264                         klwp_id_t lwp = ttolwp(curthread);
 265                         kfpu_t *fp = lwptofpu(lwp);
 266                         /* Ensure fp has been enabled */
 267                         if (fpu_exists) {
 268                                 if (!(_fp_read_fprs() & FPRS_FEF))
 269                                         fp_enable();
 270                         } else {
 271                                 if (!fp->fpu_en)
 272                                         fp_enable();
 273                         }
 274                         /* if fpu_exists read fpu reg */
 275                         if (fpu_exists) {
 276                                 if (fsrflg) {
 277                                         _fp_read_pfsr(&data.l[0]);
 278                                 } else {
 279                                         if (sz == 4) {
 280                                                 data.i[0] = 0;
 281                                                 _fp_read_pfreg(
 282                                                     (unsigned *)&data.i[1], rd);
 283                                         }
 284                                         if (sz >= 8)
 285                                                 _fp_read_pdreg(
 286                                                     &data.l[0], rd);
 287                                         if (sz == 16)
 288                                                 _fp_read_pdreg(
 289                                                     &data.l[1], rd+1);
 290                                 }
 291                         } else {
 292                                 if (fsrflg) {
 293                                         /* Clear reserved bits, set version=7 */
 294                                         fp->fpu_fsr &= ~0x30301000;
 295                                         fp->fpu_fsr |= 0xE0000;
 296                                         data.l[0] = fp->fpu_fsr;
 297                                 } else {
 298                                         if (sz == 4) {
 299                                                 data.i[0] = 0;
 300                                                 data.i[1] =
 301                                                     (unsigned)fp->
 302                                                     fpu_fr.fpu_regs[rd];
 303                                         }
 304                                         if (sz >= 8)
 305                                                 data.l[0] =
 306                                                     fp->fpu_fr.fpu_dregs[rd];
 307                                         if (sz == 16)
 308                                                 data.l[1] =
 309                                                     fp->fpu_fr.fpu_dregs[rd+1];
 310                                 }
 311                         }
 312                 } else {
 313                         if (lddstdflg) {                /* combine the data */
 314                                 if (getreg(rp, rd, &data.l[0], badaddr))
 315                                         return (SIMU_FAULT);
 316                                 if (getreg(rp, rd+1, &data.l[1], badaddr))
 317                                         return (SIMU_FAULT);
 318                                 if (ltlend) {
 319                                         /*
 320                                          * For STD, each 32-bit word is byte-
 321                                          * swapped individually.  For
 322                                          * simplicity we don't want to do that
 323                                          * below, so we swap the words now to
 324                                          * get the desired result in the end.
 325                                          */
 326                                         data.i[0] = data.i[3];
 327                                 } else {
 328                                         data.i[0] = data.i[1];
 329                                         data.i[1] = data.i[3];
 330                                 }
 331                         } else {
 332                                 if (getreg(rp, rd, &data.l[0], badaddr))
 333                                         return (SIMU_FAULT);
 334                         }
 335                 }
 336 
 337                 if (aligndebug) {
 338                         if (sz == 16) {
 339                                 printf("data %x %x %x %x\n",
 340                                     data.i[0], data.i[1], data.i[2], data.c[3]);
 341                         } else {
 342                                 printf("data %x %x %x %x %x %x %x %x\n",
 343                                     data.c[0], data.c[1], data.c[2], data.c[3],
 344                                     data.c[4], data.c[5], data.c[6], data.c[7]);
 345                         }
 346                 }
 347 
 348                 if (ltlend) {
 349                         if (sz == 1) {
 350                                 if (xcopyout_little(&data.c[7], addr,
 351                                     (size_t)sz) != 0)
 352                                         goto badret;
 353                         } else if (sz == 2) {
 354                                 if (xcopyout_little(&data.s[3], addr,
 355                                     (size_t)sz) != 0)
 356                                         goto badret;
 357                         } else if (sz == 4) {
 358                                 if (xcopyout_little(&data.i[1], addr,
 359                                     (size_t)sz) != 0)
 360                                         goto badret;
 361                         } else {
 362                                 if (xcopyout_little(&data.l[0], addr,
 363                                     (size_t)sz) != 0)
 364                                         goto badret;
 365                         }
 366                 } else {
 367                         if (sz == 1) {
 368                                 if (copyout(&data.c[7], addr, (size_t)sz) == -1)
 369                                         goto badret;
 370                         } else if (sz == 2) {
 371                                 if (copyout(&data.s[3], addr, (size_t)sz) == -1)
 372                                         goto badret;
 373                         } else if (sz == 4) {
 374                                 if (copyout(&data.i[1], addr, (size_t)sz) == -1)
 375                                         goto badret;
 376                         } else {
 377                                 if (copyout(&data.l[0], addr, (size_t)sz) == -1)
 378                                         goto badret;
 379                         }
 380                 }
 381         } else {                                /* load */
 382                 if (sz == 1) {
 383                         if (ltlend) {
 384                                 if (xcopyin_little(addr, &data.c[7],
 385                                     (size_t)sz) != 0) {
 386                                         if (nf)
 387                                                 data.c[7] = 0;
 388                                         else
 389                                                 goto badret;
 390                                 }
 391                         } else {
 392                                 if (copyin(addr, &data.c[7],
 393                                     (size_t)sz) == -1) {
 394                                         if (nf)
 395                                                 data.c[7] = 0;
 396                                         else
 397                                                 goto badret;
 398                                 }
 399                         }
 400                         /* if signed and the sign bit is set extend it */
 401                         if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
 402                                 data.i[0] = (uint_t)-1; /* extend sign bit */
 403                                 data.s[2] = (ushort_t)-1;
 404                                 data.c[6] = (uchar_t)-1;
 405                         } else {
 406                                 data.i[0] = 0;  /* clear upper 32+24 bits */
 407                                 data.s[2] = 0;
 408                                 data.c[6] = 0;
 409                         }
 410                 } else if (sz == 2) {
 411                         if (ltlend) {
 412                                 if (xcopyin_little(addr, &data.s[3],
 413                                     (size_t)sz) != 0) {
 414                                         if (nf)
 415                                                 data.s[3] = 0;
 416                                         else
 417                                                 goto badret;
 418                                 }
 419                         } else {
 420                                 if (copyin(addr, &data.s[3],
 421                                     (size_t)sz) == -1) {
 422                                         if (nf)
 423                                                 data.s[3] = 0;
 424                                         else
 425                                                 goto badret;
 426                                 }
 427                         }
 428                         /* if signed and the sign bit is set extend it */
 429                         if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
 430                                 data.i[0] = (uint_t)-1; /* extend sign bit */
 431                                 data.s[2] = (ushort_t)-1;
 432                         } else {
 433                                 data.i[0] = 0;  /* clear upper 32+16 bits */
 434                                 data.s[2] = 0;
 435                         }
 436                 } else if (sz == 4) {
 437                         if (ltlend) {
 438                                 if (xcopyin_little(addr, &data.i[1],
 439                                     (size_t)sz) != 0) {
 440                                         if (!nf)
 441                                                 goto badret;
 442                                         data.i[1] = 0;
 443                                 }
 444                         } else {
 445                                 if (copyin(addr, &data.i[1],
 446                                     (size_t)sz) == -1) {
 447                                         if (!nf)
 448                                                 goto badret;
 449                                         data.i[1] = 0;
 450                                 }
 451                         }
 452                         /* if signed and the sign bit is set extend it */
 453                         if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
 454                                 data.i[0] = (uint_t)-1; /* extend sign bit */
 455                         } else {
 456                                 data.i[0] = 0;  /* clear upper 32 bits */
 457                         }
 458                 } else {
 459                         if (ltlend) {
 460                                 if (xcopyin_little(addr, &data.l[0],
 461                                     (size_t)sz) != 0) {
 462                                         if (!nf)
 463                                                 goto badret;
 464                                         data.l[0] = 0;
 465                                 }
 466                         } else {
 467                                 if (copyin(addr, &data.l[0],
 468                                     (size_t)sz) == -1) {
 469                                         if (!nf)
 470                                                 goto badret;
 471                                         data.l[0] = 0;
 472                                 }
 473                         }
 474                 }
 475 
 476                 if (aligndebug) {
 477                         if (sz == 16) {
 478                                 printf("data %x %x %x %x\n",
 479                                     data.i[0], data.i[1], data.i[2], data.c[3]);
 480                         } else {
 481                                 printf("data %x %x %x %x %x %x %x %x\n",
 482                                     data.c[0], data.c[1], data.c[2], data.c[3],
 483                                     data.c[4], data.c[5], data.c[6], data.c[7]);
 484                         }
 485                 }
 486 
 487                 if (floatflg) {         /* if fpu_exists write fpu reg */
 488                         klwp_id_t lwp = ttolwp(curthread);
 489                         kfpu_t *fp = lwptofpu(lwp);
 490                         /* Ensure fp has been enabled */
 491                         if (fpu_exists) {
 492                                 if (!(_fp_read_fprs() & FPRS_FEF))
 493                                         fp_enable();
 494                         } else {
 495                                 if (!fp->fpu_en)
 496                                         fp_enable();
 497                         }
 498                         /* if fpu_exists read fpu reg */
 499                         if (fpu_exists) {
 500                                 if (fsrflg) {
 501                                         _fp_write_pfsr(&data.l[0]);
 502                                 } else {
 503                                         if (sz == 4)
 504                                                 _fp_write_pfreg(
 505                                                     (unsigned *)&data.i[1], rd);
 506                                         if (sz >= 8)
 507                                                 _fp_write_pdreg(
 508                                                     &data.l[0], rd);
 509                                         if (sz == 16)
 510                                                 _fp_write_pdreg(
 511                                                     &data.l[1], rd+1);
 512                                 }
 513                         } else {
 514                                 if (fsrflg) {
 515                                         fp->fpu_fsr = data.l[0];
 516                                 } else {
 517                                         if (sz == 4)
 518                                                 fp->fpu_fr.fpu_regs[rd] =
 519                                                     (unsigned)data.i[1];
 520                                         if (sz >= 8)
 521                                                 fp->fpu_fr.fpu_dregs[rd] =
 522                                                     data.l[0];
 523                                         if (sz == 16)
 524                                                 fp->fpu_fr.fpu_dregs[rd+1] =
 525                                                     data.l[1];
 526                                 }
 527                         }
 528                 } else {
 529                         if (lddstdflg) {                /* split the data */
 530                                 if (ltlend) {
 531                                         /*
 532                                          * For LDD, each 32-bit word is byte-
 533                                          * swapped individually.  We didn't
 534                                          * do that above, but this will give
 535                                          * us the desired result.
 536                                          */
 537                                         data.i[3] = data.i[0];
 538                                 } else {
 539                                         data.i[3] = data.i[1];
 540                                         data.i[1] = data.i[0];
 541                                 }
 542                                 data.i[0] = 0;
 543                                 data.i[2] = 0;
 544                                 if (putreg(&data.l[0], rp, rd, badaddr) == -1)
 545                                         goto badret;
 546                                 if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
 547                                         goto badret;
 548                         } else {
 549                                 if (putreg(&data.l[0], rp, rd, badaddr) == -1)
 550                                         goto badret;
 551                         }
 552                 }
 553         }
 554         return (SIMU_SUCCESS);
 555 badret:
 556         return (SIMU_FAULT);
 557 }
 558 
 559 
 560 int
 561 simulate_lddstd(struct regs *rp, caddr_t *badaddr)
 562 {
 563         uint_t  inst, op3, asi = 0;
 564         uint_t  rd, rs1, rs2;
 565         int     nf = 0, ltlend = 0, usermode;
 566         int     immflg;
 567         uint64_t reven;
 568         uint64_t rodd;
 569         caddr_t addr;
 570         uint64_t val;
 571         uint64_t data;
 572 
 573         usermode = USERMODE(rp->r_tstate);
 574 
 575         if (usermode)
 576                 inst = fetch_user_instr((caddr_t)rp->r_pc);
 577         else
 578                 inst = *(uint_t *)rp->r_pc;
 579 
 580         op3 = (inst >> 19) & 0x3f;
 581         rd = (inst >> 25) & 0x1f;
 582         rs1 = (inst >> 14) & 0x1f;
 583         rs2 = inst & 0x1f;
 584         immflg = (inst >> 13) & 1;
 585 
 586         if (USERMODE(rp->r_tstate))
 587                 (void) flush_user_windows_to_stack(NULL);
 588         else
 589                 flush_windows();
 590 
 591         if ((op3 >> 4) & 1) {         /* is this LDDA/STDA? */
 592                 if (immflg) {
 593                         asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
 594                             TSTATE_ASI_MASK;
 595                 } else {
 596                         asi = (inst >> 5) & 0xff;
 597                 }
 598                 switch (asi) {
 599                 case ASI_P:
 600                 case ASI_S:
 601                         break;
 602                 case ASI_PNF:
 603                 case ASI_SNF:
 604                         nf = 1;
 605                         break;
 606                 case ASI_PL:
 607                 case ASI_SL:
 608                         ltlend = 1;
 609                         break;
 610                 case ASI_PNFL:
 611                 case ASI_SNFL:
 612                         ltlend = 1;
 613                         nf = 1;
 614                         break;
 615                 case ASI_AIUP:
 616                 case ASI_AIUS:
 617                         usermode = 1;
 618                         break;
 619                 case ASI_AIUPL:
 620                 case ASI_AIUSL:
 621                         usermode = 1;
 622                         ltlend = 1;
 623                         break;
 624                 default:
 625                         return (SIMU_ILLEGAL);
 626                 }
 627         }
 628 
 629         if (getreg(rp, rs1, &val, badaddr))
 630                 return (SIMU_FAULT);
 631         addr = (caddr_t)val;            /* convert to 32/64 bit address */
 632 
 633         /* check immediate bit and use immediate field or reg (rs2) */
 634         if (immflg) {
 635                 int imm;
 636                 imm  = inst & 0x1fff;               /* mask out immediate field */
 637                 imm <<= 19;                       /* sign extend it */
 638                 imm >>= 19;
 639                 addr += imm;                    /* compute address */
 640         } else {
 641                 if (getreg(rp, rs2, &val, badaddr))
 642                         return (SIMU_FAULT);
 643                 addr += val;
 644         }
 645 
 646         /*
 647          * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
 648          * T_ALIGNMENT.  So we have to make sure that the address is
 649          * kosher before trying to use it, because the hardware hasn't
 650          * checked it for us yet.
 651          */
 652         if (((uintptr_t)addr & 0x7) != 0) {
 653                 if (curproc->p_fixalignment)
 654                         return (do_unaligned(rp, badaddr));
 655                 else
 656                         return (SIMU_UNALIGN);
 657         }
 658 
 659         /*
 660          * If this is a 32-bit program, chop the address accordingly.  The
 661          * intermediate uintptr_t casts prevent warnings under a certain
 662          * compiler, and the temporary 32 bit storage is intended to force
 663          * proper code generation and break up what would otherwise be a
 664          * quadruple cast.
 665          */
 666         if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
 667                 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
 668                 addr = (caddr_t)(uintptr_t)addr32;
 669         }
 670 
 671         if ((inst >> 21) & 1) {                       /* store */
 672                 if (getreg(rp, rd, &reven, badaddr))
 673                         return (SIMU_FAULT);
 674                 if (getreg(rp, rd+1, &rodd, badaddr))
 675                         return (SIMU_FAULT);
 676                 if (ltlend) {
 677                         reven = BSWAP_32(reven);
 678                         rodd  = BSWAP_32(rodd);
 679                 }
 680                 data = (reven << 32) | rodd;
 681                 if (usermode) {
 682                         if (suword64_nowatch(addr, data) == -1)
 683                                 return (SIMU_FAULT);
 684                 } else {
 685                         *(uint64_t *)addr = data;
 686                 }
 687         } else {                                /* load */
 688                 if (usermode) {
 689                         if (fuword64_nowatch(addr, &data)) {
 690                                 if (nf)
 691                                         data = 0;
 692                                 else
 693                                         return (SIMU_FAULT);
 694                         }
 695                 } else
 696                         data = *(uint64_t *)addr;
 697 
 698                 reven = (data >> 32);
 699                 rodd  = (uint64_t)(uint32_t)data;
 700                 if (ltlend) {
 701                         reven = BSWAP_32(reven);
 702                         rodd  = BSWAP_32(rodd);
 703                 }
 704 
 705                 if (putreg(&reven, rp, rd, badaddr) == -1)
 706                         return (SIMU_FAULT);
 707                 if (putreg(&rodd, rp, rd+1, badaddr) == -1)
 708                         return (SIMU_FAULT);
 709         }
 710         return (SIMU_SUCCESS);
 711 }
 712 
 713 
 714 /*
 715  * simulate popc
 716  */
 717 static int
 718 simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
 719 {
 720         uint_t  rd, rs2, rs1;
 721         uint_t  immflg;
 722         uint64_t val, cnt = 0;
 723 
 724         rd = (inst >> 25) & 0x1f;
 725         rs1 = (inst >> 14) & 0x1f;
 726         rs2 = inst & 0x1f;
 727         immflg = (inst >> 13) & 1;
 728 
 729         if (rs1 > 0)
 730                 return (SIMU_ILLEGAL);
 731 
 732         (void) flush_user_windows_to_stack(NULL);
 733 
 734         /* check immediate bit and use immediate field or reg (rs2) */
 735         if (immflg) {
 736                 int64_t imm;
 737                 imm  = inst & 0x1fff;               /* mask out immediate field */
 738                 imm <<= 51;                       /* sign extend it */
 739                 imm >>= 51;
 740                 if (imm != 0) {
 741                         for (cnt = 0; imm != 0; imm &= imm-1)
 742                                 cnt++;
 743                 }
 744         } else {
 745                 if (getreg(rp, rs2, &val, badaddr))
 746                         return (SIMU_FAULT);
 747                 if (val != 0) {
 748                         for (cnt = 0; val != 0; val &= val-1)
 749                                 cnt++;
 750                 }
 751         }
 752 
 753         if (putreg(&cnt, rp, rd, badaddr) == -1)
 754                 return (SIMU_FAULT);
 755 
 756         return (SIMU_SUCCESS);
 757 }
 758 
 759 /*
 760  * simulate mulscc
 761  */
 762 static int
 763 simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
 764 {
 765         uint32_t        s1, s2;
 766         uint32_t        c, d, v;
 767         uint_t          rd, rs1;
 768         int64_t         d64;
 769         uint64_t        ud64;
 770         uint64_t        drs1;
 771 
 772         (void) flush_user_windows_to_stack(NULL);
 773 
 774         if ((inst >> 13) & 1) {               /* immediate */
 775                 d64 = inst & 0x1fff;
 776                 d64 <<= 51;               /* sign extend it */
 777                 d64 >>= 51;
 778         } else {
 779                 uint_t          rs2;
 780                 uint64_t        drs2;
 781 
 782                 if (inst & 0x1fe0) {
 783                         return (SIMU_ILLEGAL);
 784                 }
 785                 rs2 = inst & 0x1f;
 786                 if (getreg(rp, rs2, &drs2, badaddr)) {
 787                         return (SIMU_FAULT);
 788                 }
 789                 d64 = (int64_t)drs2;
 790         }
 791 
 792         rs1 = (inst >> 14) & 0x1f;
 793         if (getreg(rp, rs1, &drs1, badaddr)) {
 794                 return (SIMU_FAULT);
 795         }
 796         /* icc.n xor icc.v */
 797         s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
 798             ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
 799         s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
 800 
 801         if (rp->r_y & 1) {
 802                 s2 = (uint32_t)d64;
 803         } else {
 804                 s2 = 0;
 805         }
 806         d = s1 + s2;
 807 
 808         ud64 = (uint64_t)d;
 809 
 810         /* set the icc flags */
 811         v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
 812         c = (s1 & s2) | (~d & (s1 | s2));
 813         rp->r_tstate &= ~TSTATE_ICC;
 814         rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
 815         rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
 816         rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
 817         rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);
 818 
 819         if (rp->r_tstate & TSTATE_IC) {
 820                 ud64 |= (1ULL << 32);
 821         }
 822 
 823         /* set the xcc flags */
 824         rp->r_tstate &= ~TSTATE_XCC;
 825         if (ud64 == 0) {
 826                 rp->r_tstate |= TSTATE_XZ;
 827         }
 828 
 829         rd = (inst >> 25) & 0x1f;
 830         if (putreg(&ud64, rp, rd, badaddr)) {
 831                 return (SIMU_FAULT);
 832         }
 833 
 834         d64 = (drs1 << 32) | (uint32_t)rp->r_y;
 835         d64 >>= 1;
 836         rp->r_y = (uint32_t)d64;
 837 
 838         return (SIMU_SUCCESS);
 839 }
 840 
 841 /*
 842  * simulate unimplemented instructions (popc, ldqf{a}, stqf{a})
 843  */
 844 int
 845 simulate_unimp(struct regs *rp, caddr_t *badaddr)
 846 {
 847         uint_t  inst, optype, op3, asi;
 848         uint_t  rs1, rd;
 849         uint_t  ignor, i;
 850         machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
 851         int     nomatch = 0;
 852         caddr_t addr = (caddr_t)rp->r_pc;
 853         struct as *as;
 854         caddr_t ka;
 855         pfn_t   pfnum;
 856         page_t *pp;
 857         proc_t *p = ttoproc(curthread);
 858         struct seg *mapseg;
 859         struct segvn_data *svd;
 860 
 861         ASSERT(USERMODE(rp->r_tstate));
 862         inst = fetch_user_instr(addr);
 863         if (inst == (uint_t)-1) {
 864                 mpcb->mpcb_illexcaddr = addr;
 865                 mpcb->mpcb_illexcinsn = (uint32_t)-1;
 866                 return (SIMU_ILLEGAL);
 867         }
 868 
 869         /*
 870          * When fixing dirty v8 instructions there's a race if two processors
 871          * are executing the dirty executable at the same time.  If one
 872          * cleans the instruction as the other is executing it the second
 873          * processor will see a clean instruction when it comes through this
 874          * code and will return SIMU_ILLEGAL.  To work around the race
 875          * this code will keep track of the last illegal instruction seen
 876          * by each lwp and will only take action if the illegal instruction
 877          * is repeatable.
 878          */
 879         if (addr != mpcb->mpcb_illexcaddr ||
 880             inst != mpcb->mpcb_illexcinsn)
 881                 nomatch = 1;
 882         mpcb->mpcb_illexcaddr = addr;
 883         mpcb->mpcb_illexcinsn = inst;
 884 
 885         /* instruction fields */
 886         i = (inst >> 13) & 0x1;
 887         rd = (inst >> 25) & 0x1f;
 888         optype = (inst >> 30) & 0x3;
 889         op3 = (inst >> 19) & 0x3f;
 890         ignor = (inst >> 5) & 0xff;
 891         if (IS_IBIT_SET(inst)) {
 892                 asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
 893                     TSTATE_ASI_MASK);
 894         } else {
 895                 asi = ignor;
 896         }
 897 
 898         if (IS_VIS1(optype, op3) ||
 899             IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
 900             IS_FLOAT_QUAD_OP(optype, op3)) {
 901                 klwp_t *lwp = ttolwp(curthread);
 902                 kfpu_t *fp = lwptofpu(lwp);
 903                 if (fpu_exists) {
 904                         if (!(_fp_read_fprs() & FPRS_FEF))
 905                                 fp_enable();
 906                         _fp_read_pfsr(&fp->fpu_fsr);
 907                 } else {
 908                         if (!fp->fpu_en)
 909                                 fp_enable();
 910                 }
 911                 fp_precise(rp);
 912                 return (SIMU_RETRY);
 913         }
 914 
 915         if (optype == 2 && op3 == IOP_V8_POPC) {
 916                 return (simulate_popc(rp, badaddr, inst));
 917         } else if (optype == 3 && op3 == IOP_V8_POPC) {
 918                 return (SIMU_ILLEGAL);
 919         } else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
 920                 return (simulate_mulscc(rp, badaddr, inst));
 921         }
 922 
 923         if (optype == OP_V8_LDSTR) {
 924                 if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
 925                     op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
 926                         return (do_unaligned(rp, badaddr));
 927         }
 928 
 929         /* This is a new instruction so illexccnt should also be set. */
 930         if (nomatch) {
 931                 mpcb->mpcb_illexccnt = 0;
 932                 return (SIMU_RETRY);
 933         }
 934 
 935         /*
 936          * In order to keep us from entering into an infinite loop while
 937          * attempting to clean up faulty instructions, we will return
 938          * SIMU_ILLEGAL once we've cleaned up the instruction as much
 939          * as we can, and still end up here.
 940          */
 941         if (mpcb->mpcb_illexccnt >= 3)
 942                 return (SIMU_ILLEGAL);
 943 
 944         mpcb->mpcb_illexccnt += 1;
 945 
 946         /*
 947          * The rest of the code handles v8 binaries with instructions
 948          * that have dirty (non-zero) bits in reserved or 'ignored'
 949          * fields; these will cause core dumps on v9 machines.
 950          *
 951          * We only clean dirty instructions in 32-bit programs (ie, v8)
 952          * running on SPARCv9 processors.  True v9 programs are forced
 953          * to use the instruction set as intended.
 954          */
 955         if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
 956                 return (SIMU_ILLEGAL);
 957         switch (optype) {
 958         case OP_V8_BRANCH:
 959         case OP_V8_CALL:
 960                 return (SIMU_ILLEGAL);  /* these don't have ignored fields */
 961                 /*NOTREACHED*/
 962         case OP_V8_ARITH:
 963                 switch (op3) {
 964                 case IOP_V8_RETT:
 965                         if (rd == 0 && !(i == 0 && ignor))
 966                                 return (SIMU_ILLEGAL);
 967                         if (rd)
 968                                 inst &= ~(0x1f << 25);
 969                         if (i == 0 && ignor)
 970                                 inst &= ~(0xff << 5);
 971                         break;
 972                 case IOP_V8_TCC:
 973                         if (i == 0 && ignor != 0) {
 974                                 inst &= ~(0xff << 5);
 975                         } else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
 976                                 inst &= ~(0x3f << 7);
 977                         } else {
 978                                 return (SIMU_ILLEGAL);
 979                         }
 980                         break;
 981                 case IOP_V8_JMPL:
 982                 case IOP_V8_RESTORE:
 983                 case IOP_V8_SAVE:
 984                         if ((op3 == IOP_V8_RETT && rd) ||
 985                             (i == 0 && ignor)) {
 986                                 inst &= ~(0xff << 5);
 987                         } else {
 988                                 return (SIMU_ILLEGAL);
 989                         }
 990                         break;
 991                 case IOP_V8_FCMP:
 992                         if (rd == 0)
 993                                 return (SIMU_ILLEGAL);
 994                         inst &= ~(0x1f << 25);
 995                         break;
 996                 case IOP_V8_RDASR:
 997                         rs1 = ((inst >> 14) & 0x1f);
 998                         if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
 999                                 /*
1000                                  * The instruction specifies an invalid
1001                                  * state register - better bail out than
1002                                  * "fix" it when we're not sure what was
1003                                  * intended.
1004                                  */
1005                                 return (SIMU_ILLEGAL);
1006                         }
1007                                 /*
1008                                  * Note: this case includes the 'stbar'
1009                                  * instruction (rs1 == 15 && i == 0).
1010                                  */
1011                                 if ((ignor = (inst & 0x3fff)) != 0)
1012                                         inst &= ~(0x3fff);
1013                         break;
1014                 case IOP_V8_SRA:
1015                 case IOP_V8_SRL:
1016                 case IOP_V8_SLL:
1017                         if (ignor == 0)
1018                                 return (SIMU_ILLEGAL);
1019                         inst &= ~(0xff << 5);
1020                         break;
1021                 case IOP_V8_ADD:
1022                 case IOP_V8_AND:
1023                 case IOP_V8_OR:
1024                 case IOP_V8_XOR:
1025                 case IOP_V8_SUB:
1026                 case IOP_V8_ANDN:
1027                 case IOP_V8_ORN:
1028                 case IOP_V8_XNOR:
1029                 case IOP_V8_ADDC:
1030                 case IOP_V8_UMUL:
1031                 case IOP_V8_SMUL:
1032                 case IOP_V8_SUBC:
1033                 case IOP_V8_UDIV:
1034                 case IOP_V8_SDIV:
1035                 case IOP_V8_ADDcc:
1036                 case IOP_V8_ANDcc:
1037                 case IOP_V8_ORcc:
1038                 case IOP_V8_XORcc:
1039                 case IOP_V8_SUBcc:
1040                 case IOP_V8_ANDNcc:
1041                 case IOP_V8_ORNcc:
1042                 case IOP_V8_XNORcc:
1043                 case IOP_V8_ADDCcc:
1044                 case IOP_V8_UMULcc:
1045                 case IOP_V8_SMULcc:
1046                 case IOP_V8_SUBCcc:
1047                 case IOP_V8_UDIVcc:
1048                 case IOP_V8_SDIVcc:
1049                 case IOP_V8_TADDcc:
1050                 case IOP_V8_TSUBcc:
1051                 case IOP_V8_TADDccTV:
1052                 case IOP_V8_TSUBccTV:
1053                 case IOP_V8_MULScc:
1054                 case IOP_V8_WRASR:
1055                 case IOP_V8_FLUSH:
1056                         if (i != 0 || ignor == 0)
1057                                 return (SIMU_ILLEGAL);
1058                         inst &= ~(0xff << 5);
1059                         break;
1060                 default:
1061                         return (SIMU_ILLEGAL);
1062                 }
1063                 break;
1064         case OP_V8_LDSTR:
1065                 switch (op3) {
1066                 case IOP_V8_STFSR:
1067                 case IOP_V8_LDFSR:
1068                         if (rd == 0 && !(i == 0 && ignor))
1069                                 return (SIMU_ILLEGAL);
1070                         if (rd)
1071                                 inst &= ~(0x1f << 25);
1072                         if (i == 0 && ignor)
1073                                 inst &= ~(0xff << 5);
1074                         break;
1075                 default:
1076                         if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
1077                             i == 0 && ignor)
1078                                 inst &= ~(0xff << 5);
1079                         else
1080                                 return (SIMU_ILLEGAL);
1081                         break;
1082                 }
1083                 break;
1084         default:
1085                 return (SIMU_ILLEGAL);
1086         }
1087 
1088         as = p->p_as;
1089 
1090         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1091         mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
1092         ASSERT(mapseg != NULL);
1093         svd = (struct segvn_data *)mapseg->s_data;
1094 
1095         /*
1096          * We only create COW page for MAP_PRIVATE mappings.
1097          */
1098         SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
1099         if ((svd->type & MAP_TYPE) & MAP_SHARED) {
1100                 SEGVN_LOCK_EXIT(as, &svd->lock);
1101                 AS_LOCK_EXIT(as, &as->a_lock);
1102                 return (SIMU_ILLEGAL);
1103         }
1104         SEGVN_LOCK_EXIT(as, &svd->lock);
1105         AS_LOCK_EXIT(as, &as->a_lock);
1106 
1107         /*
1108          * A "flush" instruction using the user PC's vaddr will not work
1109          * here, at least on Spitfire. Instead we create a temporary kernel
1110          * mapping to the user's text page, then modify and flush that.
1111          * Break COW by locking user page.
1112          */
1113         if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
1114             F_SOFTLOCK, S_READ))
1115                 return (SIMU_FAULT);
1116 
1117         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1118         pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
1119         AS_LOCK_EXIT(as, &as->a_lock);
1120         if (pf_is_memory(pfnum)) {
1121                 pp = page_numtopp_nolock(pfnum);
1122                 ASSERT(pp == NULL || PAGE_LOCKED(pp));
1123         } else {
1124                 (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1125                     PAGESIZE, F_SOFTUNLOCK, S_READ);
1126                 return (SIMU_FAULT);
1127         }
1128 
1129         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1130         ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
1131         *(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
1132         doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
1133         ppmapout(ka);
1134         AS_LOCK_EXIT(as, &as->a_lock);
1135 
1136         (void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
1137             PAGESIZE, F_SOFTUNLOCK, S_READ);
1138         return (SIMU_RETRY);
1139 }
1140 
1141 /*
1142  * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
1143  */
1144 int
1145 simulate_rdtick(struct regs *rp)
1146 {
1147         uint_t  inst, op, op3, rd, rs1, i;
1148         caddr_t badaddr;
1149 
1150         inst = fetch_user_instr((caddr_t)rp->r_pc);
1151         op   = (inst >> 30) & 0x3;
1152         rd   = (inst >> 25) & 0x1F;
1153         op3  = (inst >> 19) & 0x3F;
1154         i    = (inst >> 13) & 0x1;
1155 
1156         /*
1157          * Make sure this is either a %tick read (rs1 == 0x4) or
1158          * a %stick read (rs1 == 0x18) instruction.
1159          */
1160         if (op == 2 && op3 == 0x28 && i == 0) {
1161                 rs1 = (inst >> 14) & 0x1F;
1162 
1163                 if (rs1 == 0x4) {
1164                         uint64_t tick;
1165                         (void) flush_user_windows_to_stack(NULL);
1166                         tick = gettick_counter();
1167                         if (putreg(&tick, rp, rd, &badaddr) == 0)
1168                                 return (SIMU_SUCCESS);
1169                 } else if (rs1 == 0x18) {
1170                         uint64_t stick;
1171                         (void) flush_user_windows_to_stack(NULL);
1172                         stick = gethrtime_unscaled();
1173                         if (putreg(&stick, rp, rd, &badaddr) == 0)
1174                                 return (SIMU_SUCCESS);
1175                 }
1176         }
1177 
1178         return (SIMU_FAULT);
1179 }
1180 
1181 /*
1182  * Get the value of a register for instruction simulation
1183  * by using the regs or window structure pointers.
1184  * Return 0 for success, and -1 for failure.  If there is a failure,
1185  * save the faulting address using badaddr pointer.
1186  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1187  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1188  */
1189 int
1190 getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
1191 {
1192         uint64_t *rgs, *sp;
1193         int rv = 0;
1194 
1195         rgs = (uint64_t *)&rp->r_ps;             /* globals and outs */
1196         sp = (uint64_t *)rp->r_sp;           /* ins and locals */
1197         if (reg == 0) {
1198                 *val = 0;
1199         } else if (reg < 16) {
1200                 *val = rgs[reg];
1201         } else if (IS_V9STACK(sp)) {
1202                 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1203                 uint64_t *addr = (uint64_t *)&rw[reg - 16];
1204                 uint64_t res;
1205 
1206                 if (USERMODE(rp->r_tstate)) {
1207                         if (fuword64_nowatch(addr, &res) == -1) {
1208                                 *badaddr = (caddr_t)addr;
1209                                 rv = -1;
1210                         }
1211                 } else {
1212                         res = *addr;
1213                 }
1214                 *val = res;
1215         } else {
1216                 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
1217                 uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
1218                 uint32_t *addr = (uint32_t *)&rw[reg - 16];
1219                 uint32_t res;
1220 
1221                 if (USERMODE(rp->r_tstate)) {
1222                         if (fuword32_nowatch(addr, &res) == -1) {
1223                                 *badaddr = (caddr_t)addr;
1224                                 rv = -1;
1225                         }
1226                 } else {
1227                         res = *addr;
1228                 }
1229                 *val = (uint64_t)res;
1230         }
1231         return (rv);
1232 }
1233 
1234 /*
1235  * Set the value of a register after instruction simulation
1236  * by using the regs or window structure pointers.
1237  * Return 0 for succes -1 failure.
1238  * save the faulting address using badaddr pointer.
1239  * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
1240  * Don't truncate globals/outs for 32 bit programs, for v8+ support.
1241  */
1242 int
1243 putreg(uint64_t *data, struct regs *rp, uint_t reg, caddr_t *badaddr)
1244 {
1245         uint64_t *rgs, *sp;
1246         int rv = 0;
1247 
1248         rgs = (uint64_t *)&rp->r_ps;             /* globals and outs */
1249         sp = (uint64_t *)rp->r_sp;           /* ins and locals */
1250         if (reg == 0) {
1251                 return (0);
1252         } else if (reg < 16) {
1253                 rgs[reg] = *data;
1254         } else if (IS_V9STACK(sp)) {
1255                 uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
1256                 uint64_t *addr = (uint64_t *)&rw[reg - 16];
1257                 uint64_t res;
1258 
1259                 if (USERMODE(rp->r_tstate)) {
1260                         struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1261 
1262                         res = *data;
1263                         if (suword64_nowatch(addr, res) != 0) {
1264                                 *badaddr = (caddr_t)addr;
1265                                 rv = -1;
1266                         }
1267                         /*
1268                          * We have changed a local or in register;
1269                          * nuke the watchpoint return windows.
1270                          */
1271                         mpcb->mpcb_rsp[0] = NULL;
1272                         mpcb->mpcb_rsp[1] = NULL;
1273                 } else {
1274                         res = *data;
1275                         *addr = res;
1276                 }
1277         } else {
1278                 caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
1279                 uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
1280                 uint32_t *addr = (uint32_t *)&rw[reg - 16];
1281                 uint32_t res;
1282 
1283                 if (USERMODE(rp->r_tstate)) {
1284                         struct machpcb *mpcb = lwptompcb(curthread->t_lwp);
1285 
1286                         res = (uint_t)*data;
1287                         if (suword32_nowatch(addr, res) != 0) {
1288                                 *badaddr = (caddr_t)addr;
1289                                 rv = -1;
1290                         }
1291                         /*
1292                          * We have changed a local or in register;
1293                          * nuke the watchpoint return windows.
1294                          */
1295                         mpcb->mpcb_rsp[0] = NULL;
1296                         mpcb->mpcb_rsp[1] = NULL;
1297 
1298                 } else {
1299                         res = (uint_t)*data;
1300                         *addr = res;
1301                 }
1302         }
1303         return (rv);
1304 }
1305 
1306 /*
1307  * Calculate a memory reference address from instruction
1308  * operands, used to return the address of a fault, instead
1309  * of the instruction when an error occurs.  This is code that is
1310  * common with most of the routines that simulate instructions.
1311  */
1312 int
1313 calc_memaddr(struct regs *rp, caddr_t *badaddr)
1314 {
1315         uint_t  inst;
1316         uint_t  rd, rs1, rs2;
1317         int     sz;
1318         int     immflg;
1319         int     floatflg;
1320         caddr_t  addr;
1321         uint64_t val;
1322 
1323         if (USERMODE(rp->r_tstate))
1324                 inst = fetch_user_instr((caddr_t)rp->r_pc);
1325         else
1326                 inst = *(uint_t *)rp->r_pc;
1327 
1328         rd = (inst >> 25) & 0x1f;
1329         rs1 = (inst >> 14) & 0x1f;
1330         rs2 = inst & 0x1f;
1331         floatflg = (inst >> 24) & 1;
1332         immflg = (inst >> 13) & 1;
1333 
1334         if (floatflg) {
1335                 switch ((inst >> 19) & 3) {   /* map size bits to a number */
1336                 case 0: sz = 4; break;          /* ldf/stf */
1337                 case 1: return (0);             /* ld[x]fsr/st[x]fsr */
1338                 case 2: sz = 16; break;         /* ldqf/stqf */
1339                 case 3: sz = 8; break;          /* lddf/stdf */
1340                 }
1341                 /*
1342                  * Fix to access extra double register encoding plus
1343                  * compensate to access the correct fpu_dreg.
1344                  */
1345                 if (sz > 4) {
1346                         if ((rd & 1) == 1)
1347                                 rd = (rd & 0x1e) | 0x20;
1348                         rd = rd >> 1;
1349                 }
1350         } else {
1351                 switch ((inst >> 19) & 0xf) { /* map size bits to a number */
1352                 case 0:                         /* lduw */
1353                 case 4:                         /* stw */
1354                 case 8:                         /* ldsw */
1355                 case 0xf:                       /* swap */
1356                         sz = 4; break;
1357                 case 1:                         /* ldub */
1358                 case 5:                         /* stb */
1359                 case 9:                         /* ldsb */
1360                 case 0xd:                       /* ldstub */
1361                         sz = 1; break;
1362                 case 2:                         /* lduh */
1363                 case 6:                         /* sth */
1364                 case 0xa:                       /* ldsh */
1365                         sz = 2; break;
1366                 case 3:                         /* ldd */
1367                 case 7:                         /* std */
1368                 case 0xb:                       /* ldx */
1369                 case 0xe:                       /* stx */
1370                         sz = 8; break;
1371                 }
1372         }
1373 
1374         if (USERMODE(rp->r_tstate))
1375                 (void) flush_user_windows_to_stack(NULL);
1376         else
1377                 flush_windows();
1378 
1379         if (getreg(rp, rs1, &val, badaddr))
1380                 return (SIMU_FAULT);
1381         addr = (caddr_t)val;
1382 
1383         /* check immediate bit and use immediate field or reg (rs2) */
1384         if (immflg) {
1385                 int imm;
1386                 imm = inst & 0x1fff;                /* mask out immediate field */
1387                 imm <<= 19;                       /* sign extend it */
1388                 imm >>= 19;
1389                 addr += imm;                    /* compute address */
1390         } else {
1391                 if (getreg(rp, rs2, &val, badaddr))
1392                         return (SIMU_FAULT);
1393                 addr += val;
1394         }
1395 
1396         /*
1397          * If this is a 32-bit program, chop the address accordingly.  The
1398          * intermediate uintptr_t casts prevent warnings under a certain
1399          * compiler, and the temporary 32 bit storage is intended to force
1400          * proper code generation and break up what would otherwise be a
1401          * quadruple cast.
1402          */
1403         if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
1404                 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
1405                 addr = (caddr_t)(uintptr_t)addr32;
1406         }
1407 
1408         *badaddr = addr;
1409         return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
1410 }
1411 
1412 /*
1413  * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
1414  * Also compute the precise address by instruction disassembly.
1415  * (v9 page faults only provide the page address via the hardware.)
1416  * Return 0 on failure (not a load or store instruction).
1417  */
1418 int
1419 instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
1420 {
1421         uint_t  inst, op3, asi;
1422         uint_t  rd, rs1, rs2;
1423         int     sz = 0;
1424         int     immflg;
1425         int     floatflg;
1426         caddr_t addr;
1427         caddr_t badaddr;
1428         uint64_t val;
1429 
1430         if (rdwr == S_EXEC) {
1431                 *addrp = (caddr_t)rp->r_pc;
1432                 return (4);
1433         }
1434 
1435         /*
1436          * Fetch the instruction from user-level.
1437          * We would like to assert this:
1438          *   ASSERT(USERMODE(rp->r_tstate));
1439          * but we can't because we can reach this point from a
1440          * register window underflow/overflow and the v9 wbuf
1441          * traps call trap() with T_USER even though r_tstate
1442          * indicates a system trap, not a user trap.
1443          */
1444         inst = fetch_user_instr((caddr_t)rp->r_pc);
1445 
1446         op3 = (inst >> 19) & 0x3f;
1447         rd = (inst >> 25) & 0x1f;
1448         rs1 = (inst >> 14) & 0x1f;
1449         rs2 = inst & 0x1f;
1450         floatflg = (inst >> 24) & 1;
1451         immflg = (inst >> 13) & 1;
1452 
1453         /* if not load or store do nothing.  can't happen? */
1454         if ((inst >> 30) != 3)
1455                 return (0);
1456 
1457         if (immflg)
1458                 asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
1459                     TSTATE_ASI_MASK);
1460         else
1461                 asi = (inst >> 5) & 0xff;
1462 
1463         if (floatflg) {
1464                 /* check for ld/st alternate and highest defined V9 asi */
1465                 if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
1466                         sz = extended_asi_size(asi);
1467                 } else {
1468                         switch (op3 & 3) {
1469                         case 0:
1470                                 sz = 4;                 /* ldf/stf/cas */
1471                                 break;
1472                         case 1:
1473                                 if (rd == 0)
1474                                         sz = 4;         /* ldfsr/stfsr */
1475                                 else
1476                                         sz = 8;         /* ldxfsr/stxfsr */
1477                                 break;
1478                         case 2:
1479                                 if (op3 == 0x3e)
1480                                         sz = 8;         /* casx */
1481                                 else
1482                                         sz = 16;        /* ldqf/stqf */
1483                                 break;
1484                         case 3:
1485                                 sz = 8;                 /* lddf/stdf */
1486                                 break;
1487                         }
1488                 }
1489         } else {
1490                 switch (op3 & 0xf) {                /* map size bits to a number */
1491                 case 0:                         /* lduw */
1492                 case 4:                         /* stw */
1493                 case 8:                         /* ldsw */
1494                 case 0xf:                       /* swap */
1495                         sz = 4; break;
1496                 case 1:                         /* ldub */
1497                 case 5:                         /* stb */
1498                 case 9:                         /* ldsb */
1499                 case 0xd:                       /* ldstub */
1500                         sz = 1; break;
1501                 case 2:                         /* lduh */
1502                 case 6:                         /* sth */
1503                 case 0xa:                       /* ldsh */
1504                         sz = 2; break;
1505                 case 3:                         /* ldd */
1506                 case 7:                         /* std */
1507                 case 0xb:                       /* ldx */
1508                 case 0xe:                       /* stx */
1509                         sz = 8; break;
1510                 }
1511         }
1512 
1513         if (sz == 0)    /* can't happen? */
1514                 return (0);
1515         (void) flush_user_windows_to_stack(NULL);
1516 
1517         if (getreg(rp, rs1, &val, &badaddr))
1518                 return (0);
1519         addr = (caddr_t)val;
1520 
1521         /* cas/casx don't use rs2 / simm13 to compute the address */
1522         if ((op3 & 0x3d) != 0x3c) {
1523                 /* check immediate bit and use immediate field or reg (rs2) */
1524                 if (immflg) {
1525                         int imm;
1526                         imm  = inst & 0x1fff;       /* mask out immediate field */
1527                         imm <<= 19;               /* sign extend it */
1528                         imm >>= 19;
1529                         addr += imm;            /* compute address */
1530                 } else {
1531                         /*
1532                          * asi's in the 0xCx range are partial store
1533                          * instructions.  For these, rs2 is a mask, not part of
1534                          * the address.
1535                          */
1536                         if (!(floatflg && (asi & 0xf0) == 0xc0)) {
1537                                 if (getreg(rp, rs2, &val, &badaddr))
1538                                         return (0);
1539                                 addr += val;
1540                         }
1541                 }
1542         }
1543 
1544         /*
1545          * If this is a 32-bit program, chop the address accordingly.  The
1546          * intermediate uintptr_t casts prevent warnings under a certain
1547          * compiler, and the temporary 32 bit storage is intended to force
1548          * proper code generation and break up what would otherwise be a
1549          * quadruple cast.
1550          */
1551         if (curproc->p_model == DATAMODEL_ILP32) {
1552                 caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
1553                 addr = (caddr_t)(uintptr_t)addr32;
1554         }
1555 
1556         *addrp = addr;
1557         ASSERT(sz != 0);
1558         return (sz);
1559 }
1560 
1561 /*
1562  * Fetch an instruction from user-level.
1563  * Deal with watchpoints, if they are in effect.
1564  */
1565 int32_t
1566 fetch_user_instr(caddr_t vaddr)
1567 {
1568         proc_t *p = curproc;
1569         int32_t instr;
1570 
1571         /*
1572          * If this is a 32-bit program, chop the address accordingly.  The
1573          * intermediate uintptr_t casts prevent warnings under a certain
1574          * compiler, and the temporary 32 bit storage is intended to force
1575          * proper code generation and break up what would otherwise be a
1576          * quadruple cast.
1577          */
1578         if (p->p_model == DATAMODEL_ILP32) {
1579                 caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
1580                 vaddr = (caddr_t)(uintptr_t)vaddr32;
1581         }
1582 
1583         if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
1584                 instr = -1;
1585 
1586         return (instr);
1587 }