1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/mkdev.h>
  27 #include <sys/stat.h>
  28 #include <sys/sunddi.h>
  29 #include <vm/seg_kmem.h>
  30 #include <sys/machparam.h>
  31 #include <sys/sunndi.h>
  32 #include <sys/ontrap.h>
  33 #include <sys/psm.h>
  34 #include <sys/pcie.h>
  35 #include <sys/pci_cfgspace.h>
  36 #include <sys/pci_tools.h>
  37 #include <io/pci/pci_tools_ext.h>
  38 #include <sys/apic.h>
  39 #include <sys/apix.h>
  40 #include <io/pci/pci_var.h>
  41 #include <sys/pci_impl.h>
  42 #include <sys/promif.h>
  43 #include <sys/x86_archext.h>
  44 #include <sys/cpuvar.h>
  45 #include <sys/pci_cfgacc.h>
  46 
  47 #ifdef __xpv
  48 #include <sys/hypervisor.h>
  49 #endif
  50 
/*
 * Bit positions of the bus, device and function numbers used by
 * pcitool_cfg_access() when it builds the physical address of a
 * memory-mapped config space access.  Each is the corresponding
 * PCI_REG_*_SHIFT value moved up by PCIEX_BDF_OFFSET_DELTA bits.
 */
#define PCIEX_BDF_OFFSET_DELTA  4
#define PCIEX_REG_FUNC_SHIFT    (PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define PCIEX_REG_DEV_SHIFT     (PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define PCIEX_REG_BUS_SHIFT     (PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)

/* Return value used by the routines in this file to indicate success. */
#define SUCCESS 0

/*
 * Base added to the bus/dev/func/offset bits when reporting the physical
 * address of a config access; a value of zero is treated as "not available".
 */
extern uint64_t mcfg_mem_base;

/* Set non-zero to enable prom_printf() diagnostics in this file. */
int pcitool_debug = 0;
  60 
/*
 * Offsets of BARS in config space.  First entry of 0 means config space.
 * Entries here correlate to pcitool_bars_t enumerated type: index 0 stands
 * for config space itself, the next six entries are PCI_CONF_BASE0 through
 * PCI_CONF_BASE5, and the last entry is PCI_CONF_ROM.
 */
static uint8_t pci_bars[] = {
        0x0,
        PCI_CONF_BASE0,
        PCI_CONF_BASE1,
        PCI_CONF_BASE2,
        PCI_CONF_BASE3,
        PCI_CONF_BASE4,
        PCI_CONF_BASE5,
        PCI_CONF_ROM
};
  75 
/*
 * Max offset allowed into config space for a particular device.
 * Defaults to PCI_CONF_HDR_SIZE; pcitool_init() switches it to
 * PCIE_CONF_HDR_SIZE when called for a PCI Express nexus.
 */
static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;

/* Forward declarations of the file-local access and mapping helpers. */
static uint64_t pcitool_swap_endian(uint64_t data, int size);
static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
    boolean_t io_access);
static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
    boolean_t write_flag);
static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);

/*
 * Extern declarations
 *
 * psm_intr_ops is the function pointer through which every PSM_INTR_OP_*
 * request in this file is issued.
 */
extern int      (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
                    psm_intr_op_t, int *);
  91 
  92 int
  93 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
  94 {
  95         int instance = ddi_get_instance(dip);
  96 
  97         /* Create pcitool nodes for register access and interrupt routing. */
  98 
  99         if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
 100             PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
 101             DDI_NT_REGACC, 0) != DDI_SUCCESS) {
 102                 return (DDI_FAILURE);
 103         }
 104 
 105         if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
 106             PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
 107             DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
 108                 ddi_remove_minor_node(dip, PCI_MINOR_REG);
 109                 return (DDI_FAILURE);
 110         }
 111 
 112         if (is_pciex)
 113                 max_cfg_size = PCIE_CONF_HDR_SIZE;
 114 
 115         return (DDI_SUCCESS);
 116 }
 117 
 118 void
 119 pcitool_uninit(dev_info_t *dip)
 120 {
 121         ddi_remove_minor_node(dip, PCI_MINOR_INTR);
 122         ddi_remove_minor_node(dip, PCI_MINOR_REG);
 123 }
 124 
 125 /*ARGSUSED*/
 126 static int
 127 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
 128 {
 129         ddi_intr_handle_impl_t info_hdl;
 130         pcitool_intr_set_t iset;
 131         uint32_t old_cpu;
 132         int ret, result;
 133         size_t copyinout_size;
 134         int rval = SUCCESS;
 135         apic_get_type_t type_info;
 136 
 137         /* Version 1 of pcitool_intr_set_t doesn't have flags. */
 138         copyinout_size = (size_t)&iset.flags - (size_t)&iset;
 139 
 140         if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
 141                 return (EFAULT);
 142 
 143         switch (iset.user_version) {
 144         case PCITOOL_V1:
 145                 break;
 146 
 147         case PCITOOL_V2:
 148                 copyinout_size = sizeof (pcitool_intr_set_t);
 149                 if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
 150                         return (EFAULT);
 151                 break;
 152 
 153         default:
 154                 iset.status = PCITOOL_OUT_OF_RANGE;
 155                 rval = ENOTSUP;
 156                 goto done_set_intr;
 157         }
 158 
 159         if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
 160                 rval = ENOTSUP;
 161                 iset.status = PCITOOL_IO_ERROR;
 162                 goto done_set_intr;
 163         }
 164 
 165         info_hdl.ih_private = &type_info;
 166 
 167         if ((*psm_intr_ops)(NULL, &info_hdl,
 168             PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
 169                 rval = ENOTSUP;
 170                 iset.status = PCITOOL_IO_ERROR;
 171                 goto done_set_intr;
 172         }
 173 
 174         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 175                 if (iset.old_cpu > type_info.avgi_num_cpu) {
 176                         rval = EINVAL;
 177                         iset.status = PCITOOL_INVALID_CPUID;
 178                         goto done_set_intr;
 179                 }
 180                 old_cpu = iset.old_cpu;
 181         } else {
 182                 if ((old_cpu =
 183                     pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
 184                         iset.status = PCITOOL_IO_ERROR;
 185                         rval = EINVAL;
 186                         goto done_set_intr;
 187                 }
 188         }
 189 
 190         if (iset.ino > type_info.avgi_num_intr) {
 191                 rval = EINVAL;
 192                 iset.status = PCITOOL_INVALID_INO;
 193                 goto done_set_intr;
 194         }
 195 
 196         iset.status = PCITOOL_SUCCESS;
 197 
 198         old_cpu &= ~PSMGI_CPU_USER_BOUND;
 199 
 200         /*
 201          * For this locally-declared and used handle, ih_private will contain a
 202          * CPU value, not an ihdl_plat_t as used for global interrupt handling.
 203          */
 204         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 205                 info_hdl.ih_vector = APIX_VIRTVECTOR(old_cpu, iset.ino);
 206         } else {
 207                 info_hdl.ih_vector = iset.ino;
 208         }
 209         info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
 210         info_hdl.ih_flags = PSMGI_INTRBY_VEC;
 211         if (pcitool_debug)
 212                 prom_printf("user version:%d, flags:0x%x\n",
 213                     iset.user_version, iset.flags);
 214 
 215         result = ENOTSUP;
 216         if ((iset.user_version >= PCITOOL_V2) &&
 217             (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
 218                 ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
 219                     &result);
 220         } else {
 221                 ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
 222                     &result);
 223         }
 224 
 225         if (ret != PSM_SUCCESS) {
 226                 switch (result) {
 227                 case EIO:               /* Error making the change */
 228                         rval = EIO;
 229                         iset.status = PCITOOL_IO_ERROR;
 230                         break;
 231                 case ENXIO:             /* Couldn't convert vector to irq */
 232                         rval = EINVAL;
 233                         iset.status = PCITOOL_INVALID_INO;
 234                         break;
 235                 case EINVAL:            /* CPU out of range */
 236                         rval = EINVAL;
 237                         iset.status = PCITOOL_INVALID_CPUID;
 238                         break;
 239                 case ENOTSUP:           /* Requested PSM intr ops missing */
 240                         rval = ENOTSUP;
 241                         iset.status = PCITOOL_IO_ERROR;
 242                         break;
 243                 }
 244         }
 245 
 246         /* Return original CPU. */
 247         iset.cpu_id = old_cpu;
 248 
 249         /* Return new vector */
 250         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 251                 iset.ino = APIX_VIRTVEC_VECTOR(info_hdl.ih_vector);
 252         }
 253 
 254 done_set_intr:
 255         iset.drvr_version = PCITOOL_VERSION;
 256         if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
 257                 rval = EFAULT;
 258         return (rval);
 259 }
 260 
 261 
 262 /* It is assumed that dip != NULL */
 263 static void
 264 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
 265 {
 266         (void) strncpy(devs->driver_name,
 267             ddi_driver_name(dip), MAXMODCONFNAME-2);
 268         devs->driver_name[MAXMODCONFNAME-1] = '\0';
 269         (void) ddi_pathname(dip, devs->path);
 270         devs->dev_inst = ddi_get_instance(dip);
 271 }
 272 
 273 static int
 274 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
 275 {
 276         /* Array part isn't used here, but oh well... */
 277         pcitool_intr_get_t partial_iget;
 278         pcitool_intr_get_t *iget = &partial_iget;
 279         size_t  iget_kmem_alloc_size = 0;
 280         uint8_t num_devs_ret;
 281         int copyout_rval;
 282         int rval = SUCCESS;
 283         int circ;
 284         int i;
 285 
 286         ddi_intr_handle_impl_t info_hdl;
 287         apic_get_intr_t intr_info;
 288         apic_get_type_t type_info;
 289 
 290         /* Read in just the header part, no array section. */
 291         if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
 292             DDI_SUCCESS)
 293                 return (EFAULT);
 294 
 295         if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
 296                 partial_iget.status = PCITOOL_IO_ERROR;
 297                 partial_iget.num_devs_ret = 0;
 298                 rval = ENOTSUP;
 299                 goto done_get_intr;
 300         }
 301 
 302         info_hdl.ih_private = &type_info;
 303 
 304         if ((*psm_intr_ops)(NULL, &info_hdl,
 305             PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
 306                 iget->status = PCITOOL_IO_ERROR;
 307                 iget->num_devs_ret = 0;
 308                 rval = EINVAL;
 309                 goto done_get_intr;
 310         }
 311 
 312         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 313                 if (partial_iget.cpu_id > type_info.avgi_num_cpu) {
 314                         partial_iget.status = PCITOOL_INVALID_CPUID;
 315                         partial_iget.num_devs_ret = 0;
 316                         rval = EINVAL;
 317                         goto done_get_intr;
 318                 }
 319         }
 320 
 321         /* Validate argument. */
 322         if ((partial_iget.ino & APIX_VIRTVEC_VECMASK) >
 323             type_info.avgi_num_intr) {
 324                 partial_iget.status = PCITOOL_INVALID_INO;
 325                 partial_iget.num_devs_ret = 0;
 326                 rval = EINVAL;
 327                 goto done_get_intr;
 328         }
 329 
 330         num_devs_ret = partial_iget.num_devs_ret;
 331         intr_info.avgi_dip_list = NULL;
 332         intr_info.avgi_req_flags =
 333             PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
 334         /*
 335          * For this locally-declared and used handle, ih_private will contain a
 336          * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
 337          * global interrupt handling.
 338          */
 339         info_hdl.ih_private = &intr_info;
 340 
 341         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 342                 info_hdl.ih_vector =
 343                     APIX_VIRTVECTOR(partial_iget.cpu_id, partial_iget.ino);
 344         } else {
 345                 info_hdl.ih_vector = partial_iget.ino;
 346         }
 347 
 348         /* Caller wants device information returned. */
 349         if (num_devs_ret > 0) {
 350 
 351                 intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
 352 
 353                 /*
 354                  * Allocate room.
 355                  * If num_devs_ret == 0 iget remains pointing to partial_iget.
 356                  */
 357                 iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
 358                 iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
 359 
 360                 /* Read in whole structure to verify there's room. */
 361                 if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
 362                     SUCCESS) {
 363 
 364                         /* Be consistent and just return EFAULT here. */
 365                         kmem_free(iget, iget_kmem_alloc_size);
 366 
 367                         return (EFAULT);
 368                 }
 369         }
 370 
 371         bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
 372         iget->ino = info_hdl.ih_vector;
 373 
 374         /*
 375          * Lock device tree branch from the pci root nexus on down if info will
 376          * be extracted from dips returned from the tree.
 377          */
 378         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 379                 ndi_devi_enter(dip, &circ);
 380         }
 381 
 382         /* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
 383         if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
 384             PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
 385                 iget->status = PCITOOL_IO_ERROR;
 386                 iget->num_devs_ret = 0;
 387                 rval = EINVAL;
 388                 goto done_get_intr;
 389         }
 390 
 391         /*
 392          * Fill in the pcitool_intr_get_t to be returned,
 393          * with the CPU, num_devs_ret and num_devs.
 394          */
 395         if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
 396             intr_info.avgi_cpu_id == IRQ_UNINIT)
 397                 iget->cpu_id = 0;
 398         else
 399                 iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
 400 
 401         /* Number of devices returned by apic. */
 402         iget->num_devs = intr_info.avgi_num_devs;
 403 
 404         /* Device info was returned. */
 405         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 406 
 407                 /*
 408                  * num devs returned is num devs ret by apic,
 409                  * space permitting.
 410                  */
 411                 iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
 412 
 413                 /*
 414                  * Loop thru list of dips and extract driver, name and instance.
 415                  * Fill in the pcitool_intr_dev_t's with this info.
 416                  */
 417                 for (i = 0; i < iget->num_devs_ret; i++)
 418                         pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
 419                             &iget->dev[i]);
 420 
 421                 /* Free kmem_alloc'ed memory of the apic_get_intr_t */
 422                 kmem_free(intr_info.avgi_dip_list,
 423                     intr_info.avgi_num_devs * sizeof (dev_info_t *));
 424         }
 425 
 426 done_get_intr:
 427 
 428         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 429                 ndi_devi_exit(dip, circ);
 430         }
 431 
 432         iget->drvr_version = PCITOOL_VERSION;
 433         copyout_rval = ddi_copyout(iget, arg,
 434             PCITOOL_IGET_SIZE(num_devs_ret), mode);
 435 
 436         if (iget_kmem_alloc_size > 0)
 437                 kmem_free(iget, iget_kmem_alloc_size);
 438 
 439         if (copyout_rval != DDI_SUCCESS)
 440                 rval = EFAULT;
 441 
 442         return (rval);
 443 }
 444 
 445 /*ARGSUSED*/
 446 static int
 447 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
 448 {
 449         pcitool_intr_info_t intr_info;
 450         ddi_intr_handle_impl_t info_hdl;
 451         int rval = SUCCESS;
 452         apic_get_type_t type_info;
 453 
 454         /* If we need user_version, and to ret same user version as passed in */
 455         if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
 456             DDI_SUCCESS) {
 457                 if (pcitool_debug)
 458                         prom_printf("Error reading arguments\n");
 459                 return (EFAULT);
 460         }
 461 
 462         if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
 463                 return (ENOTSUP);
 464 
 465         info_hdl.ih_private = &type_info;
 466 
 467         /* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
 468         if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
 469             PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
 470                 intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
 471                 intr_info.ctlr_version = 0;
 472                 intr_info.num_intr = APIC_MAX_VECTOR;
 473         } else {
 474                 intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
 475                 intr_info.num_cpu = type_info.avgi_num_cpu;
 476                 if (strcmp(type_info.avgi_type,
 477                     APIC_PCPLUSMP_NAME) == 0) {
 478                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
 479                         intr_info.num_intr = type_info.avgi_num_intr;
 480                 } else if (strcmp(type_info.avgi_type,
 481                     APIC_APIX_NAME) == 0) {
 482                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_APIX;
 483                         intr_info.num_intr = type_info.avgi_num_intr;
 484                 } else {
 485                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
 486                         intr_info.num_intr = APIC_MAX_VECTOR;
 487                 }
 488         }
 489 
 490         intr_info.drvr_version = PCITOOL_VERSION;
 491         if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
 492             DDI_SUCCESS) {
 493                 if (pcitool_debug)
 494                         prom_printf("Error returning arguments.\n");
 495                 rval = EFAULT;
 496         }
 497 
 498         return (rval);
 499 }
 500 
 501 
 502 
 503 /*
 504  * Main function for handling interrupt CPU binding requests and queries.
 505  * Need to implement later
 506  */
 507 int
 508 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
 509 {
 510         int rval;
 511 
 512         switch (cmd) {
 513 
 514         /* Associate a new CPU with a given vector */
 515         case PCITOOL_DEVICE_SET_INTR:
 516                 rval = pcitool_set_intr(dip, arg, mode);
 517                 break;
 518 
 519         case PCITOOL_DEVICE_GET_INTR:
 520                 rval = pcitool_get_intr(dip, arg, mode);
 521                 break;
 522 
 523         case PCITOOL_SYSTEM_INTR_INFO:
 524                 rval = pcitool_intr_info(dip, arg, mode);
 525                 break;
 526 
 527         default:
 528                 rval = ENOTSUP;
 529         }
 530 
 531         return (rval);
 532 }
 533 
 534 /*
 535  * Perform register accesses on the nexus device itself.
 536  * No explicit PCI nexus device for X86, so not applicable.
 537  */
 538 
 539 /*ARGSUSED*/
 540 int
 541 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
 542 {
 543         return (ENOTSUP);
 544 }
 545 
 546 /* Swap endianness. */
 547 static uint64_t
 548 pcitool_swap_endian(uint64_t data, int size)
 549 {
 550         typedef union {
 551                 uint64_t data64;
 552                 uint8_t data8[8];
 553         } data_split_t;
 554 
 555         data_split_t orig_data;
 556         data_split_t returned_data;
 557         int i;
 558 
 559         orig_data.data64 = data;
 560         returned_data.data64 = 0;
 561 
 562         for (i = 0; i < size; i++) {
 563                 returned_data.data8[i] = orig_data.data8[size - 1 - i];
 564         }
 565 
 566         return (returned_data.data64);
 567 }
 568 
 569 /*
 570  * A note about ontrap handling:
 571  *
 572  * X86 systems on which this module was tested return FFs instead of bus errors
 573  * when accessing devices with invalid addresses.  Ontrap handling, which
 574  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
 575  * space accessing (not for pci config space), in case future X86 platforms
 576  * require it.
 577  */
 578 
 579 /* Access device.  prg is modified. */
 580 static int
 581 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
 582     boolean_t io_access)
 583 {
 584         int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 585         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 586         int rval = SUCCESS;
 587         uint64_t local_data;
 588         pci_cfgacc_req_t req;
 589         uint32_t max_offset;
 590 
 591         if ((size <= 0) || (size > 8) || ((size & (size - 1)) != 0)) {
 592                 prg->status = PCITOOL_INVALID_SIZE;
 593                 return (ENOTSUP);
 594         }
 595 
 596         /*
 597          * NOTE: there is no way to verify whether or not the address is
 598          * valid other than that it is within the maximum offset.  The
 599          * put functions return void and the get functions return -1 on error.
 600          */
 601 
 602         if (io_access)
 603                 max_offset = 0xFF;
 604         else
 605                 max_offset = 0xFFF;
 606         if (prg->offset + size - 1 > max_offset) {
 607                 prg->status = PCITOOL_INVALID_ADDRESS;
 608                 return (ENOTSUP);
 609         }
 610 
 611         prg->status = PCITOOL_SUCCESS;
 612 
 613         req.rcdip = NULL;
 614         req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
 615         req.offset = prg->offset;
 616         req.size = size;
 617         req.write = write_flag;
 618         req.ioacc = io_access;
 619         if (write_flag) {
 620                 if (big_endian) {
 621                         local_data = pcitool_swap_endian(prg->data, size);
 622                 } else {
 623                         local_data = prg->data;
 624                 }
 625                 VAL64(&req) = local_data;
 626                 pci_cfgacc_acc(&req);
 627         } else {
 628                 pci_cfgacc_acc(&req);
 629                 switch (size) {
 630                 case 1:
 631                         local_data = VAL8(&req);
 632                         break;
 633                 case 2:
 634                         local_data = VAL16(&req);
 635                         break;
 636                 case 4:
 637                         local_data = VAL32(&req);
 638                         break;
 639                 case 8:
 640                         local_data = VAL64(&req);
 641                         break;
 642                 }
 643                 if (big_endian) {
 644                         prg->data =
 645                             pcitool_swap_endian(local_data, size);
 646                 } else {
 647                         prg->data = local_data;
 648                 }
 649         }
 650         /*
 651          * Check if legacy IO config access is used, in which case
 652          * only first 256 bytes are valid.
 653          */
 654         if (req.ioacc && (prg->offset + size - 1 > 0xFF)) {
 655                 prg->status = PCITOOL_INVALID_ADDRESS;
 656                 return (ENOTSUP);
 657         }
 658 
 659         /* Set phys_addr only if MMIO is used */
 660         prg->phys_addr = 0;
 661         if (!req.ioacc && mcfg_mem_base != 0) {
 662                 prg->phys_addr = mcfg_mem_base + prg->offset +
 663                     ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
 664                     (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
 665                     (prg->func_no << PCIEX_REG_FUNC_SHIFT));
 666         }
 667 
 668         return (rval);
 669 }
 670 
 671 static int
 672 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
 673 {
 674         int port = (int)prg->phys_addr;
 675         size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 676         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 677         int rval = SUCCESS;
 678         on_trap_data_t otd;
 679         uint64_t local_data;
 680 
 681 
 682         /*
 683          * on_trap works like setjmp.
 684          *
 685          * A non-zero return here means on_trap has returned from an error.
 686          *
 687          * A zero return here means that on_trap has just returned from setup.
 688          */
 689         if (on_trap(&otd, OT_DATA_ACCESS)) {
 690                 no_trap();
 691                 if (pcitool_debug)
 692                         prom_printf(
 693                             "pcitool_io_access: on_trap caught an error...\n");
 694                 prg->status = PCITOOL_INVALID_ADDRESS;
 695                 return (EFAULT);
 696         }
 697 
 698         if (write_flag) {
 699 
 700                 if (big_endian) {
 701                         local_data = pcitool_swap_endian(prg->data, size);
 702                 } else {
 703                         local_data = prg->data;
 704                 }
 705 
 706                 if (pcitool_debug)
 707                         prom_printf("Writing %ld byte(s) to port 0x%x\n",
 708                             size, port);
 709 
 710                 switch (size) {
 711                 case 1:
 712                         outb(port, (uint8_t)local_data);
 713                         break;
 714                 case 2:
 715                         outw(port, (uint16_t)local_data);
 716                         break;
 717                 case 4:
 718                         outl(port, (uint32_t)local_data);
 719                         break;
 720                 default:
 721                         rval = ENOTSUP;
 722                         prg->status = PCITOOL_INVALID_SIZE;
 723                         break;
 724                 }
 725         } else {
 726                 if (pcitool_debug)
 727                         prom_printf("Reading %ld byte(s) from port 0x%x\n",
 728                             size, port);
 729 
 730                 switch (size) {
 731                 case 1:
 732                         local_data = inb(port);
 733                         break;
 734                 case 2:
 735                         local_data = inw(port);
 736                         break;
 737                 case 4:
 738                         local_data = inl(port);
 739                         break;
 740                 default:
 741                         rval = ENOTSUP;
 742                         prg->status = PCITOOL_INVALID_SIZE;
 743                         break;
 744                 }
 745 
 746                 if (rval == SUCCESS) {
 747                         if (big_endian) {
 748                                 prg->data =
 749                                     pcitool_swap_endian(local_data, size);
 750                         } else {
 751                                 prg->data = local_data;
 752                         }
 753                 }
 754         }
 755 
 756         no_trap();
 757         return (rval);
 758 }
 759 
 760 static int
 761 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
 762 {
 763         size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 764         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 765         int rval = DDI_SUCCESS;
 766         on_trap_data_t otd;
 767         uint64_t local_data;
 768 
 769         /*
 770          * on_trap works like setjmp.
 771          *
 772          * A non-zero return here means on_trap has returned from an error.
 773          *
 774          * A zero return here means that on_trap has just returned from setup.
 775          */
 776         if (on_trap(&otd, OT_DATA_ACCESS)) {
 777                 no_trap();
 778                 if (pcitool_debug)
 779                         prom_printf(
 780                             "pcitool_mem_access: on_trap caught an error...\n");
 781                 prg->status = PCITOOL_INVALID_ADDRESS;
 782                 return (EFAULT);
 783         }
 784 
 785         if (write_flag) {
 786 
 787                 if (big_endian) {
 788                         local_data = pcitool_swap_endian(prg->data, size);
 789                 } else {
 790                         local_data = prg->data;
 791                 }
 792 
 793                 switch (size) {
 794                 case 1:
 795                         *((uint8_t *)(uintptr_t)virt_addr) = local_data;
 796                         break;
 797                 case 2:
 798                         *((uint16_t *)(uintptr_t)virt_addr) = local_data;
 799                         break;
 800                 case 4:
 801                         *((uint32_t *)(uintptr_t)virt_addr) = local_data;
 802                         break;
 803                 case 8:
 804                         *((uint64_t *)(uintptr_t)virt_addr) = local_data;
 805                         break;
 806                 default:
 807                         rval = ENOTSUP;
 808                         prg->status = PCITOOL_INVALID_SIZE;
 809                         break;
 810                 }
 811         } else {
 812                 switch (size) {
 813                 case 1:
 814                         local_data = *((uint8_t *)(uintptr_t)virt_addr);
 815                         break;
 816                 case 2:
 817                         local_data = *((uint16_t *)(uintptr_t)virt_addr);
 818                         break;
 819                 case 4:
 820                         local_data = *((uint32_t *)(uintptr_t)virt_addr);
 821                         break;
 822                 case 8:
 823                         local_data = *((uint64_t *)(uintptr_t)virt_addr);
 824                         break;
 825                 default:
 826                         rval = ENOTSUP;
 827                         prg->status = PCITOOL_INVALID_SIZE;
 828                         break;
 829                 }
 830 
 831                 if (rval == SUCCESS) {
 832                         if (big_endian) {
 833                                 prg->data =
 834                                     pcitool_swap_endian(local_data, size);
 835                         } else {
 836                                 prg->data = local_data;
 837                         }
 838                 }
 839         }
 840 
 841         no_trap();
 842         return (rval);
 843 }
 844 
 845 /*
 846  * Map up to 2 pages which contain the address we want to access.
 847  *
 848  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
 849  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
 850  * We'll never have to map more than two pages.
 851  */
 852 
 853 static uint64_t
 854 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
 855 {
 856 
 857         uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
 858         uint64_t offset = phys_addr & MMU_PAGEOFFSET;
 859         void *virt_base;
 860         uint64_t returned_addr;
 861         pfn_t pfn;
 862 
 863         if (pcitool_debug)
 864                 prom_printf("pcitool_map: Called with PA:0x%p\n",
 865                     (void *)(uintptr_t)phys_addr);
 866 
 867         *num_pages = 1;
 868 
 869         /* Desired mapping would span more than two pages. */
 870         if ((offset + size) > (MMU_PAGESIZE * 2)) {
 871                 if (pcitool_debug)
 872                         prom_printf("boundary violation: "
 873                             "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
 874                             offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
 875                 return (NULL);
 876 
 877         } else if ((offset + size) > MMU_PAGESIZE) {
 878                 (*num_pages)++;
 879         }
 880 
 881         /* Get page(s) of virtual space. */
 882         virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
 883         if (virt_base == NULL) {
 884                 if (pcitool_debug)
 885                         prom_printf("Couldn't get virtual base address.\n");
 886                 return (NULL);
 887         }
 888 
 889         if (pcitool_debug)
 890                 prom_printf("Got base virtual address:0x%p\n", virt_base);
 891 
 892 #ifdef __xpv
 893         /*
 894          * We should only get here if we are dom0.
 895          * We're using a real device so we need to translate the MA to a PFN.
 896          */
 897         ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
 898         pfn = xen_assign_pfn(mmu_btop(page_base));
 899 #else
 900         pfn = btop(page_base);
 901 #endif
 902 
 903         /* Now map the allocated virtual space to the physical address. */
 904         hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
 905             PROT_READ | PROT_WRITE | HAT_STRICTORDER,
 906             HAT_LOAD_LOCK);
 907 
 908         returned_addr = ((uintptr_t)(virt_base)) + offset;
 909 
 910         if (pcitool_debug)
 911                 prom_printf("pcitool_map: returning VA:0x%p\n",
 912                     (void *)(uintptr_t)returned_addr);
 913 
 914         return (returned_addr);
 915 }
 916 
 917 /* Unmap the mapped page(s). */
 918 static void
 919 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
 920 {
 921         void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
 922 
 923         hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
 924             HAT_UNLOAD_UNLOCK);
 925         vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
 926 }
 927 
 928 
 929 /* Perform register accesses on PCI leaf devices. */
 930 /*ARGSUSED*/
 931 int
 932 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
 933 {
 934         boolean_t       write_flag = B_FALSE;
 935         boolean_t       io_access = B_TRUE;
 936         int             rval = 0;
 937         pcitool_reg_t   prg;
 938         uint8_t         size;
 939 
 940         uint64_t        base_addr;
 941         uint64_t        virt_addr;
 942         size_t          num_virt_pages;
 943 
 944         switch (cmd) {
 945         case (PCITOOL_DEVICE_SET_REG):
 946                 write_flag = B_TRUE;
 947 
 948         /*FALLTHRU*/
 949         case (PCITOOL_DEVICE_GET_REG):
 950                 if (pcitool_debug)
 951                         prom_printf("pci_dev_reg_ops set/get reg\n");
 952                 if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
 953                     DDI_SUCCESS) {
 954                         if (pcitool_debug)
 955                                 prom_printf("Error reading arguments\n");
 956                         return (EFAULT);
 957                 }
 958 
 959                 if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
 960                         prg.status = PCITOOL_OUT_OF_RANGE;
 961                         rval = EINVAL;
 962                         goto done_reg;
 963                 }
 964 
 965                 if (pcitool_debug)
 966                         prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
 967                             prg.bus_no, prg.dev_no, prg.func_no);
 968                 /* Validate address arguments of bus / dev / func */
 969                 if (((prg.bus_no &
 970                     (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
 971                     prg.bus_no) ||
 972                     ((prg.dev_no &
 973                     (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
 974                     prg.dev_no) ||
 975                     ((prg.func_no &
 976                     (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
 977                     prg.func_no)) {
 978                         prg.status = PCITOOL_INVALID_ADDRESS;
 979                         rval = EINVAL;
 980                         goto done_reg;
 981                 }
 982 
 983                 size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
 984 
 985                 /* Proper config space desired. */
 986                 if (prg.barnum == 0) {
 987 
 988                         if (pcitool_debug)
 989                                 prom_printf(
 990                                     "config access: offset:0x%" PRIx64 ", "
 991                                     "phys_addr:0x%" PRIx64 "\n",
 992                                     prg.offset, prg.phys_addr);
 993 
 994                         if (prg.offset >= max_cfg_size) {
 995                                 prg.status = PCITOOL_OUT_OF_RANGE;
 996                                 rval = EINVAL;
 997                                 goto done_reg;
 998                         }
 999                         if (max_cfg_size == PCIE_CONF_HDR_SIZE)
1000                                 io_access = B_FALSE;
1001 
1002                         rval = pcitool_cfg_access(&prg, write_flag, io_access);
1003                         if (pcitool_debug)
1004                                 prom_printf(
1005                                     "config access: data:0x%" PRIx64 "\n",
1006                                     prg.data);
1007 
1008                 /* IO/ MEM/ MEM64 space. */
1009                 } else {
1010 
1011                         pcitool_reg_t   prg2;
1012                         bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1013 
1014                         /*
1015                          * Translate BAR number into offset of the BAR in
1016                          * the device's config space.
1017                          */
1018                         prg2.offset = pci_bars[prg2.barnum];
1019                         prg2.acc_attr =
1020                             PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1021 
1022                         if (pcitool_debug)
1023                                 prom_printf(
1024                                     "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1025                                     prg2.barnum, prg2.offset);
1026                         /*
1027                          * Get Bus Address Register (BAR) from config space.
1028                          * prg2.offset is the offset into config space of the
1029                          * BAR desired.  prg.status is modified on error.
1030                          */
1031                         rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
1032                         if (rval != SUCCESS) {
1033                                 if (pcitool_debug)
1034                                         prom_printf("BAR access failed\n");
1035                                 prg.status = prg2.status;
1036                                 goto done_reg;
1037                         }
1038                         /*
1039                          * Reference proper PCI space based on the BAR.
1040                          * If 64 bit MEM space, need to load other half of the
1041                          * BAR first.
1042                          */
1043 
1044                         if (pcitool_debug)
1045                                 prom_printf("bar returned is 0x%" PRIx64 "\n",
1046                                     prg2.data);
1047                         if (!prg2.data) {
1048                                 if (pcitool_debug)
1049                                         prom_printf("BAR data == 0\n");
1050                                 rval = EINVAL;
1051                                 prg.status = PCITOOL_INVALID_ADDRESS;
1052                                 goto done_reg;
1053                         }
1054                         if (prg2.data == 0xffffffff) {
1055                                 if (pcitool_debug)
1056                                         prom_printf("BAR data == -1\n");
1057                                 rval = EINVAL;
1058                                 prg.status = PCITOOL_INVALID_ADDRESS;
1059                                 goto done_reg;
1060                         }
1061 
1062                         /*
1063                          * BAR has bits saying this space is IO space, unless
1064                          * this is the ROM address register.
1065                          */
1066                         if (((PCI_BASE_SPACE_M & prg2.data) ==
1067                             PCI_BASE_SPACE_IO) &&
1068                             (prg2.offset != PCI_CONF_ROM)) {
1069                                 if (pcitool_debug)
1070                                         prom_printf("IO space\n");
1071 
1072                                 prg2.data &= PCI_BASE_IO_ADDR_M;
1073                                 prg.phys_addr = prg2.data + prg.offset;
1074 
1075                                 rval = pcitool_io_access(&prg, write_flag);
1076                                 if ((rval != SUCCESS) && (pcitool_debug))
1077                                         prom_printf("IO access failed\n");
1078 
1079                                 goto done_reg;
1080 
1081 
1082                         /*
1083                          * BAR has bits saying this space is 64 bit memory
1084                          * space, unless this is the ROM address register.
1085                          *
1086                          * The 64 bit address stored in two BAR cells is not
1087                          * necessarily aligned on an 8-byte boundary.
1088                          * Need to keep the first 4 bytes read,
1089                          * and do a separate read of the high 4 bytes.
1090                          */
1091 
1092                         } else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1093                             (prg2.offset != PCI_CONF_ROM)) {
1094 
1095                                 uint32_t low_bytes =
1096                                     (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1097 
1098                                 /*
1099                                  * Don't try to read the next 4 bytes
1100                                  * past the end of BARs.
1101                                  */
1102                                 if (prg2.offset >= PCI_CONF_BASE5) {
1103                                         prg.status = PCITOOL_OUT_OF_RANGE;
1104                                         rval = EIO;
1105                                         goto done_reg;
1106                                 }
1107 
1108                                 /*
1109                                  * Access device.
1110                                  * prg2.status is modified on error.
1111                                  */
1112                                 prg2.offset += 4;
1113                                 rval = pcitool_cfg_access(&prg2,
1114                                     B_FALSE, B_TRUE);
1115                                 if (rval != SUCCESS) {
1116                                         prg.status = prg2.status;
1117                                         goto done_reg;
1118                                 }
1119 
1120                                 if (prg2.data == 0xffffffff) {
1121                                         prg.status = PCITOOL_INVALID_ADDRESS;
1122                                         prg.status = EFAULT;
1123                                         goto done_reg;
1124                                 }
1125 
1126                                 prg2.data = (prg2.data << 32) + low_bytes;
1127                                 if (pcitool_debug)
1128                                         prom_printf(
1129                                             "64 bit mem space.  "
1130                                             "64-bit bar is 0x%" PRIx64 "\n",
1131                                             prg2.data);
1132 
1133                         /* Mem32 space, including ROM */
1134                         } else {
1135 
1136                                 if (prg2.offset == PCI_CONF_ROM) {
1137                                         if (pcitool_debug)
1138                                                 prom_printf(
1139                                                     "Additional ROM "
1140                                                     "checking\n");
1141                                         /* Can't write to ROM */
1142                                         if (write_flag) {
1143                                                 prg.status = PCITOOL_ROM_WRITE;
1144                                                 rval = EIO;
1145                                                 goto done_reg;
1146 
1147                                         /* ROM disabled for reading */
1148                                         } else if (!(prg2.data & 0x00000001)) {
1149                                                 prg.status =
1150                                                     PCITOOL_ROM_DISABLED;
1151                                                 rval = EIO;
1152                                                 goto done_reg;
1153                                         }
1154                                 }
1155 
1156                                 if (pcitool_debug)
1157                                         prom_printf("32 bit mem space\n");
1158                         }
1159 
1160                         /* Common code for all IO/MEM range spaces. */
1161 
1162                         base_addr = prg2.data;
1163                         if (pcitool_debug)
1164                                 prom_printf(
1165                                     "addr portion of bar is 0x%" PRIx64 ", "
1166                                     "base=0x%" PRIx64 ", "
1167                                     "offset:0x%" PRIx64 "\n",
1168                                     prg2.data, base_addr, prg.offset);
1169                         /*
1170                          * Use offset provided by caller to index into
1171                          * desired space, then access.
1172                          * Note that prg.status is modified on error.
1173                          */
1174                         prg.phys_addr = base_addr + prg.offset;
1175 
1176                         virt_addr = pcitool_map(prg.phys_addr, size,
1177                             &num_virt_pages);
1178                         if (virt_addr == NULL) {
1179                                 prg.status = PCITOOL_IO_ERROR;
1180                                 rval = EIO;
1181                                 goto done_reg;
1182                         }
1183 
1184                         rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1185                         pcitool_unmap(virt_addr, num_virt_pages);
1186                 }
1187 done_reg:
1188                 prg.drvr_version = PCITOOL_VERSION;
1189                 if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1190                     DDI_SUCCESS) {
1191                         if (pcitool_debug)
1192                                 prom_printf("Error returning arguments.\n");
1193                         rval = EFAULT;
1194                 }
1195                 break;
1196         default:
1197                 rval = ENOTTY;
1198                 break;
1199         }
1200         return (rval);
1201 }