1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * tavor_cfg.c
  29  *    Tavor Configuration Profile Routines
  30  *
  31  *    Implements the routines necessary for initializing and (later) tearing
  32  *    down the list of Tavor configuration information.
  33  */
  34 
  35 #include <sys/sysmacros.h>
  36 #include <sys/types.h>
  37 #include <sys/conf.h>
  38 #include <sys/ddi.h>
  39 #include <sys/sunddi.h>
  40 #include <sys/modctl.h>
  41 #include <sys/bitmap.h>
  42 
  43 #include <sys/ib/adapters/tavor/tavor.h>
  44 
  45 /* Set to enable alternative configurations: 0 = automatic config, 1 = manual */
  46 uint32_t tavor_alt_config_enable        = 0;
  47 
  48 /* Number of supported QPs and their maximum size */
  49 uint32_t tavor_log_num_qp               = TAVOR_NUM_QP_SHIFT_128;
  50 uint32_t tavor_log_max_qp_sz            = TAVOR_QP_SZ_SHIFT;
  51 
  52 /* Number of supported SGL per WQE */
  53 uint32_t tavor_wqe_max_sgl              = TAVOR_NUM_WQE_SGL;
  54 
  55 /* Number of supported CQs and their maximum size */
  56 uint32_t tavor_log_num_cq               = TAVOR_NUM_CQ_SHIFT_128;
  57 uint32_t tavor_log_max_cq_sz            = TAVOR_CQ_SZ_SHIFT;
  58 
  59 /* Select to enable SRQ or not; NOTE: 0 for disabled, 1 for enabled */
  60 uint32_t tavor_srq_enable               = 1;
  61 
  62 /* Number of supported SRQs and their maximum size */
  63 uint32_t tavor_log_num_srq              = TAVOR_NUM_SRQ_SHIFT_128;
  64 uint32_t tavor_log_max_srq_sz           = TAVOR_SRQ_SZ_SHIFT;
  65 uint32_t tavor_srq_max_sgl              = TAVOR_SRQ_MAX_SGL;
  66 
  67 /* Default size for all EQs */
  68 uint32_t tavor_log_default_eq_sz        = TAVOR_DEFAULT_EQ_SZ_SHIFT;
  69 
  70 /* Number of supported RDB (for incoming RDMA Read/Atomic) */
  71 uint32_t tavor_log_num_rdb              = TAVOR_NUM_RDB_SHIFT_128;
  72 
/*
 * Number of supported multicast groups, number of QP per multicast group, and
 * the number of entries (from the total number) in the multicast group "hash
 * table"
 */
  78 uint32_t tavor_log_num_mcg              = TAVOR_NUM_MCG_SHIFT;
  79 uint32_t tavor_num_qp_per_mcg           = TAVOR_NUM_QP_PER_MCG;
  80 uint32_t tavor_log_num_mcg_hash         = TAVOR_NUM_MCG_HASH_SHIFT;
  81 
  82 /*
  83  * Number of supported MPTs (memory regions and windows) and their maximum
  84  * size.  Also the number of MTT per "MTT segment" (see tavor_mr.h for more
  85  * details)
  86  */
  87 uint32_t tavor_log_num_mpt              = TAVOR_NUM_MPT_SHIFT_128;
  88 uint32_t tavor_log_max_mrw_sz           = TAVOR_MAX_MEM_MPT_SHIFT_128;
  89 uint32_t tavor_log_num_mttseg           = TAVOR_NUM_MTTSEG_SHIFT;
  90 
  91 /*
  92  * Number of supported Tavor mailboxes ("In" and "Out") and their maximum
  93  * sizes, respectively
  94  */
  95 uint32_t tavor_log_num_inmbox           = TAVOR_NUM_MAILBOXES_SHIFT;
  96 uint32_t tavor_log_num_outmbox          = TAVOR_NUM_MAILBOXES_SHIFT;
  97 uint32_t tavor_log_num_intr_inmbox      = TAVOR_NUM_INTR_MAILBOXES_SHIFT;
  98 uint32_t tavor_log_num_intr_outmbox     = TAVOR_NUM_INTR_MAILBOXES_SHIFT;
  99 uint32_t tavor_log_inmbox_size          = TAVOR_MBOX_SIZE_SHIFT;
 100 uint32_t tavor_log_outmbox_size         = TAVOR_MBOX_SIZE_SHIFT;
 101 
 102 /* Number of supported UAR pages */
 103 uint32_t tavor_log_num_uar              = TAVOR_NUM_UAR_SHIFT;
 104 
 105 /* Number of supported Protection Domains (PD) */
 106 uint32_t tavor_log_num_pd               = TAVOR_NUM_PD_SHIFT;
 107 
 108 /* Number of supported Address Handles (AH) */
 109 uint32_t tavor_log_num_ah               = TAVOR_NUM_AH_SHIFT;
 110 
 111 /*
 112  * Number of total supported PKeys per PKey table (i.e.
 113  * per port).  Also the number of SGID per GID table.
 114  */
 115 uint32_t tavor_log_max_pkeytbl          = TAVOR_NUM_PKEYTBL_SHIFT;
 116 uint32_t tavor_log_max_gidtbl           = TAVOR_NUM_GIDTBL_SHIFT;
 117 
 118 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
 119 uint32_t tavor_hca_max_rdma_in_qp       = TAVOR_HCA_MAX_RDMA_IN_QP;
 120 uint32_t tavor_hca_max_rdma_out_qp      = TAVOR_HCA_MAX_RDMA_OUT_QP;
 121 
 122 /* Maximum supported MTU and portwidth */
 123 uint32_t tavor_max_mtu                  = TAVOR_MAX_MTU;
 124 uint32_t tavor_max_port_width           = TAVOR_MAX_PORT_WIDTH;
 125 
 126 /* Number of supported Virtual Lanes (VL) */
 127 uint32_t tavor_max_vlcap                = TAVOR_MAX_VLCAP;
 128 
 129 /* Number of supported ports (1 or 2) */
 130 uint32_t tavor_num_ports                = TAVOR_NUM_PORTS;
 131 
 132 /*
 133  * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
 134  * QP1, respectively.
 135  */
 136 uint32_t tavor_qp0_agents_in_fw         = 1;
 137 uint32_t tavor_qp1_agents_in_fw         = 0;
 138 
 139 /*
 140  * Whether DMA mappings should be made with DDI_DMA_STREAMING or with
 141  * DDI_DMA_CONSISTENT mode.  Note: 0 for "streaming", 1 for "consistent"
 142  */
 143 uint32_t tavor_streaming_consistent     = 1;
 144 
 145 /*
 146  * For DMA mappings made with DDI_DMA_CONSISTENT, this flag determines
 147  * whether to override the necessity for calls to ddi_dma_sync().
 148  */
 149 uint32_t tavor_consistent_syncoverride  = 0;
 150 
 151 /*
 152  * Whether DMA mappings should bypass the PCI IOMMU or not.
 153  * tavor_iommu_bypass is a global setting for all memory addresses.  However,
 154  * if set to BYPASS, memory attempted to be registered for streaming (ie:
 155  * NON-COHERENT) will necessarily turn off BYPASS for that registration.  To
 156  * instead disable streaming in this situation the
 157  * 'tavor_disable_streaming_on_bypass' can be set to 1.  This setting will
 158  * change the memory mapping to be implicitly consistent (ie: COHERENT), and
 159  * will still perform the iommu BYPASS operation.
 160  */
 161 uint32_t tavor_iommu_bypass             = 1;
 162 uint32_t tavor_disable_streaming_on_bypass = 0;
 163 
 164 /*
 165  * Whether QP work queues should be allocated from system memory or
 166  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
 167  */
 168 uint32_t tavor_qp_wq_inddr              = 0;
 169 
 170 /*
 171  * Whether SRQ work queues should be allocated from system memory or
 172  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
 173  */
 174 uint32_t tavor_srq_wq_inddr             = 0;
 175 
 176 /*
 177  * Whether Tavor should use MSI (Message Signaled Interrupts), if available.
 178  * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
 179  */
 180 uint32_t tavor_use_msi_if_avail         = 1;
 181 
 182 /*
 183  * This is a patchable variable that determines the time we will wait after
 184  * initiating SW reset before we do our first read from Tavor config space.
 185  * If this value is set too small (less than the default 100ms), it is
 186  * possible for Tavor hardware to be unready to respond to the config cycle
 187  * reads.  This could cause master abort on the PCI bridge.  Note: If
 188  * "tavor_sw_reset_delay" is set to zero, then no software reset of the Tavor
 189  * device will be attempted.
 190  */
 191 uint32_t tavor_sw_reset_delay           = TAVOR_SW_RESET_DELAY;
 192 
 193 /*
 194  * These are patchable variables for tavor command polling. The poll_delay is
 195  * the number of usec to wait in-between calls to poll the 'go' bit.  The
 196  * poll_max is the total number of usec to loop in waiting for the 'go' bit to
 197  * clear.
 198  */
 199 uint32_t tavor_cmd_poll_delay           = TAVOR_CMD_POLL_DELAY;
 200 uint32_t tavor_cmd_poll_max             = TAVOR_CMD_POLL_MAX;
 201 
 202 /*
 203  * This is a patchable variable that determines the frequency with which
 204  * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
 205  * set in at least every 2^tavor_qp_ackreq_freq packets (but at least once
 206  * per message, i.e. in the last packet).  Tuning this value can increase
 207  * IB fabric utilization by cutting down on the number of unnecessary ACKs.
 208  */
 209 uint32_t tavor_qp_ackreq_freq           = TAVOR_QP_ACKREQ_FREQ;
 210 
/*
 * This is a patchable variable that determines the default value for the
 * maximum number of outstanding split transactions.  The number of
 * outstanding split transactions (i.e. PCI reads) has an effect on device
 * throughput.  The value here should not be modified as it defines the
 * default (least common denominator - one (1) PCI read) behavior that is
 * guaranteed to work, regardless of how the Tavor firmware has been
 * initialized.  The format for this variable is the same as the corresponding
 * field in the "PCI-X Command Register".
 */
 221 #ifdef  __sparc
 222 /*
 223  * Default SPARC platforms to be 1 outstanding PCI read.
 224  */
 225 int tavor_max_out_splt_trans    = 0;
 226 #else
 227 /*
 228  * Default non-SPARC platforms to be the default as set in tavor firmware
 229  * number of outstanding PCI reads.
 230  */
 231 int tavor_max_out_splt_trans    = -1;
 232 #endif
 233 
/*
 * This is a patchable variable that determines the default value for the
 * maximum size of PCI read burst.  This maximum size has an effect on
 * device throughput.  The value here should not be modified as it defines
 * the default (least common denominator - 512B read) behavior that is
 * guaranteed to work, regardless of how the Tavor device has been
 * initialized.  The format for this variable is the same as the corresponding
 * field in the "PCI-X Command Register".
 */
 243 #ifdef  __sparc
 244 /*
 245  * Default SPARC platforms to be 512B read.
 246  */
 247 int tavor_max_mem_rd_byte_cnt   = 0;
 248 static void tavor_check_iommu_bypass(tavor_state_t *state,
 249     tavor_cfg_profile_t *cp);
 250 #else
 251 /*
 252  * Default non-SPARC platforms to be the default as set in tavor firmware.
 253  *
 254  */
 255 int tavor_max_mem_rd_byte_cnt   = -1;
 256 #endif
 257 
 258 static void tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp);
 259 static void tavor_cfg_prop_lookup(tavor_state_t *state,
 260     tavor_cfg_profile_t *cp);
 261 
 262 /*
 263  * tavor_cfg_profile_init_phase1()
 264  *    Context: Only called from attach() path context
 265  */
 266 int
 267 tavor_cfg_profile_init_phase1(tavor_state_t *state)
 268 {
 269         tavor_cfg_profile_t     *cp;
 270 
 271         TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase1);
 272 
 273         /*
 274          * Allocate space for the configuration profile structure
 275          */
 276         cp = (tavor_cfg_profile_t *)kmem_zalloc(sizeof (tavor_cfg_profile_t),
 277             KM_SLEEP);
 278 
 279         cp->cp_qp0_agents_in_fw              = tavor_qp0_agents_in_fw;
 280         cp->cp_qp1_agents_in_fw              = tavor_qp1_agents_in_fw;
 281         cp->cp_sw_reset_delay                = tavor_sw_reset_delay;
 282         cp->cp_cmd_poll_delay                = tavor_cmd_poll_delay;
 283         cp->cp_cmd_poll_max          = tavor_cmd_poll_max;
 284         cp->cp_ackreq_freq           = tavor_qp_ackreq_freq;
 285         cp->cp_max_out_splt_trans    = tavor_max_out_splt_trans;
 286         cp->cp_max_mem_rd_byte_cnt   = tavor_max_mem_rd_byte_cnt;
 287         cp->cp_srq_enable            = tavor_srq_enable;
 288         cp->cp_fmr_enable            = 0;
 289         cp->cp_fmr_max_remaps                = 0;
 290 
 291         /*
 292          * Although most of the configuration is enabled in "phase2" of the
 293          * cfg_profile_init, we have to setup the OUT mailboxes here, since
 294          * they are used immediately after this "phase1" completes.  Check for
 295          * alt_config_enable, and set the values appropriately.  Otherwise, the
 296          * config profile is setup using the values based on the dimm size.
 297          * While it is expected that the mailbox size and number will remain
 298          * the same independent of dimm size, we separate it out here anyway
 299          * for completeness.
 300          *
 301          * We have to setup SRQ settings here because MOD_STAT_CFG must be
 302          * called before our call to QUERY_DEVLIM.  If SRQ is enabled, then we
 303          * must enable it in the firmware so that the phase2 settings will have
 304          * the right device limits.
 305          */
 306         if (tavor_alt_config_enable) {
 307                 cp->cp_log_num_outmbox               = tavor_log_num_outmbox;
 308                 cp->cp_log_num_intr_outmbox  = tavor_log_num_intr_outmbox;
 309                 cp->cp_log_outmbox_size              = tavor_log_outmbox_size;
 310                 cp->cp_log_num_inmbox                = tavor_log_num_inmbox;
 311                 cp->cp_log_num_intr_inmbox   = tavor_log_num_intr_inmbox;
 312                 cp->cp_log_inmbox_size               = tavor_log_inmbox_size;
 313                 cp->cp_log_num_srq           = tavor_log_num_srq;
 314                 cp->cp_log_max_srq_sz                = tavor_log_max_srq_sz;
 315 
 316         } else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
 317                 cp->cp_log_num_outmbox               = TAVOR_NUM_MAILBOXES_SHIFT;
 318                 cp->cp_log_num_intr_outmbox  =
 319                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 320                 cp->cp_log_outmbox_size              = TAVOR_MBOX_SIZE_SHIFT;
 321                 cp->cp_log_num_inmbox                = TAVOR_NUM_MAILBOXES_SHIFT;
 322                 cp->cp_log_num_intr_inmbox   =
 323                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 324                 cp->cp_log_inmbox_size               = TAVOR_MBOX_SIZE_SHIFT;
 325                 cp->cp_log_num_srq           = TAVOR_NUM_SRQ_SHIFT_256;
 326                 cp->cp_log_max_srq_sz                = TAVOR_SRQ_SZ_SHIFT;
 327 
 328         } else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
 329                 cp->cp_log_num_outmbox               = TAVOR_NUM_MAILBOXES_SHIFT;
 330                 cp->cp_log_num_intr_outmbox  =
 331                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 332                 cp->cp_log_outmbox_size              = TAVOR_MBOX_SIZE_SHIFT;
 333                 cp->cp_log_num_inmbox                = TAVOR_NUM_MAILBOXES_SHIFT;
 334                 cp->cp_log_num_intr_inmbox   =
 335                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 336                 cp->cp_log_inmbox_size               = TAVOR_MBOX_SIZE_SHIFT;
 337                 cp->cp_log_num_srq           = TAVOR_NUM_SRQ_SHIFT_128;
 338                 cp->cp_log_max_srq_sz                = TAVOR_SRQ_SZ_SHIFT;
 339 
 340         } else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
 341                 cp->cp_log_num_outmbox               = TAVOR_NUM_MAILBOXES_SHIFT;
 342                 cp->cp_log_num_intr_outmbox  =
 343                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 344                 cp->cp_log_outmbox_size              = TAVOR_MBOX_SIZE_SHIFT;
 345                 cp->cp_log_num_inmbox                = TAVOR_NUM_MAILBOXES_SHIFT;
 346                 cp->cp_log_num_intr_inmbox   =
 347                     TAVOR_NUM_INTR_MAILBOXES_SHIFT;
 348                 cp->cp_log_inmbox_size               = TAVOR_MBOX_SIZE_SHIFT;
 349                 cp->cp_log_num_srq           = TAVOR_NUM_SRQ_SHIFT_MIN;
 350                 cp->cp_log_max_srq_sz                = TAVOR_SRQ_SZ_SHIFT_MIN;
 351 
 352         } else {
 353                 TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
 354                     TAVOR_TNF_ERROR, "");
 355                 return (DDI_FAILURE);
 356         }
 357 
 358         /*
 359          * Set default DMA mapping mode.  Ensure consistency of flags
 360          * with both architecture type and other configuration flags.
 361          */
 362         if (tavor_streaming_consistent == 0) {
 363 #ifdef  __sparc
 364                 cp->cp_streaming_consistent = DDI_DMA_STREAMING;
 365 
 366                 /* Can't do both "streaming" and IOMMU bypass */
 367                 if (tavor_iommu_bypass != 0) {
 368                         TNF_PROBE_0(tavor_cfg_profile_streamingbypass_fail,
 369                             TAVOR_TNF_ERROR, "");
 370                         kmem_free(cp, sizeof (tavor_cfg_profile_t));
 371                         return (DDI_FAILURE);
 372                 }
 373 #else
 374                 cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
 375 #endif
 376         } else {
 377                 cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
 378         }
 379 
 380         /* Determine whether to override ddi_dma_sync() */
 381         cp->cp_consistent_syncoverride = tavor_consistent_syncoverride;
 382 
 383         /* Attach the configuration profile to Tavor softstate */
 384         state->ts_cfg_profile = cp;
 385 
 386         TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase1);
 387         return (DDI_SUCCESS);
 388 }
 389 
 390 /*
 391  * tavor_cfg_profile_init_phase2()
 392  *    Context: Only called from attach() path context
 393  */
 394 int
 395 tavor_cfg_profile_init_phase2(tavor_state_t *state)
 396 {
 397         tavor_cfg_profile_t     *cp;
 398 
 399         TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase2);
 400 
 401         /* Read the configuration profile from Tavor softstate */
 402         cp = state->ts_cfg_profile;
 403 
 404         /*
 405          * Verify the config profile setting.  The 'setting' should already be
 406          * set, during a call to ddi_dev_regsize() to get the size of DDR
 407          * memory, or during a fallback to a smaller supported size.  If it is
 408          * not set, we should not have reached this 'phase2'.  So we assert
 409          * here.
 410          */
 411         ASSERT(state->ts_cfg_profile_setting != 0);
 412 
 413         /*
 414          * The automatic configuration override is the
 415          * 'tavor_alt_config_enable' variable.  If this is set, we no longer
 416          * use the DIMM size to enable the correct profile.  Instead, all of
 417          * the tavor config options at the top of this file are used directly.
 418          *
 419          * This allows customization for a user who knows what they are doing
 420          * to set tavor configuration values manually.
 421          *
 422          * If this variable is 0, we do automatic config for both 128MB and
 423          * 256MB DIMM sizes.
 424          */
 425         if (tavor_alt_config_enable) {
 426                 /*
 427                  * Initialize the configuration profile
 428                  */
 429                 cp->cp_log_num_qp            = tavor_log_num_qp;
 430                 cp->cp_log_max_qp_sz         = tavor_log_max_qp_sz;
 431 
 432                 /* Determine WQE sizes from requested max SGLs */
 433                 tavor_cfg_wqe_sizes(cp);
 434 
 435                 cp->cp_log_num_cq            = tavor_log_num_cq;
 436                 cp->cp_log_max_cq_sz         = tavor_log_max_cq_sz;
 437                 cp->cp_log_default_eq_sz     = tavor_log_default_eq_sz;
 438                 cp->cp_log_num_rdb           = tavor_log_num_rdb;
 439                 cp->cp_log_num_mcg           = tavor_log_num_mcg;
 440                 cp->cp_num_qp_per_mcg                = tavor_num_qp_per_mcg;
 441                 cp->cp_log_num_mcg_hash              = tavor_log_num_mcg_hash;
 442                 cp->cp_log_num_mpt           = tavor_log_num_mpt;
 443                 cp->cp_log_max_mrw_sz                = tavor_log_max_mrw_sz;
 444                 cp->cp_log_num_mttseg                = tavor_log_num_mttseg;
 445                 cp->cp_log_num_uar           = tavor_log_num_uar;
 446                 cp->cp_log_num_pd            = tavor_log_num_pd;
 447                 cp->cp_log_num_ah            = tavor_log_num_ah;
 448                 cp->cp_log_max_pkeytbl               = tavor_log_max_pkeytbl;
 449                 cp->cp_log_max_gidtbl                = tavor_log_max_gidtbl;
 450                 cp->cp_hca_max_rdma_in_qp    = tavor_hca_max_rdma_in_qp;
 451                 cp->cp_hca_max_rdma_out_qp   = tavor_hca_max_rdma_out_qp;
 452                 cp->cp_max_mtu                       = tavor_max_mtu;
 453                 cp->cp_max_port_width                = tavor_max_port_width;
 454                 cp->cp_max_vlcap             = tavor_max_vlcap;
 455                 cp->cp_num_ports             = tavor_num_ports;
 456                 cp->cp_qp0_agents_in_fw              = tavor_qp0_agents_in_fw;
 457                 cp->cp_qp1_agents_in_fw              = tavor_qp1_agents_in_fw;
 458                 cp->cp_sw_reset_delay                = tavor_sw_reset_delay;
 459                 cp->cp_ackreq_freq           = tavor_qp_ackreq_freq;
 460                 cp->cp_max_out_splt_trans    = tavor_max_out_splt_trans;
 461                 cp->cp_max_mem_rd_byte_cnt   = tavor_max_mem_rd_byte_cnt;
 462 
 463         } else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
 464                 /*
 465                  * Initialize the configuration profile
 466                  */
 467                 cp->cp_log_num_qp            = TAVOR_NUM_QP_SHIFT_256;
 468                 cp->cp_log_max_qp_sz         = TAVOR_QP_SZ_SHIFT;
 469 
 470                 /* Determine WQE sizes from requested max SGLs */
 471                 tavor_cfg_wqe_sizes(cp);
 472 
 473                 cp->cp_log_num_cq            = TAVOR_NUM_CQ_SHIFT_256;
 474                 cp->cp_log_max_cq_sz         = TAVOR_CQ_SZ_SHIFT;
 475                 cp->cp_log_default_eq_sz     = TAVOR_DEFAULT_EQ_SZ_SHIFT;
 476                 cp->cp_log_num_rdb           = TAVOR_NUM_RDB_SHIFT_256;
 477                 cp->cp_log_num_mcg           = TAVOR_NUM_MCG_SHIFT;
 478                 cp->cp_num_qp_per_mcg                = TAVOR_NUM_QP_PER_MCG;
 479                 cp->cp_log_num_mcg_hash              = TAVOR_NUM_MCG_HASH_SHIFT;
 480                 cp->cp_log_num_mpt           = TAVOR_NUM_MPT_SHIFT_256;
 481                 cp->cp_log_max_mrw_sz                = TAVOR_MAX_MEM_MPT_SHIFT_256;
 482                 cp->cp_log_num_mttseg                = TAVOR_NUM_MTTSEG_SHIFT;
 483                 cp->cp_log_num_uar           = TAVOR_NUM_UAR_SHIFT;
 484                 cp->cp_log_num_pd            = TAVOR_NUM_PD_SHIFT;
 485                 cp->cp_log_num_ah            = TAVOR_NUM_AH_SHIFT;
 486                 cp->cp_log_max_pkeytbl               = TAVOR_NUM_PKEYTBL_SHIFT;
 487                 cp->cp_log_max_gidtbl                = TAVOR_NUM_GIDTBL_SHIFT;
 488                 cp->cp_hca_max_rdma_in_qp    = TAVOR_HCA_MAX_RDMA_IN_QP;
 489                 cp->cp_hca_max_rdma_out_qp   = TAVOR_HCA_MAX_RDMA_OUT_QP;
 490                 cp->cp_max_mtu                       = TAVOR_MAX_MTU;
 491                 cp->cp_max_port_width                = TAVOR_MAX_PORT_WIDTH;
 492                 cp->cp_max_vlcap             = TAVOR_MAX_VLCAP;
 493                 cp->cp_num_ports             = TAVOR_NUM_PORTS;
 494                 cp->cp_qp0_agents_in_fw              = tavor_qp0_agents_in_fw;
 495                 cp->cp_qp1_agents_in_fw              = tavor_qp1_agents_in_fw;
 496                 cp->cp_sw_reset_delay                = tavor_sw_reset_delay;
 497                 cp->cp_ackreq_freq           = tavor_qp_ackreq_freq;
 498                 cp->cp_max_out_splt_trans    = tavor_max_out_splt_trans;
 499                 cp->cp_max_mem_rd_byte_cnt   = tavor_max_mem_rd_byte_cnt;
 500 
 501         } else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
 502                 /*
 503                  * Initialize the configuration profile
 504                  */
 505                 cp->cp_log_num_qp            = TAVOR_NUM_QP_SHIFT_128;
 506                 cp->cp_log_max_qp_sz         = TAVOR_QP_SZ_SHIFT;
 507 
 508                 /* Determine WQE sizes from requested max SGLs */
 509                 tavor_cfg_wqe_sizes(cp);
 510 
 511                 cp->cp_log_num_cq            = TAVOR_NUM_CQ_SHIFT_128;
 512                 cp->cp_log_max_cq_sz         = TAVOR_CQ_SZ_SHIFT;
 513                 cp->cp_log_default_eq_sz     = TAVOR_DEFAULT_EQ_SZ_SHIFT;
 514                 cp->cp_log_num_rdb           = TAVOR_NUM_RDB_SHIFT_128;
 515                 cp->cp_log_num_mcg           = TAVOR_NUM_MCG_SHIFT;
 516                 cp->cp_num_qp_per_mcg                = TAVOR_NUM_QP_PER_MCG;
 517                 cp->cp_log_num_mcg_hash              = TAVOR_NUM_MCG_HASH_SHIFT;
 518                 cp->cp_log_num_mpt           = TAVOR_NUM_MPT_SHIFT_128;
 519                 cp->cp_log_max_mrw_sz                = TAVOR_MAX_MEM_MPT_SHIFT_128;
 520                 cp->cp_log_num_mttseg                = TAVOR_NUM_MTTSEG_SHIFT;
 521                 cp->cp_log_num_uar           = TAVOR_NUM_UAR_SHIFT;
 522                 cp->cp_log_num_pd            = TAVOR_NUM_PD_SHIFT;
 523                 cp->cp_log_num_ah            = TAVOR_NUM_AH_SHIFT;
 524                 cp->cp_log_max_pkeytbl               = TAVOR_NUM_PKEYTBL_SHIFT;
 525                 cp->cp_log_max_gidtbl                = TAVOR_NUM_GIDTBL_SHIFT;
 526                 cp->cp_hca_max_rdma_in_qp    = TAVOR_HCA_MAX_RDMA_IN_QP;
 527                 cp->cp_hca_max_rdma_out_qp   = TAVOR_HCA_MAX_RDMA_OUT_QP;
 528                 cp->cp_max_mtu                       = TAVOR_MAX_MTU;
 529                 cp->cp_max_port_width                = TAVOR_MAX_PORT_WIDTH;
 530                 cp->cp_max_vlcap             = TAVOR_MAX_VLCAP;
 531                 cp->cp_num_ports             = TAVOR_NUM_PORTS;
 532                 cp->cp_qp0_agents_in_fw              = tavor_qp0_agents_in_fw;
 533                 cp->cp_qp1_agents_in_fw              = tavor_qp1_agents_in_fw;
 534                 cp->cp_sw_reset_delay                = tavor_sw_reset_delay;
 535                 cp->cp_ackreq_freq           = tavor_qp_ackreq_freq;
 536                 cp->cp_max_out_splt_trans    = tavor_max_out_splt_trans;
 537                 cp->cp_max_mem_rd_byte_cnt   = tavor_max_mem_rd_byte_cnt;
 538 
 539         } else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
 540                 /*
 541                  * Initialize the configuration profile for minimal footprint.
 542                  */
 543 
 544                 cp->cp_log_num_qp            = TAVOR_NUM_QP_SHIFT_MIN;
 545                 cp->cp_log_max_qp_sz         = TAVOR_QP_SZ_SHIFT_MIN;
 546 
 547                 /* Determine WQE sizes from requested max SGLs */
 548                 tavor_cfg_wqe_sizes(cp);
 549 
 550                 cp->cp_log_num_cq            = TAVOR_NUM_CQ_SHIFT_MIN;
 551                 cp->cp_log_max_cq_sz         = TAVOR_CQ_SZ_SHIFT_MIN;
 552                 cp->cp_log_default_eq_sz     = TAVOR_DEFAULT_EQ_SZ_SHIFT;
 553                 cp->cp_log_num_rdb           = TAVOR_NUM_RDB_SHIFT_MIN;
 554                 cp->cp_log_num_mcg           = TAVOR_NUM_MCG_SHIFT_MIN;
 555                 cp->cp_num_qp_per_mcg                = TAVOR_NUM_QP_PER_MCG_MIN;
 556                 cp->cp_log_num_mcg_hash              = TAVOR_NUM_MCG_HASH_SHIFT_MIN;
 557                 cp->cp_log_num_mpt           = TAVOR_NUM_MPT_SHIFT_MIN;
 558                 cp->cp_log_max_mrw_sz                = TAVOR_MAX_MEM_MPT_SHIFT_MIN;
 559                 cp->cp_log_num_mttseg                = TAVOR_NUM_MTTSEG_SHIFT_MIN;
 560                 cp->cp_log_num_uar           = TAVOR_NUM_UAR_SHIFT_MIN;
 561                 cp->cp_log_num_pd            = TAVOR_NUM_PD_SHIFT;
 562                 cp->cp_log_num_ah            = TAVOR_NUM_AH_SHIFT_MIN;
 563                 cp->cp_log_max_pkeytbl               = TAVOR_NUM_PKEYTBL_SHIFT;
 564                 cp->cp_log_max_gidtbl                = TAVOR_NUM_GIDTBL_SHIFT;
 565                 cp->cp_hca_max_rdma_in_qp    = TAVOR_HCA_MAX_RDMA_IN_QP;
 566                 cp->cp_hca_max_rdma_out_qp   = TAVOR_HCA_MAX_RDMA_OUT_QP;
 567                 cp->cp_max_mtu                       = TAVOR_MAX_MTU;
 568                 cp->cp_max_port_width                = TAVOR_MAX_PORT_WIDTH;
 569                 cp->cp_max_vlcap             = TAVOR_MAX_VLCAP;
 570                 cp->cp_num_ports             = TAVOR_NUM_PORTS;
 571                 cp->cp_qp0_agents_in_fw              = tavor_qp0_agents_in_fw;
 572                 cp->cp_qp1_agents_in_fw              = tavor_qp1_agents_in_fw;
 573                 cp->cp_sw_reset_delay                = tavor_sw_reset_delay;
 574                 cp->cp_ackreq_freq           = tavor_qp_ackreq_freq;
 575                 cp->cp_max_out_splt_trans    = tavor_max_out_splt_trans;
 576                 cp->cp_max_mem_rd_byte_cnt   = tavor_max_mem_rd_byte_cnt;
 577 
 578         } else {
 579                 TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
 580                     TAVOR_TNF_ERROR, "");
 581                 return (DDI_FAILURE);
 582         }
 583 
 584         /*
 585          * Set IOMMU bypass or not.  Ensure consistency of flags with
 586          * architecture type.
 587          */
 588 #ifdef __sparc
 589         if (tavor_iommu_bypass == 1) {
 590                 tavor_check_iommu_bypass(state, cp);
 591         } else {
 592                 cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
 593                 cp->cp_disable_streaming_on_bypass = 0;
 594         }
 595 #else
 596         cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
 597         cp->cp_disable_streaming_on_bypass = 0;
 598 #endif
 599         /* Set whether QP WQEs will be in DDR or not */
 600         cp->cp_qp_wq_inddr = (tavor_qp_wq_inddr == 0) ?
 601             TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
 602 
 603         /* Set whether SRQ WQEs will be in DDR or not */
 604         cp->cp_srq_wq_inddr = (tavor_srq_wq_inddr == 0) ?
 605             TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
 606 
 607         cp->cp_use_msi_if_avail = tavor_use_msi_if_avail;
 608 
 609         /* Determine additional configuration from optional properties */
 610         tavor_cfg_prop_lookup(state, cp);
 611 
 612         TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase2);
 613         return (DDI_SUCCESS);
 614 }
 615 
 616 
 617 /*
 618  * tavor_cfg_profile_fini()
 619  *    Context: Only called from attach() and/or detach() path contexts
 620  */
 621 void
 622 tavor_cfg_profile_fini(tavor_state_t *state)
 623 {
 624         TAVOR_TNF_ENTER(tavor_cfg_profile_fini);
 625 
 626         /*
 627          * Free up the space for configuration profile
 628          */
 629         kmem_free(state->ts_cfg_profile, sizeof (tavor_cfg_profile_t));
 630 
 631         TAVOR_TNF_EXIT(tavor_cfg_profile_fini);
 632 }
 633 
 634 
 635 /*
 636  * tavor_cfg_wqe_sizes()
 637  *    Context: Only called from attach() path context
 638  */
 639 static void
 640 tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp)
 641 {
 642         uint_t  max_size, log2;
 643         uint_t  max_sgl, real_max_sgl;
 644 
 645         /*
 646          * Get the requested maximum number SGL per WQE from the Tavor
 647          * patchable variable
 648          */
 649         max_sgl = tavor_wqe_max_sgl;
 650 
 651         /*
 652          * Use requested maximum number of SGL to calculate the max descriptor
 653          * size (while guaranteeing that the descriptor size is a power-of-2
 654          * cachelines).  We have to use the calculation for QP1 MLX transport
 655          * because the possibility that we might need to inline a GRH, along
 656          * with all the other headers and alignment restrictions, sets the
 657          * maximum for the number of SGLs that we can advertise support for.
 658          */
 659         max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
 660         log2 = highbit(max_size);
 661         if (ISP2(max_size)) {
 662                 log2 = log2 - 1;
 663         }
 664         max_size = (1 << log2);
 665 
 666         /*
 667          * Now clip the maximum descriptor size based on Tavor HW maximum
 668          */
 669         max_size = min(max_size, TAVOR_QP_WQE_MAX_SIZE);
 670 
 671         /*
 672          * Then use the calculated max descriptor size to determine the "real"
 673          * maximum SGL (the number beyond which we would roll over to the next
 674          * power-of-2).
 675          */
 676         real_max_sgl = (max_size - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
 677 
 678         /* Then save away this configuration information */
 679         cp->cp_wqe_max_sgl   = max_sgl;
 680         cp->cp_wqe_real_max_sgl = real_max_sgl;
 681 
 682         /* SRQ SGL gets set to it's own patchable variable value */
 683         cp->cp_srq_max_sgl           = tavor_srq_max_sgl;
 684 }
 685 
 686 
 687 /*
 688  * tavor_cfg_prop_lookup()
 689  *    Context: Only called from attach() path context
 690  */
 691 static void
 692 tavor_cfg_prop_lookup(tavor_state_t *state, tavor_cfg_profile_t *cp)
 693 {
 694         uint_t          num_ports, nelementsp;
 695         uchar_t         *datap;
 696         int             status;
 697 
 698         /*
 699          * Read the property defining the number of Tavor ports to
 700          * support.  If the property is undefined or invalid, then return.
 701          * We return here assuming also that OBP is not supposed to be setting
 702          * up other properties in this case (eg: HCA plugin cards).  But if
 703          * this property is valid, then we print out a message for the other
 704          * properties to show an OBP error.
 705          */
 706         num_ports = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
 707             DDI_PROP_DONTPASS, "#ports", 0);
 708         if ((num_ports > TAVOR_NUM_PORTS) || (num_ports == 0)) {
 709                 return;
 710         }
 711         cp->cp_num_ports   = num_ports;
 712 
 713         /*
 714          * The system image guid is not currently supported in the 1275
 715          * binding.  So we leave this commented out for now.
 716          */
 717 #ifdef SUPPORTED_IN_1275_BINDING
 718         /*
 719          * Read the property defining the value to use later to override the
 720          * default SystemImageGUID (in firmware).  If the property is
 721          * undefined, then return.
 722          */
 723         status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
 724             DDI_PROP_DONTPASS, "system-image-guid", &datap, &nelementsp);
 725         if (status == DDI_PROP_SUCCESS) {
 726                 cp->cp_sysimgguid = ((uint64_t *)datap)[0];
 727                 ddi_prop_free(datap);
 728         } else {
 729                 cmn_err(CE_NOTE,
 730                     "Unable to read OBP system-image-guid property");
 731         }
 732 #endif
 733 
 734         /*
 735          * Read the property defining the value to use later to override
 736          * the default SystemImageGUID (in firmware).  If the property is
 737          * undefined, then return.
 738          */
 739         status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
 740             DDI_PROP_DONTPASS, "node-guid", &datap, &nelementsp);
 741         if (status == DDI_PROP_SUCCESS) {
 742                 cp->cp_nodeguid = ((uint64_t *)datap)[0];
 743                 ddi_prop_free(datap);
 744         } else {
 745                 cmn_err(CE_NOTE, "Unable to read OBP node-guid property");
 746         }
 747 
 748         /*
 749          * Using the value for the number of ports (above) read the properties
 750          * used to later to override the default PortGUIDs for each Tavor port.
 751          * If either of these properties are undefined, then return.
 752          */
 753         if (num_ports == TAVOR_NUM_PORTS) {
 754                 status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY,
 755                     state->ts_dip, DDI_PROP_DONTPASS, "port-2-guid", &datap,
 756                     &nelementsp);
 757                 if (status == DDI_PROP_SUCCESS) {
 758                         cp->cp_portguid[1] = ((uint64_t *)datap)[0];
 759                         ddi_prop_free(datap);
 760                 } else {
 761                         cmn_err(CE_NOTE,
 762                             "Unable to read OBP port-2-guid property");
 763                 }
 764         }
 765         status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
 766             DDI_PROP_DONTPASS, "port-1-guid", &datap, &nelementsp);
 767         if (status == DDI_PROP_SUCCESS) {
 768                 cp->cp_portguid[0] = ((uint64_t *)datap)[0];
 769                 ddi_prop_free(datap);
 770         } else {
 771                 cmn_err(CE_NOTE, "Unable to read OBP port-1-guid property");
 772         }
 773 }
 774 
 775 #ifdef __sparc
 776 /*
 777  * tavor_check_iommu_bypass()
 778  *    Context: Only called from attach() path context
 779  */
 780 static void
 781 tavor_check_iommu_bypass(tavor_state_t *state, tavor_cfg_profile_t *cp)
 782 {
 783         ddi_dma_handle_t        dmahdl;
 784         ddi_dma_attr_t          dma_attr;
 785         int                     status;
 786 
 787         tavor_dma_attr_init(&dma_attr);
 788 
 789         /* Try mapping for IOMMU bypass (Force Physical) */
 790         dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
 791 
 792         /*
 793          * Call ddi_dma_alloc_handle().  If this returns DDI_DMA_BADATTR then
 794          * it is not possible to use IOMMU bypass with our PCI bridge parent.
 795          * For example, certain versions of Tomatillo do not support IOMMU
 796          * bypass.  Since the function we are in can only be called if iommu
 797          * bypass was requested in the config profile, we configure for bypass
 798          * if the ddi_dma_alloc_handle() was successful.  Otherwise, we
 799          * configure for non-bypass (ie: normal) mapping.
 800          */
 801         status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
 802             DDI_DMA_SLEEP, NULL, &dmahdl);
 803         if (status == DDI_DMA_BADATTR) {
 804                 cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
 805                 cp->cp_disable_streaming_on_bypass = 0;
 806         } else {
 807                 cp->cp_iommu_bypass = TAVOR_BINDMEM_BYPASS;
 808                 cp->cp_disable_streaming_on_bypass =
 809                     tavor_disable_streaming_on_bypass;
 810 
 811                 if (status == DDI_SUCCESS) {
 812                         ddi_dma_free_handle(&dmahdl);
 813                 }
 814         }
 815 }
 816 #endif