1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_qp.c
  28  *    Hermon Queue Pair Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, and
  31  *    querying the Hermon queue pairs.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/conf.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/modctl.h>
  39 #include <sys/bitmap.h>
  40 #include <sys/sysmacros.h>
  41 
  42 #include <sys/ib/adapters/hermon/hermon.h>
  43 #include <sys/ib/ib_pkt_hdrs.h>
  44 
  45 static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
  46     hermon_rsrc_t *qpc);
  47 static int hermon_qpn_avl_compare(const void *q, const void *e);
  48 static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
  49     ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
  50 static int hermon_special_qp_rsrc_free(hermon_state_t *state,
  51     ibt_sqp_type_t type, uint_t port);
  52 static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
  53     uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
  54     uint_t *logwqesz, uint_t *max_sgl);
  55 
  56 /*
  57  * hermon_qp_alloc()
  58  *    Context: Can be called only from user or kernel context.
  59  */
  60 int
  61 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
  62     uint_t sleepflag)
  63 {
  64         hermon_rsrc_t                   *qpc, *rsrc;
  65         hermon_rsrc_type_t              rsrc_type;
  66         hermon_umap_db_entry_t          *umapdb;
  67         hermon_qphdl_t                  qp;
  68         ibt_qp_alloc_attr_t             *attr_p;
  69         ibt_qp_alloc_flags_t            alloc_flags;
  70         ibt_qp_type_t                   type;
  71         hermon_qp_wq_type_t             swq_type;
  72         ibtl_qp_hdl_t                   ibt_qphdl;
  73         ibt_chan_sizes_t                *queuesz_p;
  74         ib_qpn_t                        *qpn;
  75         hermon_qphdl_t                  *qphdl;
  76         ibt_mr_attr_t                   mr_attr;
  77         hermon_mr_options_t             mr_op;
  78         hermon_srqhdl_t                 srq;
  79         hermon_pdhdl_t                  pd;
  80         hermon_cqhdl_t                  sq_cq, rq_cq;
  81         hermon_mrhdl_t                  mr;
  82         uint64_t                        value, qp_desc_off;
  83         uint64_t                        *thewqe, thewqesz;
  84         uint32_t                        *sq_buf, *rq_buf;
  85         uint32_t                        log_qp_sq_size, log_qp_rq_size;
  86         uint32_t                        sq_size, rq_size;
  87         uint32_t                        sq_depth, rq_depth;
  88         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
  89         uint32_t                        max_sgl, max_recv_sgl, uarpg;
  90         uint_t                          qp_is_umap;
  91         uint_t                          qp_srq_en, i, j;
  92         int                             status, flag;
  93 
  94         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
  95 
  96         /*
  97          * Extract the necessary info from the hermon_qp_info_t structure
  98          */
  99         attr_p    = qpinfo->qpi_attrp;
 100         type      = qpinfo->qpi_type;
 101         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 102         queuesz_p = qpinfo->qpi_queueszp;
 103         qpn       = qpinfo->qpi_qpn;
 104         qphdl     = &qpinfo->qpi_qphdl;
 105         alloc_flags = attr_p->qp_alloc_flags;
 106 
 107         /*
 108          * Verify correctness of alloc_flags.
 109          *
 110          * 1. FEXCH and RSS are only allocated via qp_range.
 111          */
 112         if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
 113                 return (IBT_INVALID_PARAM);
 114         }
 115         rsrc_type = HERMON_QPC;
 116         qp_is_umap = 0;
 117 
 118         /* 2. Make sure only one of these flags is set. */
 119         switch (alloc_flags &
 120             (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
 121         case IBT_QP_USER_MAP:
 122                 qp_is_umap = 1;
 123                 break;
 124         case IBT_QP_USES_RFCI:
 125                 if (type != IBT_UD_RQP)
 126                         return (IBT_INVALID_PARAM);
 127 
 128                 switch (attr_p->qp_fc.fc_hca_port) {
 129                 case 1:
 130                         rsrc_type = HERMON_QPC_RFCI_PORT1;
 131                         break;
 132                 case 2:
 133                         rsrc_type = HERMON_QPC_RFCI_PORT2;
 134                         break;
 135                 default:
 136                         return (IBT_INVALID_PARAM);
 137                 }
 138                 break;
 139         case IBT_QP_USES_FCMD:
 140                 if (type != IBT_UD_RQP)
 141                         return (IBT_INVALID_PARAM);
 142                 break;
 143         case 0:
 144                 break;
 145         default:
 146                 return (IBT_INVALID_PARAM);     /* conflicting flags set */
 147         }
 148 
 149         /*
 150          * Determine whether QP is being allocated for userland access or
 151          * whether it is being allocated for kernel access.  If the QP is
 152          * being allocated for userland access, then lookup the UAR
 153          * page number for the current process.  Note:  If this is not found
 154          * (e.g. if the process has not previously open()'d the Hermon driver),
 155          * then an error is returned.
 156          */
 157         if (qp_is_umap) {
 158                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 159                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 160                 if (status != DDI_SUCCESS) {
 161                         return (IBT_INVALID_PARAM);
 162                 }
 163                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 164         } else {
 165                 uarpg = state->hs_kernel_uar_index;
 166         }
 167 
 168         /*
 169          * Determine whether QP is being associated with an SRQ
 170          */
 171         qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
 172         if (qp_srq_en) {
 173                 /*
 174                  * Check for valid SRQ handle pointers
 175                  */
 176                 if (attr_p->qp_ibc_srq_hdl == NULL) {
 177                         status = IBT_SRQ_HDL_INVALID;
 178                         goto qpalloc_fail;
 179                 }
 180                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
 181         }
 182 
 183         /*
 184          * Check for valid QP service type (only UD/RC/UC supported)
 185          */
 186         if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
 187             (type != IBT_UC_RQP))) {
 188                 status = IBT_QP_SRV_TYPE_INVALID;
 189                 goto qpalloc_fail;
 190         }
 191 
 192 
 193         /*
 194          * Check for valid PD handle pointer
 195          */
 196         if (attr_p->qp_pd_hdl == NULL) {
 197                 status = IBT_PD_HDL_INVALID;
 198                 goto qpalloc_fail;
 199         }
 200         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 201 
 202         /*
 203          * If on an SRQ, check to make sure the PD is the same
 204          */
 205         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
 206                 status = IBT_PD_HDL_INVALID;
 207                 goto qpalloc_fail;
 208         }
 209 
 210         /* Increment the reference count on the protection domain (PD) */
 211         hermon_pd_refcnt_inc(pd);
 212 
 213         /*
 214          * Check for valid CQ handle pointers
 215          *
 216          * FCMD QPs do not require a receive cq handle.
 217          */
 218         if (attr_p->qp_ibc_scq_hdl == NULL) {
 219                 status = IBT_CQ_HDL_INVALID;
 220                 goto qpalloc_fail1;
 221         }
 222         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 223         if ((attr_p->qp_ibc_rcq_hdl == NULL)) {
 224                 if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
 225                         status = IBT_CQ_HDL_INVALID;
 226                         goto qpalloc_fail1;
 227                 }
 228                 rq_cq = sq_cq;  /* just use the send cq */
 229         } else
 230                 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 231 
 232         /*
 233          * Increment the reference count on the CQs.  One or both of these
 234          * could return error if we determine that the given CQ is already
 235          * being used with a special (SMI/GSI) QP.
 236          */
 237         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
 238         if (status != DDI_SUCCESS) {
 239                 status = IBT_CQ_HDL_INVALID;
 240                 goto qpalloc_fail1;
 241         }
 242         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
 243         if (status != DDI_SUCCESS) {
 244                 status = IBT_CQ_HDL_INVALID;
 245                 goto qpalloc_fail2;
 246         }
 247 
 248         /*
 249          * Allocate an QP context entry.  This will be filled in with all
 250          * the necessary parameters to define the Queue Pair.  Unlike
 251          * other Hermon hardware resources, ownership is not immediately
 252          * given to hardware in the final step here.  Instead, we must
 253          * wait until the QP is later transitioned to the "Init" state before
 254          * passing the QP to hardware.  If we fail here, we must undo all
 255          * the reference count (CQ and PD).
 256          */
 257         status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
 258         if (status != DDI_SUCCESS) {
 259                 status = IBT_INSUFF_RESOURCE;
 260                 goto qpalloc_fail3;
 261         }
 262 
 263         /*
 264          * Allocate the software structure for tracking the queue pair
 265          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
 266          * undo the reference counts and the previous resource allocation.
 267          */
 268         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 269         if (status != DDI_SUCCESS) {
 270                 status = IBT_INSUFF_RESOURCE;
 271                 goto qpalloc_fail4;
 272         }
 273         qp = (hermon_qphdl_t)rsrc->hr_addr;
 274         bzero(qp, sizeof (struct hermon_sw_qp_s));
 275         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 276 
 277         qp->qp_alloc_flags = alloc_flags;
 278 
 279         /*
 280          * Calculate the QP number from QPC index.  This routine handles
 281          * all of the operations necessary to keep track of used, unused,
 282          * and released QP numbers.
 283          */
 284         if (type == IBT_UD_RQP) {
 285                 qp->qp_qpnum = qpc->hr_indx;
 286                 qp->qp_ring = qp->qp_qpnum << 8;
 287                 qp->qp_qpn_hdl = NULL;
 288         } else {
 289                 status = hermon_qp_create_qpn(state, qp, qpc);
 290                 if (status != DDI_SUCCESS) {
 291                         status = IBT_INSUFF_RESOURCE;
 292                         goto qpalloc_fail5;
 293                 }
 294         }
 295 
 296         /*
 297          * If this will be a user-mappable QP, then allocate an entry for
 298          * the "userland resources database".  This will later be added to
 299          * the database (after all further QP operations are successful).
 300          * If we fail here, we must undo the reference counts and the
 301          * previous resource allocation.
 302          */
 303         if (qp_is_umap) {
 304                 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
 305                     MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
 306                 if (umapdb == NULL) {
 307                         status = IBT_INSUFF_RESOURCE;
 308                         goto qpalloc_fail6;
 309                 }
 310         }
 311 
 312         /*
 313          * Allocate the doorbell record.  Hermon just needs one for the RQ,
 314          * if the QP is not associated with an SRQ, and use uarpg (above) as
 315          * the uar index
 316          */
 317 
 318         if (!qp_srq_en) {
 319                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 320                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 321                 if (status != DDI_SUCCESS) {
 322                         status = IBT_INSUFF_RESOURCE;
 323                         goto qpalloc_fail6;
 324                 }
 325         }
 326 
 327         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
 328 
 329         /*
 330          * We verify that the requested number of SGL is valid (i.e.
 331          * consistent with the device limits and/or software-configured
 332          * limits).  If not, then obviously the same cleanup needs to be done.
 333          */
 334         if (type == IBT_UD_RQP) {
 335                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
 336                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
 337         } else {
 338                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
 339                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
 340         }
 341         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
 342         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 343             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
 344                 status = IBT_HCA_SGL_EXCEEDED;
 345                 goto qpalloc_fail7;
 346         }
 347 
 348         /*
 349          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 350          * This will depend on the requested number of SGLs.  Note: this
 351          * has the side-effect of also calculating the real number of SGLs
 352          * (for the calculated WQE size).
 353          *
 354          * For QP's on an SRQ, we set these to 0.
 355          */
 356         if (qp_srq_en) {
 357                 qp->qp_rq_log_wqesz = 0;
 358                 qp->qp_rq_sgl = 0;
 359         } else {
 360                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 361                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 362                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 363         }
 364         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 365             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 366 
 367         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 368 
 369         /* NOTE: currently policy in driver, later maybe IBTF interface */
 370         qp->qp_no_prefetch = 0;
 371 
 372         /*
 373          * for prefetching, we need to add the number of wqes in
 374          * the 2k area plus one to the number requested, but
 375          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
 376          * it's exactly TWO wqes for the headroom
 377          */
 378         if (qp->qp_no_prefetch)
 379                 qp->qp_sq_headroom = 2 * sq_wqe_size;
 380         else
 381                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
 382         /*
 383          * hdrm wqes must be integral since both sq_wqe_size &
 384          * HERMON_QP_OH_SIZE are power of 2
 385          */
 386         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
 387 
 388 
 389         /*
 390          * Calculate the appropriate size for the work queues.
 391          * For send queue, add in the headroom wqes to the calculation.
 392          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 393          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 394          * to round the requested size up to the next highest power-of-2
 395          */
 396         /* first, adjust to a minimum and tell the caller the change */
 397         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
 398             HERMON_QP_MIN_SIZE);
 399         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
 400             HERMON_QP_MIN_SIZE);
 401         /*
 402          * now, calculate the alloc size, taking into account
 403          * the headroom for the sq
 404          */
 405         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
 406         /* if the total is a power of two, reduce it */
 407         if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)  &
 408             (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0)      {
 409                 log_qp_sq_size = log_qp_sq_size - 1;
 410         }
 411 
 412         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 413         if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
 414                 log_qp_rq_size = log_qp_rq_size - 1;
 415         }
 416 
 417         /*
 418          * Next we verify that the rounded-up size is valid (i.e. consistent
 419          * with the device limits and/or software-configured limits).  If not,
 420          * then obviously we have a lot of cleanup to do before returning.
 421          *
 422          * NOTE: the first condition deals with the (test) case of cs_sq
 423          * being just less than 2^32.  In this case, the headroom addition
 424          * to the requested cs_sq will pass the test when it should not.
 425          * This test no longer lets that case slip through the check.
 426          */
 427         if ((attr_p->qp_sizes.cs_sq >
 428             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
 429             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 430             (!qp_srq_en && (log_qp_rq_size >
 431             state->hs_cfg_profile->cp_log_max_qp_sz))) {
 432                 status = IBT_HCA_WR_EXCEEDED;
 433                 goto qpalloc_fail7;
 434         }
 435 
 436         /*
 437          * Allocate the memory for QP work queues. Since Hermon work queues
 438          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 439          * the work queue memory is very important.  We used to allocate
 440          * work queues (the combined receive and send queues) so that they
 441          * would be aligned on their combined size.  That alignment guaranteed
 442          * that they would never cross the 4GB boundary (Hermon work queues
 443          * are on the order of MBs at maximum).  Now we are able to relax
 444          * this alignment constraint by ensuring that the IB address assigned
 445          * to the queue memory (as a result of the hermon_mr_register() call)
 446          * is offset from zero.
 447          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 448          * guarantee the alignment, but when attempting to use IOMMU bypass
 449          * mode we found that we were not allowed to specify any alignment
 450          * that was more restrictive than the system page size.
 451          * So we avoided this constraint by passing two alignment values,
 452          * one for the memory allocation itself and the other for the DMA
 453          * handle (for later bind).  This used to cause more memory than
 454          * necessary to be allocated (in order to guarantee the more
 455          * restrictive alignment contraint).  But by guaranteeing the
 456          * zero-based IB virtual address for the queue, we are able to
 457          * conserve this memory.
 458          */
 459         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 460         sq_depth    = 1 << log_qp_sq_size;
 461         sq_size     = sq_depth * sq_wqe_size;
 462 
 463         /* QP on SRQ sets these to 0 */
 464         if (qp_srq_en) {
 465                 rq_wqe_size = 0;
 466                 rq_size     = 0;
 467         } else {
 468                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 469                 rq_depth    = 1 << log_qp_rq_size;
 470                 rq_size     = rq_depth * rq_wqe_size;
 471         }
 472 
 473         qp->qp_wqinfo.qa_size = sq_size + rq_size;
 474 
 475         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
 476         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
 477 
 478         if (qp_is_umap) {
 479                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 480         } else {
 481                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 482         }
 483         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
 484         if (status != DDI_SUCCESS) {
 485                 status = IBT_INSUFF_RESOURCE;
 486                 goto qpalloc_fail7;
 487         }
 488 
 489         /*
 490          * Sort WQs in memory according to stride (*q_wqe_size), largest first
 491          * If they are equal, still put the SQ first
 492          */
 493         qp->qp_sq_baseaddr = 0;
 494         qp->qp_rq_baseaddr = 0;
 495         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
 496                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
 497 
 498                 /* if this QP is on an SRQ, set the rq_buf to NULL */
 499                 if (qp_srq_en) {
 500                         rq_buf = NULL;
 501                 } else {
 502                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
 503                         qp->qp_rq_baseaddr = sq_size;
 504                 }
 505         } else {
 506                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
 507                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
 508                 qp->qp_sq_baseaddr = rq_size;
 509         }
 510 
 511         if (qp_is_umap == 0) {
 512                 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
 513                 if (qp->qp_sq_wqhdr == NULL) {
 514                         status = IBT_INSUFF_RESOURCE;
 515                         goto qpalloc_fail8;
 516                 }
 517                 if (qp_srq_en) {
 518                         qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
 519                         qp->qp_rq_wqavl.wqa_srq_en = 1;
 520                         qp->qp_rq_wqavl.wqa_srq = srq;
 521                 } else {
 522                         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
 523                         if (qp->qp_rq_wqhdr == NULL) {
 524                                 status = IBT_INSUFF_RESOURCE;
 525                                 goto qpalloc_fail8;
 526                         }
 527                         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
 528                 }
 529                 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
 530                 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
 531                 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
 532                 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
 533                 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
 534         }
 535 
 536         /*
 537          * Register the memory for the QP work queues.  The memory for the
 538          * QP must be registered in the Hermon cMPT tables.  This gives us the
 539          * LKey to specify in the QP context later.  Note: The memory for
 540          * Hermon work queues (both Send and Recv) must be contiguous and
 541          * registered as a single memory region.  Note: If the QP memory is
 542          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
 543          * meet the alignment restriction, we pass the "mro_bind_override_addr"
 544          * flag in the call to hermon_mr_register(). This guarantees that the
 545          * resulting IB vaddr will be zero-based (modulo the offset into the
 546          * first page). If we fail here, we still have the bunch of resource
 547          * and reference count cleanup to do.
 548          */
 549         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
 550             IBT_MR_NOSLEEP;
 551         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
 552         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
 553         mr_attr.mr_as       = NULL;
 554         mr_attr.mr_flags    = flag;
 555         if (qp_is_umap) {
 556                 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
 557         } else {
 558                 /* HERMON_QUEUE_LOCATION_NORMAL */
 559                 mr_op.mro_bind_type =
 560                     state->hs_cfg_profile->cp_iommu_bypass;
 561         }
 562         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
 563         mr_op.mro_bind_override_addr = 1;
 564         status = hermon_mr_register(state, pd, &mr_attr, &mr,
 565             &mr_op, HERMON_QP_CMPT);
 566         if (status != DDI_SUCCESS) {
 567                 status = IBT_INSUFF_RESOURCE;
 568                 goto qpalloc_fail9;
 569         }
 570 
 571         /*
 572          * Calculate the offset between the kernel virtual address space
 573          * and the IB virtual address space.  This will be used when
 574          * posting work requests to properly initialize each WQE.
 575          */
 576         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 577             (uint64_t)mr->mr_bindinfo.bi_addr;
 578 
 579         /*
 580          * Fill in all the return arguments (if necessary).  This includes
 581          * real work queue sizes (in wqes), real SGLs, and QP number
 582          */
 583         if (queuesz_p != NULL) {
 584                 queuesz_p->cs_sq     =
 585                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
 586                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
 587 
 588                 /* if this QP is on an SRQ, set these to 0 */
 589                 if (qp_srq_en) {
 590                         queuesz_p->cs_rq     = 0;
 591                         queuesz_p->cs_rq_sgl = 0;
 592                 } else {
 593                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
 594                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
 595                 }
 596         }
 597         if (qpn != NULL) {
 598                 *qpn = (ib_qpn_t)qp->qp_qpnum;
 599         }
 600 
 601         /*
 602          * Fill in the rest of the Hermon Queue Pair handle.
 603          */
 604         qp->qp_qpcrsrcp              = qpc;
 605         qp->qp_rsrcp         = rsrc;
 606         qp->qp_state         = HERMON_QP_RESET;
 607         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
 608         qp->qp_pdhdl         = pd;
 609         qp->qp_mrhdl         = mr;
 610         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
 611             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
 612         qp->qp_is_special    = 0;
 613         qp->qp_uarpg         = uarpg;
 614         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
 615         qp->qp_sq_cqhdl              = sq_cq;
 616         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
 617         qp->qp_sq_logqsz     = log_qp_sq_size;
 618         qp->qp_sq_buf                = sq_buf;
 619         qp->qp_desc_off              = qp_desc_off;
 620         qp->qp_rq_cqhdl              = rq_cq;
 621         qp->qp_rq_buf                = rq_buf;
 622         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
 623             0;
 624 
 625         /* if this QP is on an SRQ, set rq_bufsz to 0 */
 626         if (qp_srq_en) {
 627                 qp->qp_rq_bufsz              = 0;
 628                 qp->qp_rq_logqsz     = 0;
 629         } else {
 630                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
 631                 qp->qp_rq_logqsz     = log_qp_rq_size;
 632         }
 633 
 634         qp->qp_forward_sqd_event  = 0;
 635         qp->qp_sqd_still_draining = 0;
 636         qp->qp_hdlrarg               = (void *)ibt_qphdl;
 637         qp->qp_mcg_refcnt    = 0;
 638 
 639         /*
 640          * If this QP is to be associated with an SRQ, set the SRQ handle
 641          */
 642         if (qp_srq_en) {
 643                 qp->qp_srqhdl = srq;
 644                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
 645         } else {
 646                 qp->qp_srqhdl = NULL;
 647         }
 648 
 649         /* Determine the QP service type */
 650         qp->qp_type = type;
 651         if (type == IBT_RC_RQP) {
 652                 qp->qp_serv_type = HERMON_QP_RC;
 653         } else if (type == IBT_UD_RQP) {
 654                 if (alloc_flags & IBT_QP_USES_RFCI)
 655                         qp->qp_serv_type = HERMON_QP_RFCI;
 656                 else if (alloc_flags & IBT_QP_USES_FCMD)
 657                         qp->qp_serv_type = HERMON_QP_FCMND;
 658                 else
 659                         qp->qp_serv_type = HERMON_QP_UD;
 660         } else {
 661                 qp->qp_serv_type = HERMON_QP_UC;
 662         }
 663 
 664         /*
 665          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
 666          */
 667 
 668         /*
 669          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
 670          * set the quadword to all F's - high-order bit is owner (init to one)
 671          * and the rest for the headroom definition of prefetching
 672          *
 673          */
 674         wqesz_shift = qp->qp_sq_log_wqesz;
 675         thewqesz    = 1 << wqesz_shift;
 676         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
 677         if (qp_is_umap == 0) {
 678                 for (i = 0; i < sq_depth; i++) {
 679                         /*
 680                          * for each stride, go through and every 64 bytes
 681                          * write the init value - having set the address
 682                          * once, just keep incrementing it
 683                          */
 684                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
 685                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
 686                         }
 687                 }
 688         }
 689 
 690         /* Zero out the QP context */
 691         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
 692 
 693         /*
 694          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
 695          * "qphdl" and return success
 696          */
 697         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);
 698 
 699         /*
 700          * If this is a user-mappable QP, then we need to insert the previously
 701          * allocated entry into the "userland resources database".  This will
 702          * allow for later lookup during devmap() (i.e. mmap()) calls.
 703          */
 704         if (qp_is_umap) {
 705                 hermon_umap_db_add(umapdb);
 706         }
 707         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
 708             DDI_INTR_PRI(state->hs_intrmsi_pri));
 709 
 710         *qphdl = qp;
 711 
 712         return (DDI_SUCCESS);
 713 
 714 /*
 715  * The following is cleanup for all possible failure cases in this routine
 716  */
 717 qpalloc_fail9:
 718         hermon_queue_free(&qp->qp_wqinfo);
 719 qpalloc_fail8:
 720         if (qp->qp_sq_wqhdr)
 721                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
 722         if (qp->qp_rq_wqhdr)
 723                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
 724 qpalloc_fail7:
 725         if (qp_is_umap) {
 726                 hermon_umap_db_free(umapdb);
 727         }
 728         if (!qp_srq_en) {
 729                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
 730         }
 731 
 732 qpalloc_fail6:
 733         /*
 734          * Releasing the QPN will also free up the QPC context.  Update
 735          * the QPC context pointer to indicate this.
 736          */
 737         if (qp->qp_qpn_hdl) {
 738                 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
 739                     HERMON_QPN_RELEASE);
 740         } else {
 741                 hermon_rsrc_free(state, &qpc);
 742         }
 743         qpc = NULL;
 744 qpalloc_fail5:
 745         hermon_rsrc_free(state, &rsrc);
 746 qpalloc_fail4:
 747         if (qpc) {
 748                 hermon_rsrc_free(state, &qpc);
 749         }
 750 qpalloc_fail3:
 751         hermon_cq_refcnt_dec(rq_cq);
 752 qpalloc_fail2:
 753         hermon_cq_refcnt_dec(sq_cq);
 754 qpalloc_fail1:
 755         hermon_pd_refcnt_dec(pd);
 756 qpalloc_fail:
 757         return (status);
 758 }
 759 
 760 
 761 
 762 /*
 763  * hermon_special_qp_alloc()
 764  *    Context: Can be called only from user or kernel context.
 765  */
 766 int
 767 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
 768     uint_t sleepflag)
 769 {
 770         hermon_rsrc_t           *qpc, *rsrc;
 771         hermon_qphdl_t          qp;
 772         ibt_qp_alloc_attr_t     *attr_p;
 773         ibt_sqp_type_t          type;
 774         uint8_t                 port;
 775         ibtl_qp_hdl_t           ibt_qphdl;
 776         ibt_chan_sizes_t        *queuesz_p;
 777         hermon_qphdl_t          *qphdl;
 778         ibt_mr_attr_t           mr_attr;
 779         hermon_mr_options_t     mr_op;
 780         hermon_pdhdl_t          pd;
 781         hermon_cqhdl_t          sq_cq, rq_cq;
 782         hermon_mrhdl_t          mr;
 783         uint64_t                qp_desc_off;
 784         uint64_t                *thewqe, thewqesz;
 785         uint32_t                *sq_buf, *rq_buf;
 786         uint32_t                log_qp_sq_size, log_qp_rq_size;
 787         uint32_t                sq_size, rq_size, max_sgl;
 788         uint32_t                uarpg;
 789         uint32_t                sq_depth;
 790         uint32_t                sq_wqe_size, rq_wqe_size, wqesz_shift;
 791         int                     status, flag, i, j;
 792 
 793         /*
 794          * Extract the necessary info from the hermon_qp_info_t structure
 795          */
 796         attr_p    = qpinfo->qpi_attrp;
 797         type      = qpinfo->qpi_type;
 798         port      = qpinfo->qpi_port;
 799         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 800         queuesz_p = qpinfo->qpi_queueszp;
 801         qphdl     = &qpinfo->qpi_qphdl;
 802 
 803         /*
 804          * Check for valid special QP type (only SMI & GSI supported)
 805          */
 806         if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
 807                 status = IBT_QP_SPECIAL_TYPE_INVALID;
 808                 goto spec_qpalloc_fail;
 809         }
 810 
 811         /*
 812          * Check for valid port number
 813          */
 814         if (!hermon_portnum_is_valid(state, port)) {
 815                 status = IBT_HCA_PORT_INVALID;
 816                 goto spec_qpalloc_fail;
 817         }
 818         port = port - 1;
 819 
 820         /*
 821          * Check for valid PD handle pointer
 822          */
 823         if (attr_p->qp_pd_hdl == NULL) {
 824                 status = IBT_PD_HDL_INVALID;
 825                 goto spec_qpalloc_fail;
 826         }
 827         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 828 
 829         /* Increment the reference count on the PD */
 830         hermon_pd_refcnt_inc(pd);
 831 
 832         /*
 833          * Check for valid CQ handle pointers
 834          */
 835         if ((attr_p->qp_ibc_scq_hdl == NULL) ||
 836             (attr_p->qp_ibc_rcq_hdl == NULL)) {
 837                 status = IBT_CQ_HDL_INVALID;
 838                 goto spec_qpalloc_fail1;
 839         }
 840         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 841         rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 842 
 843         /*
 844          * Increment the reference count on the CQs.  One or both of these
 845          * could return error if we determine that the given CQ is already
 846          * being used with a non-special QP (i.e. a normal QP).
 847          */
 848         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
 849         if (status != DDI_SUCCESS) {
 850                 status = IBT_CQ_HDL_INVALID;
 851                 goto spec_qpalloc_fail1;
 852         }
 853         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
 854         if (status != DDI_SUCCESS) {
 855                 status = IBT_CQ_HDL_INVALID;
 856                 goto spec_qpalloc_fail2;
 857         }
 858 
 859         /*
 860          * Allocate the special QP resources.  Essentially, this allocation
 861          * amounts to checking if the request special QP has already been
 862          * allocated.  If successful, the QP context return is an actual
 863          * QP context that has been "aliased" to act as a special QP of the
 864          * appropriate type (and for the appropriate port).  Just as in
 865          * hermon_qp_alloc() above, ownership for this QP context is not
 866          * immediately given to hardware in the final step here.  Instead, we
 867          * wait until the QP is later transitioned to the "Init" state before
 868          * passing the QP to hardware.  If we fail here, we must undo all
 869          * the reference count (CQ and PD).
 870          */
 871         status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
 872         if (status != DDI_SUCCESS) {
 873                 goto spec_qpalloc_fail3;
 874         }
 875 
 876         /*
 877          * Allocate the software structure for tracking the special queue
 878          * pair (i.e. the Hermon Queue Pair handle).  If we fail here, we
 879          * must undo the reference counts and the previous resource allocation.
 880          */
 881         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 882         if (status != DDI_SUCCESS) {
 883                 status = IBT_INSUFF_RESOURCE;
 884                 goto spec_qpalloc_fail4;
 885         }
 886         qp = (hermon_qphdl_t)rsrc->hr_addr;
 887 
 888         bzero(qp, sizeof (struct hermon_sw_qp_s));
 889 
 890         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
 891         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
 892 
 893         /*
 894          * Actual QP number is a combination of the index of the QPC and
 895          * the port number.  This is because the special QP contexts must
 896          * be allocated two-at-a-time.
 897          */
 898         qp->qp_qpnum = qpc->hr_indx + port;
 899         qp->qp_ring = qp->qp_qpnum << 8;
 900 
 901         uarpg = state->hs_kernel_uar_index; /* must be for spec qp */
 902         /*
 903          * Allocate the doorbell record.  Hermon uses only one for the RQ so
 904          * alloc a qp doorbell, using uarpg (above) as the uar index
 905          */
 906 
 907         status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 908             &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 909         if (status != DDI_SUCCESS) {
 910                 status = IBT_INSUFF_RESOURCE;
 911                 goto spec_qpalloc_fail5;
 912         }
 913         /*
 914          * Calculate the appropriate size for the work queues.
 915          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 916          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 917          * to round the requested size up to the next highest power-of-2
 918          */
 919         attr_p->qp_sizes.cs_sq =
 920             max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
 921         attr_p->qp_sizes.cs_rq =
 922             max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
 923         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
 924         if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
 925                 log_qp_sq_size = log_qp_sq_size - 1;
 926         }
 927         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 928         if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
 929                 log_qp_rq_size = log_qp_rq_size - 1;
 930         }
 931 
 932         /*
 933          * Next we verify that the rounded-up size is valid (i.e. consistent
 934          * with the device limits and/or software-configured limits).  If not,
 935          * then obviously we have a bit of cleanup to do before returning.
 936          */
 937         if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 938             (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
 939                 status = IBT_HCA_WR_EXCEEDED;
 940                 goto spec_qpalloc_fail5a;
 941         }
 942 
 943         /*
 944          * Next we verify that the requested number of SGL is valid (i.e.
 945          * consistent with the device limits and/or software-configured
 946          * limits).  If not, then obviously the same cleanup needs to be done.
 947          */
 948         max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
 949         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 950             (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
 951                 status = IBT_HCA_SGL_EXCEEDED;
 952                 goto spec_qpalloc_fail5a;
 953         }
 954 
 955         /*
 956          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 957          * This will depend on the requested number of SGLs.  Note: this
 958          * has the side-effect of also calculating the real number of SGLs
 959          * (for the calculated WQE size).
 960          */
 961         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 962             max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 963             &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 964         if (type == IBT_SMI_SQP) {
 965                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 966                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
 967                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 968         } else {
 969                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 970                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
 971                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 972         }
 973 
 974         /*
 975          * Allocate the memory for QP work queues. Since Hermon work queues
 976          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 977          * the work queue memory is very important.  We used to allocate
 978          * work queues (the combined receive and send queues) so that they
 979          * would be aligned on their combined size.  That alignment guaranteed
 980          * that they would never cross the 4GB boundary (Hermon work queues
 981          * are on the order of MBs at maximum).  Now we are able to relax
 982          * this alignment constraint by ensuring that the IB address assigned
 983          * to the queue memory (as a result of the hermon_mr_register() call)
 984          * is offset from zero.
 985          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 986          * guarantee the alignment, but when attempting to use IOMMU bypass
 987          * mode we found that we were not allowed to specify any alignment
 988          * that was more restrictive than the system page size.
 989          * So we avoided this constraint by passing two alignment values,
 990          * one for the memory allocation itself and the other for the DMA
 991          * handle (for later bind).  This used to cause more memory than
 992          * necessary to be allocated (in order to guarantee the more
 993          * restrictive alignment contraint).  But by guaranteeing the
 994          * zero-based IB virtual address for the queue, we are able to
 995          * conserve this memory.
 996          */
 997         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 998         sq_depth    = 1 << log_qp_sq_size;
 999         sq_size     = (1 << log_qp_sq_size) * sq_wqe_size;
1000 
1001         rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1002         rq_size     = (1 << log_qp_rq_size) * rq_wqe_size;
1003 
1004         qp->qp_wqinfo.qa_size          = sq_size + rq_size;
1005 
1006         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1007         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1008         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1009 
1010         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1011         if (status != NULL) {
1012                 status = IBT_INSUFF_RESOURCE;
1013                 goto spec_qpalloc_fail5a;
1014         }
1015 
1016         /*
1017          * Sort WQs in memory according to depth, stride (*q_wqe_size),
1018          * biggest first. If equal, the Send Queue still goes first
1019          */
1020         qp->qp_sq_baseaddr = 0;
1021         qp->qp_rq_baseaddr = 0;
1022         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1023                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1024                 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1025                 qp->qp_rq_baseaddr = sq_size;
1026         } else {
1027                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1028                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1029                 qp->qp_sq_baseaddr = rq_size;
1030         }
1031 
1032         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1033         if (qp->qp_sq_wqhdr == NULL) {
1034                 status = IBT_INSUFF_RESOURCE;
1035                 goto spec_qpalloc_fail6;
1036         }
1037         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
1038         if (qp->qp_rq_wqhdr == NULL) {
1039                 status = IBT_INSUFF_RESOURCE;
1040                 goto spec_qpalloc_fail6;
1041         }
1042         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1043         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1044         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1045         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1046         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1047         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1048 
1049         /*
1050          * Register the memory for the special QP work queues.  The memory for
1051          * the special QP must be registered in the Hermon cMPT tables.  This
1052          * gives us the LKey to specify in the QP context later.  Note: The
1053          * memory for Hermon work queues (both Send and Recv) must be contiguous
1054          * and registered as a single memory region. Also, in order to meet the
1055          * alignment restriction, we pass the "mro_bind_override_addr" flag in
1056          * the call to hermon_mr_register(). This guarantees that the resulting
1057          * IB vaddr will be zero-based (modulo the offset into the first page).
1058          * If we fail here, we have a bunch of resource and reference count
1059          * cleanup to do.
1060          */
1061         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1062             IBT_MR_NOSLEEP;
1063         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1064         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1065         mr_attr.mr_as       = NULL;
1066         mr_attr.mr_flags    = flag;
1067 
1068         mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
1069         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1070         mr_op.mro_bind_override_addr = 1;
1071 
1072         status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
1073             HERMON_QP_CMPT);
1074         if (status != DDI_SUCCESS) {
1075                 status = IBT_INSUFF_RESOURCE;
1076                 goto spec_qpalloc_fail6;
1077         }
1078 
1079         /*
1080          * Calculate the offset between the kernel virtual address space
1081          * and the IB virtual address space.  This will be used when
1082          * posting work requests to properly initialize each WQE.
1083          */
1084         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1085             (uint64_t)mr->mr_bindinfo.bi_addr;
1086 
1087         /* set the prefetch - initially, not prefetching */
1088         qp->qp_no_prefetch = 1;
1089 
1090         if (qp->qp_no_prefetch)
1091                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1092         else
1093                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1094         /*
1095          * hdrm wqes must be integral since both sq_wqe_size &
1096          * HERMON_QP_OH_SIZE are power of 2
1097          */
1098         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1099         /*
1100          * Fill in all the return arguments (if necessary).  This includes
1101          * real work queue sizes, real SGLs, and QP number (which will be
1102          * either zero or one, depending on the special QP type)
1103          */
1104         if (queuesz_p != NULL) {
1105                 queuesz_p->cs_sq     =
1106                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1107                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1108                 queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1109                 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1110         }
1111 
1112         /*
1113          * Fill in the rest of the Hermon Queue Pair handle.  We can update
1114          * the following fields for use in further operations on the QP.
1115          */
1116         qp->qp_qpcrsrcp              = qpc;
1117         qp->qp_rsrcp         = rsrc;
1118         qp->qp_state         = HERMON_QP_RESET;
1119         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1120         qp->qp_pdhdl         = pd;
1121         qp->qp_mrhdl         = mr;
1122         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1123             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1124         qp->qp_is_special    = (type == IBT_SMI_SQP) ?
1125             HERMON_QP_SMI : HERMON_QP_GSI;
1126         qp->qp_uarpg         = uarpg;
1127         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1128         qp->qp_sq_cqhdl              = sq_cq;
1129         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1130         qp->qp_sq_buf                = sq_buf;
1131         qp->qp_sq_logqsz     = log_qp_sq_size;
1132         qp->qp_desc_off              = qp_desc_off;
1133         qp->qp_rq_cqhdl              = rq_cq;
1134         qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1135         qp->qp_rq_buf                = rq_buf;
1136         qp->qp_rq_logqsz     = log_qp_rq_size;
1137         qp->qp_portnum               = port;
1138         qp->qp_pkeyindx              = 0;
1139         qp->qp_forward_sqd_event  = 0;
1140         qp->qp_sqd_still_draining = 0;
1141         qp->qp_hdlrarg               = (void *)ibt_qphdl;
1142         qp->qp_mcg_refcnt    = 0;
1143         qp->qp_srqhdl                = NULL;
1144 
1145         /* All special QPs are UD QP service type */
1146         qp->qp_type = IBT_UD_RQP;
1147         qp->qp_serv_type = HERMON_QP_UD;
1148 
1149         /*
1150          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1151          */
1152 
1153         /*
1154          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1155          * set the quadword to all F's - high-order bit is owner (init to one)
1156          * and the rest for the headroom definition of prefetching
1157          *
1158          */
1159 
1160         wqesz_shift = qp->qp_sq_log_wqesz;
1161         thewqesz    = 1 << wqesz_shift;
1162         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1163         for (i = 0; i < sq_depth; i++) {
1164                 /*
1165                  * for each stride, go through and every 64 bytes write the
1166                  * init value - having set the address once, just keep
1167                  * incrementing it
1168                  */
1169                 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1170                         *(uint32_t *)thewqe = 0xFFFFFFFF;
1171                 }
1172         }
1173 
1174 
1175         /* Zero out the QP context */
1176         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1177 
1178         /*
1179          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1180          * "qphdl" and return success
1181          */
1182         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);
1183 
1184         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1185             DDI_INTR_PRI(state->hs_intrmsi_pri));
1186 
1187         *qphdl = qp;
1188 
1189         return (DDI_SUCCESS);
1190 
1191 /*
1192  * The following is cleanup for all possible failure cases in this routine
1193  */
1194 spec_qpalloc_fail6:
1195         hermon_queue_free(&qp->qp_wqinfo);
1196         if (qp->qp_sq_wqhdr)
1197                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1198         if (qp->qp_rq_wqhdr)
1199                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1200 spec_qpalloc_fail5a:
1201         hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1202 spec_qpalloc_fail5:
1203         hermon_rsrc_free(state, &rsrc);
1204 spec_qpalloc_fail4:
1205         if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1206                 HERMON_WARNING(state, "failed to free special QP rsrc");
1207         }
1208 spec_qpalloc_fail3:
1209         hermon_cq_refcnt_dec(rq_cq);
1210 spec_qpalloc_fail2:
1211         hermon_cq_refcnt_dec(sq_cq);
1212 spec_qpalloc_fail1:
1213         hermon_pd_refcnt_dec(pd);
1214 spec_qpalloc_fail:
1215         return (status);
1216 }
1217 
1218 
1219 /*
1220  * hermon_qp_alloc_range()
1221  *    Context: Can be called only from user or kernel context.
1222  */
1223 int
1224 hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
1225     hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
1226     ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
1227     hermon_qphdl_t *qphdl, uint_t sleepflag)
1228 {
1229         hermon_rsrc_t                   *qpc, *rsrc;
1230         hermon_rsrc_type_t              rsrc_type;
1231         hermon_qphdl_t                  qp;
1232         hermon_qp_range_t               *qp_range_p;
1233         ibt_qp_alloc_attr_t             *attr_p;
1234         ibt_qp_type_t                   type;
1235         hermon_qp_wq_type_t             swq_type;
1236         ibt_chan_sizes_t                *queuesz_p;
1237         ibt_mr_attr_t                   mr_attr;
1238         hermon_mr_options_t             mr_op;
1239         hermon_srqhdl_t                 srq;
1240         hermon_pdhdl_t                  pd;
1241         hermon_cqhdl_t                  sq_cq, rq_cq;
1242         hermon_mrhdl_t                  mr;
1243         uint64_t                        qp_desc_off;
1244         uint64_t                        *thewqe, thewqesz;
1245         uint32_t                        *sq_buf, *rq_buf;
1246         uint32_t                        log_qp_sq_size, log_qp_rq_size;
1247         uint32_t                        sq_size, rq_size;
1248         uint32_t                        sq_depth, rq_depth;
1249         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
1250         uint32_t                        max_sgl, max_recv_sgl, uarpg;
1251         uint_t                          qp_srq_en, i, j;
1252         int                             ii;     /* loop counter for range */
1253         int                             status, flag;
1254         uint_t                          serv_type;
1255 
1256         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
1257 
1258         /*
1259          * Extract the necessary info from the hermon_qp_info_t structure
1260          */
1261         attr_p    = qpinfo->qpi_attrp;
1262         type      = qpinfo->qpi_type;
1263         queuesz_p = qpinfo->qpi_queueszp;
1264 
1265         if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
1266                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
1267                         return (IBT_INSUFF_RESOURCE);
1268                 rsrc_type = HERMON_QPC;
1269                 serv_type = HERMON_QP_UD;
1270         } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1271                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
1272                         return (IBT_INSUFF_RESOURCE);
1273                 switch (attr_p->qp_fc.fc_hca_port) {
1274                 case 1:
1275                         rsrc_type = HERMON_QPC_FEXCH_PORT1;
1276                         break;
1277                 case 2:
1278                         rsrc_type = HERMON_QPC_FEXCH_PORT2;
1279                         break;
1280                 default:
1281                         return (IBT_INVALID_PARAM);
1282                 }
1283                 serv_type = HERMON_QP_FEXCH;
1284         } else
1285                 return (IBT_INVALID_PARAM);
1286 
1287         /*
1288          * Determine whether QP is being allocated for userland access or
1289          * whether it is being allocated for kernel access.  If the QP is
1290          * being allocated for userland access, fail (too complex for now).
1291          */
1292         if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
1293                 return (IBT_NOT_SUPPORTED);
1294         } else {
1295                 uarpg = state->hs_kernel_uar_index;
1296         }
1297 
1298         /*
1299          * Determine whether QP is being associated with an SRQ
1300          */
1301         qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
1302         if (qp_srq_en) {
1303                 /*
1304                  * Check for valid SRQ handle pointers
1305                  */
1306                 if (attr_p->qp_ibc_srq_hdl == NULL) {
1307                         return (IBT_SRQ_HDL_INVALID);
1308                 }
1309                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
1310         }
1311 
1312         /*
1313          * Check for valid QP service type (only UD supported)
1314          */
1315         if (type != IBT_UD_RQP) {
1316                 return (IBT_QP_SRV_TYPE_INVALID);
1317         }
1318 
1319         /*
1320          * Check for valid PD handle pointer
1321          */
1322         if (attr_p->qp_pd_hdl == NULL) {
1323                 return (IBT_PD_HDL_INVALID);
1324         }
1325         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
1326 
1327         /*
1328          * If on an SRQ, check to make sure the PD is the same
1329          */
1330         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
1331                 return (IBT_PD_HDL_INVALID);
1332         }
1333 
1334         /* set loop variable here, for freeing resources on error */
1335         ii = 0;
1336 
1337         /*
1338          * Allocate 2^log2 contiguous/aligned QP context entries.  This will
1339          * be filled in with all the necessary parameters to define the
1340          * Queue Pairs.  Unlike other Hermon hardware resources, ownership
1341          * is not immediately given to hardware in the final step here.
1342          * Instead, we must wait until the QP is later transitioned to the
1343          * "Init" state before passing the QP to hardware.  If we fail here,
1344          * we must undo all the reference count (CQ and PD).
1345          */
1346         status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
1347             &qpc);
1348         if (status != DDI_SUCCESS) {
1349                 return (IBT_INSUFF_RESOURCE);
1350         }
1351 
1352         if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
1353                 /*
1354                  * Need to init the MKEYs for the FEXCH QPs.
1355                  *
1356                  * For FEXCH QP subranges, we return the QPN base as
1357                  * "relative" to the full FEXCH QP range for the port.
1358                  */
1359                 *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
1360                     attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
1361         else
1362                 *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;
1363 
1364         qp_range_p = kmem_alloc(sizeof (*qp_range_p),
1365             (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1366         if (qp_range_p == NULL) {
1367                 status = IBT_INSUFF_RESOURCE;
1368                 goto qpalloc_fail0;
1369         }
1370         mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
1371             DDI_INTR_PRI(state->hs_intrmsi_pri));
1372         mutex_enter(&qp_range_p->hqpr_lock);
1373         qp_range_p->hqpr_refcnt = 1 << log2;
1374         qp_range_p->hqpr_qpcrsrc = qpc;
1375         mutex_exit(&qp_range_p->hqpr_lock);
1376 
1377 for_each_qp:
1378 
1379         /* Increment the reference count on the protection domain (PD) */
1380         hermon_pd_refcnt_inc(pd);
1381 
1382         rq_cq = (hermon_cqhdl_t)recv_cq[ii];
1383         sq_cq = (hermon_cqhdl_t)send_cq[ii];
1384         if (sq_cq == NULL) {
1385                 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1386                         /* if no send completions, just use rq_cq */
1387                         sq_cq = rq_cq;
1388                 } else {
1389                         status = IBT_CQ_HDL_INVALID;
1390                         goto qpalloc_fail1;
1391                 }
1392         }
1393 
1394         /*
1395          * Increment the reference count on the CQs.  One or both of these
1396          * could return error if we determine that the given CQ is already
1397          * being used with a special (SMI/GSI) QP.
1398          */
1399         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
1400         if (status != DDI_SUCCESS) {
1401                 status = IBT_CQ_HDL_INVALID;
1402                 goto qpalloc_fail1;
1403         }
1404         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
1405         if (status != DDI_SUCCESS) {
1406                 status = IBT_CQ_HDL_INVALID;
1407                 goto qpalloc_fail2;
1408         }
1409 
1410         /*
1411          * Allocate the software structure for tracking the queue pair
1412          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
1413          * undo the reference counts and the previous resource allocation.
1414          */
1415         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
1416         if (status != DDI_SUCCESS) {
1417                 status = IBT_INSUFF_RESOURCE;
1418                 goto qpalloc_fail4;
1419         }
1420         qp = (hermon_qphdl_t)rsrc->hr_addr;
1421         bzero(qp, sizeof (struct hermon_sw_qp_s));
1422         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1423         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
1424 
1425         /*
1426          * Calculate the QP number from QPC index.  This routine handles
1427          * all of the operations necessary to keep track of used, unused,
1428          * and released QP numbers.
1429          */
1430         qp->qp_qpnum = qpc->hr_indx + ii;
1431         qp->qp_ring = qp->qp_qpnum << 8;
1432         qp->qp_qpn_hdl = NULL;
1433 
1434         /*
1435          * Allocate the doorbell record.  Hermon just needs one for the RQ,
1436          * if the QP is not associated with an SRQ, and use uarpg (above) as
1437          * the uar index
1438          */
1439 
1440         if (!qp_srq_en) {
1441                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
1442                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
1443                 if (status != DDI_SUCCESS) {
1444                         status = IBT_INSUFF_RESOURCE;
1445                         goto qpalloc_fail6;
1446                 }
1447         }
1448 
1449         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
1450 
1451         /*
1452          * We verify that the requested number of SGL is valid (i.e.
1453          * consistent with the device limits and/or software-configured
1454          * limits).  If not, then obviously the same cleanup needs to be done.
1455          */
1456         max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
1457         swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
1458         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
1459         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
1460             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
1461                 status = IBT_HCA_SGL_EXCEEDED;
1462                 goto qpalloc_fail7;
1463         }
1464 
1465         /*
1466          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
1467          * This will depend on the requested number of SGLs.  Note: this
1468          * has the side-effect of also calculating the real number of SGLs
1469          * (for the calculated WQE size).
1470          *
1471          * For QP's on an SRQ, we set these to 0.
1472          */
1473         if (qp_srq_en) {
1474                 qp->qp_rq_log_wqesz = 0;
1475                 qp->qp_rq_sgl = 0;
1476         } else {
1477                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
1478                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
1479                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
1480         }
1481         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
1482             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
1483 
1484         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1485 
1486         /* NOTE: currently policy in driver, later maybe IBTF interface */
1487         qp->qp_no_prefetch = 0;
1488 
1489         /*
1490          * for prefetching, we need to add the number of wqes in
1491          * the 2k area plus one to the number requested, but
1492          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
1493          * it's exactly TWO wqes for the headroom
1494          */
1495         if (qp->qp_no_prefetch)
1496                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1497         else
1498                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1499         /*
1500          * hdrm wqes must be integral since both sq_wqe_size &
1501          * HERMON_QP_OH_SIZE are power of 2
1502          */
1503         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1504 
1505 
1506         /*
1507          * Calculate the appropriate size for the work queues.
1508          * For send queue, add in the headroom wqes to the calculation.
1509          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
1510          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
1511          * to round the requested size up to the next highest power-of-2
1512          */
1513         /* first, adjust to a minimum and tell the caller the change */
1514         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
1515             HERMON_QP_MIN_SIZE);
1516         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
1517             HERMON_QP_MIN_SIZE);
1518         /*
1519          * now, calculate the alloc size, taking into account
1520          * the headroom for the sq
1521          */
1522         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
1523         /* if the total is a power of two, reduce it */
1524         if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)  &
1525             (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0)      {
1526                 log_qp_sq_size = log_qp_sq_size - 1;
1527         }
1528 
1529         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
1530         if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
1531                 log_qp_rq_size = log_qp_rq_size - 1;
1532         }
1533 
1534         /*
1535          * Next we verify that the rounded-up size is valid (i.e. consistent
1536          * with the device limits and/or software-configured limits).  If not,
1537          * then obviously we have a lot of cleanup to do before returning.
1538          *
1539          * NOTE: the first condition deals with the (test) case of cs_sq
1540          * being just less than 2^32.  In this case, the headroom addition
1541          * to the requested cs_sq will pass the test when it should not.
1542          * This test no longer lets that case slip through the check.
1543          */
1544         if ((attr_p->qp_sizes.cs_sq >
1545             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
1546             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
1547             (!qp_srq_en && (log_qp_rq_size >
1548             state->hs_cfg_profile->cp_log_max_qp_sz))) {
1549                 status = IBT_HCA_WR_EXCEEDED;
1550                 goto qpalloc_fail7;
1551         }
1552 
1553         /*
1554          * Allocate the memory for QP work queues. Since Hermon work queues
1555          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
1556          * the work queue memory is very important.  We used to allocate
1557          * work queues (the combined receive and send queues) so that they
1558          * would be aligned on their combined size.  That alignment guaranteed
1559          * that they would never cross the 4GB boundary (Hermon work queues
1560          * are on the order of MBs at maximum).  Now we are able to relax
1561          * this alignment constraint by ensuring that the IB address assigned
1562          * to the queue memory (as a result of the hermon_mr_register() call)
1563          * is offset from zero.
1564          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
1565          * guarantee the alignment, but when attempting to use IOMMU bypass
1566          * mode we found that we were not allowed to specify any alignment
1567          * that was more restrictive than the system page size.
1568          * So we avoided this constraint by passing two alignment values,
1569          * one for the memory allocation itself and the other for the DMA
1570          * handle (for later bind).  This used to cause more memory than
1571          * necessary to be allocated (in order to guarantee the more
1572          * restrictive alignment contraint).  But by guaranteeing the
1573          * zero-based IB virtual address for the queue, we are able to
1574          * conserve this memory.
1575          */
1576         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1577         sq_depth    = 1 << log_qp_sq_size;
1578         sq_size     = sq_depth * sq_wqe_size;
1579 
1580         /* QP on SRQ sets these to 0 */
1581         if (qp_srq_en) {
1582                 rq_wqe_size = 0;
1583                 rq_size     = 0;
1584         } else {
1585                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1586                 rq_depth    = 1 << log_qp_rq_size;
1587                 rq_size     = rq_depth * rq_wqe_size;
1588         }
1589 
1590         qp->qp_wqinfo.qa_size = sq_size + rq_size;
1591         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1592         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1593         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1594         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1595         if (status != DDI_SUCCESS) {
1596                 status = IBT_INSUFF_RESOURCE;
1597                 goto qpalloc_fail7;
1598         }
1599 
1600         /*
1601          * Sort WQs in memory according to stride (*q_wqe_size), largest first
1602          * If they are equal, still put the SQ first
1603          */
1604         qp->qp_sq_baseaddr = 0;
1605         qp->qp_rq_baseaddr = 0;
1606         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1607                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1608 
1609                 /* if this QP is on an SRQ, set the rq_buf to NULL */
1610                 if (qp_srq_en) {
1611                         rq_buf = NULL;
1612                 } else {
1613                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1614                         qp->qp_rq_baseaddr = sq_size;
1615                 }
1616         } else {
1617                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1618                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1619                 qp->qp_sq_baseaddr = rq_size;
1620         }
1621 
1622         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1623         if (qp->qp_sq_wqhdr == NULL) {
1624                 status = IBT_INSUFF_RESOURCE;
1625                 goto qpalloc_fail8;
1626         }
1627         if (qp_srq_en) {
1628                 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
1629                 qp->qp_rq_wqavl.wqa_srq_en = 1;
1630                 qp->qp_rq_wqavl.wqa_srq = srq;
1631         } else {
1632                 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
1633                 if (qp->qp_rq_wqhdr == NULL) {
1634                         status = IBT_INSUFF_RESOURCE;
1635                         goto qpalloc_fail8;
1636                 }
1637                 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1638         }
1639         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1640         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1641         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1642         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1643         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1644 
1645         /*
1646          * Register the memory for the QP work queues.  The memory for the
1647          * QP must be registered in the Hermon cMPT tables.  This gives us the
1648          * LKey to specify in the QP context later.  Note: The memory for
1649          * Hermon work queues (both Send and Recv) must be contiguous and
1650          * registered as a single memory region.  Note: If the QP memory is
1651          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
1652          * meet the alignment restriction, we pass the "mro_bind_override_addr"
1653          * flag in the call to hermon_mr_register(). This guarantees that the
1654          * resulting IB vaddr will be zero-based (modulo the offset into the
1655          * first page). If we fail here, we still have the bunch of resource
1656          * and reference count cleanup to do.
1657          */
1658         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1659             IBT_MR_NOSLEEP;
1660         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1661         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1662         mr_attr.mr_as       = NULL;
1663         mr_attr.mr_flags    = flag;
1664         /* HERMON_QUEUE_LOCATION_NORMAL */
1665         mr_op.mro_bind_type =
1666             state->hs_cfg_profile->cp_iommu_bypass;
1667         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1668         mr_op.mro_bind_override_addr = 1;
1669         status = hermon_mr_register(state, pd, &mr_attr, &mr,
1670             &mr_op, HERMON_QP_CMPT);
1671         if (status != DDI_SUCCESS) {
1672                 status = IBT_INSUFF_RESOURCE;
1673                 goto qpalloc_fail9;
1674         }
1675 
1676         /*
1677          * Calculate the offset between the kernel virtual address space
1678          * and the IB virtual address space.  This will be used when
1679          * posting work requests to properly initialize each WQE.
1680          */
1681         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1682             (uint64_t)mr->mr_bindinfo.bi_addr;
1683 
1684         /*
1685          * Fill in all the return arguments (if necessary).  This includes
1686          * real work queue sizes (in wqes), real SGLs, and QP number
1687          */
1688         if (queuesz_p != NULL) {
1689                 queuesz_p->cs_sq     =
1690                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1691                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1692 
1693                 /* if this QP is on an SRQ, set these to 0 */
1694                 if (qp_srq_en) {
1695                         queuesz_p->cs_rq     = 0;
1696                         queuesz_p->cs_rq_sgl = 0;
1697                 } else {
1698                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1699                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1700                 }
1701         }
1702 
1703         /*
1704          * Fill in the rest of the Hermon Queue Pair handle.
1705          */
1706         qp->qp_qpcrsrcp              = NULL;
1707         qp->qp_rsrcp         = rsrc;
1708         qp->qp_state         = HERMON_QP_RESET;
1709         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1710         qp->qp_pdhdl         = pd;
1711         qp->qp_mrhdl         = mr;
1712         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1713             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1714         qp->qp_is_special    = 0;
1715         qp->qp_uarpg         = uarpg;
1716         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1717         qp->qp_sq_cqhdl              = sq_cq;
1718         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1719         qp->qp_sq_logqsz     = log_qp_sq_size;
1720         qp->qp_sq_buf                = sq_buf;
1721         qp->qp_desc_off              = qp_desc_off;
1722         qp->qp_rq_cqhdl              = rq_cq;
1723         qp->qp_rq_buf                = rq_buf;
1724         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1725             0;
1726 
1727         /* if this QP is on an SRQ, set rq_bufsz to 0 */
1728         if (qp_srq_en) {
1729                 qp->qp_rq_bufsz              = 0;
1730                 qp->qp_rq_logqsz     = 0;
1731         } else {
1732                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1733                 qp->qp_rq_logqsz     = log_qp_rq_size;
1734         }
1735 
1736         qp->qp_forward_sqd_event  = 0;
1737         qp->qp_sqd_still_draining = 0;
1738         qp->qp_hdlrarg               = (void *)ibt_qphdl[ii];
1739         qp->qp_mcg_refcnt    = 0;
1740 
1741         /*
1742          * If this QP is to be associated with an SRQ, set the SRQ handle
1743          */
1744         if (qp_srq_en) {
1745                 qp->qp_srqhdl = srq;
1746                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1747         } else {
1748                 qp->qp_srqhdl = NULL;
1749         }
1750 
1751         qp->qp_type = IBT_UD_RQP;
1752         qp->qp_serv_type = serv_type;
1753 
1754         /*
1755          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1756          */
1757 
1758         /*
1759          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1760          * set the quadword to all F's - high-order bit is owner (init to one)
1761          * and the rest for the headroom definition of prefetching.
1762          */
1763         if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1764                 wqesz_shift = qp->qp_sq_log_wqesz;
1765                 thewqesz    = 1 << wqesz_shift;
1766                 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1767                 for (i = 0; i < sq_depth; i++) {
1768                         /*
1769                          * for each stride, go through and every 64 bytes
1770                          * write the init value - having set the address
1771                          * once, just keep incrementing it
1772                          */
1773                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1774                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
1775                         }
1776                 }
1777         }
1778 
1779         /* Zero out the QP context */
1780         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1781 
1782         /*
1783          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1784          * "qphdl" and return success
1785          */
1786         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1787 
1788         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1789             DDI_INTR_PRI(state->hs_intrmsi_pri));
1790 
1791         qp->qp_rangep = qp_range_p;
1792 
1793         qphdl[ii] = qp;
1794 
1795         if (++ii < (1 << log2))
1796                 goto for_each_qp;
1797 
1798         return (DDI_SUCCESS);
1799 
1800 /*
1801  * The following is cleanup for all possible failure cases in this routine
1802  */
1803 qpalloc_fail9:
1804         hermon_queue_free(&qp->qp_wqinfo);
1805 qpalloc_fail8:
1806         if (qp->qp_sq_wqhdr)
1807                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1808         if (qp->qp_rq_wqhdr)
1809                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1810 qpalloc_fail7:
1811         if (!qp_srq_en) {
1812                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1813         }
1814 
1815 qpalloc_fail6:
1816         hermon_rsrc_free(state, &rsrc);
1817 qpalloc_fail4:
1818         hermon_cq_refcnt_dec(rq_cq);
1819 qpalloc_fail2:
1820         hermon_cq_refcnt_dec(sq_cq);
1821 qpalloc_fail1:
1822         hermon_pd_refcnt_dec(pd);
1823 qpalloc_fail0:
1824         if (ii == 0) {
1825                 if (qp_range_p)
1826                         kmem_free(qp_range_p, sizeof (*qp_range_p));
1827                 hermon_rsrc_free(state, &qpc);
1828         } else {
1829                 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1830 
1831                 mutex_enter(&qp->qp_rangep->hqpr_lock);
1832                 qp_range_p->hqpr_refcnt = ii;
1833                 mutex_exit(&qp->qp_rangep->hqpr_lock);
1834                 while (--ii >= 0) {
1835                         ibc_qpn_hdl_t qpn_hdl;
1836                         int free_status;
1837 
1838                         free_status = hermon_qp_free(state, &qphdl[ii],
1839                             IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1840                         if (free_status != DDI_SUCCESS)
1841                                 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1842                                     "error status %x during free",
1843                                     status, free_status);
1844                 }
1845         }
1846 
1847         return (status);
1848 }
1849 
1850 
1851 /*
1852  * hermon_qp_free()
1853  *    This function frees up the QP resources.  Depending on the value
1854  *    of the "free_qp_flags", the QP number may not be released until
1855  *    a subsequent call to hermon_qp_release_qpn().
1856  *
      *    state:         per-HCA soft state
      *    qphdl:         in/out - QP to free; set to NULL on success
      *    free_qp_flags: IBC_FREE_QP_ONLY keeps the QP number reserved and
      *                   returns its handle through "qpnh"; any other value
      *                   releases the QPN for reuse
      *    qpnh:          out - filled in only for IBC_FREE_QP_ONLY
      *    sleepflag:     passed through to deregistration (blocking behavior)
      *
      *    Returns DDI_SUCCESS, or an ibc_get_ci_failure() code on error.
      *    On some mid-teardown failures (e.g. MR deregistration), resources
      *    freed before the failing step are not re-acquired.
      *
1857  *    Context: Can be called only from user or kernel context.
1858  */
1859 /* ARGSUSED */
1860 int
1861 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1862     ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1863     uint_t sleepflag)
1864 {
1865         hermon_rsrc_t           *qpc, *rsrc;
1866         hermon_umap_db_entry_t  *umapdb;
1867         hermon_qpn_entry_t      *entry;
1868         hermon_pdhdl_t          pd;
1869         hermon_mrhdl_t          mr;
1870         hermon_cqhdl_t          sq_cq, rq_cq;
1871         hermon_srqhdl_t         srq;
1872         hermon_qphdl_t          qp;
1873         uint64_t                value;
1874         uint_t                  type, port;
1875         uint_t                  maxprot;
1876         uint_t                  qp_srq_en;
1877         int                     status;
1878 
1879         /*
1880          * Pull all the necessary information from the Hermon Queue Pair
1881          * handle.  This is necessary here because the resource for the
1882          * QP handle is going to be freed up as part of this operation.
1883          */
1884         qp      = *qphdl;
1885         mutex_enter(&qp->qp_lock);
1886         qpc     = qp->qp_qpcrsrcp;   /* NULL if part of a "range" */
1887         rsrc    = qp->qp_rsrcp;
1888         pd      = qp->qp_pdhdl;
1889         srq     = qp->qp_srqhdl;
1890         mr      = qp->qp_mrhdl;
1891         rq_cq   = qp->qp_rq_cqhdl;
1892         sq_cq   = qp->qp_sq_cqhdl;
1893         port    = qp->qp_portnum;
             /* masked IBT_QP_USES_SRQ bit; nonzero => QP posts recv to SRQ */
1894         qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1895 
1896         /*
1897          * If the QP is part of an MCG, then we fail the qp_free
1898          */
1899         if (qp->qp_mcg_refcnt != 0) {
1900                 mutex_exit(&qp->qp_lock);
1901                 status = ibc_get_ci_failure(0);
1902                 goto qpfree_fail;
1903         }
1904 
1905         /*
1906          * If the QP is not already in "Reset" state, then transition to
1907          * "Reset".  This is necessary because software does not reclaim
1908          * ownership of the QP context until the QP is in the "Reset" state.
1909          * If the ownership transfer fails for any reason, then it is an
1910          * indication that something (either in HW or SW) has gone seriously
1911          * wrong.  So we print a warning message and return.
1912          */
1913         if (qp->qp_state != HERMON_QP_RESET) {
1914                 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1915                         mutex_exit(&qp->qp_lock);
1916                         HERMON_WARNING(state, "failed to reset QP context");
1917                         status = ibc_get_ci_failure(0);
1918                         goto qpfree_fail;
1919                 }
1920                 qp->qp_state = HERMON_QP_RESET;
1921                 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1922 
1923                 /*
1924                  * Do any additional handling necessary for the transition
1925                  * to the "Reset" state (e.g. update the WRID lists)
1926                  */
1927                 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1928                         mutex_exit(&qp->qp_lock);
1929                         HERMON_WARNING(state, "failed to reset QP WRID list");
1930                         status = ibc_get_ci_failure(0);
1931                         goto qpfree_fail;
1932                 }
1933         }
1934 
1935         /*
1936          * If this was a user-mappable QP, then we need to remove its entry
1937          * from the "userland resources database".  If it is also currently
1938          * mmap()'d out to a user process, then we need to call
1939          * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1940          * We also need to invalidate the QP tracking information for the
1941          * user mapping.
1942          */
1943         if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1944                 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1945                     MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1946                     &umapdb);
1947                 if (status != DDI_SUCCESS) {
1948                         mutex_exit(&qp->qp_lock);
1949                         HERMON_WARNING(state, "failed to find in database");
1950                         return (ibc_get_ci_failure(0));
1951                 }
1952                 hermon_umap_db_free(umapdb);
1953                 if (qp->qp_umap_dhp != NULL) {
1954                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1955                         status = devmap_devmem_remap(qp->qp_umap_dhp,
1956                             state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1957                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
1958                         if (status != DDI_SUCCESS) {
1959                                 mutex_exit(&qp->qp_lock);
1960                                 HERMON_WARNING(state, "failed in QP memory "
1961                                     "devmap_devmem_remap()");
1962                                 return (ibc_get_ci_failure(0));
1963                         }
1964                         qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1965                 }
1966         }
1967 
1968 
1969         /*
1970          * Put NULL into the Hermon QPNum-to-QPHdl list.  This will allow any
1971          * in-progress events to detect that the QP corresponding to this
1972          * number has been freed.  Note: it does depend in whether we are
1973          * freeing a special QP or not.
1974          */
1975         if (qpc == NULL) {
                     /* range-allocated QP: qp_qpnum already holds the index */
1976                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1977                     qp->qp_qpnum, NULL);
1978         } else if (qp->qp_is_special) {
                     /* special (SMI/GSI) QPs are indexed by port offset */
1979                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1980                     qpc->hr_indx + port, NULL);
1981         } else {
1982                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1983                     qpc->hr_indx, NULL);
1984         }
1985 
1986         /*
1987          * Drop the QP lock
1988          *    At this point the lock is no longer necessary.  We cannot
1989          *    protect from multiple simultaneous calls to free the same QP.
1990          *    In addition, since the QP lock is contained in the QP "software
1991          *    handle" resource, which we will free (see below), it is
1992          *    important that we have no further references to that memory.
1993          */
1994         mutex_exit(&qp->qp_lock);
1995         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1996 
1997         /*
1998          * Free the QP resources
1999          *    Start by deregistering and freeing the memory for work queues.
2000          *    Next free any previously allocated context information
2001          *    (depending on QP type)
2002          *    Finally, decrement the necessary reference counts.
2003          * If this fails for any reason, then it is an indication that
2004          * something (either in HW or SW) has gone seriously wrong.  So we
2005          * print a warning message and return.
2006          */
2007         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
2008             sleepflag);
2009         if (status != DDI_SUCCESS) {
2010                 HERMON_WARNING(state, "failed to deregister QP memory");
2011                 status = ibc_get_ci_failure(0);
2012                 goto qpfree_fail;
2013         }
2014 
2015         /* Free the memory for the QP */
2016         hermon_queue_free(&qp->qp_wqinfo);
2017 
             /* Destroy the WRID work-queue headers (RQ hdr absent on SRQ QPs) */
2018         if (qp->qp_sq_wqhdr)
2019                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2020         if (qp->qp_rq_wqhdr)
2021                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2022 
2023         /* Free the dbr */
2024         if (!qp_srq_en) {
2025                 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2026         }
2027 
2028         /*
2029          * Free up the remainder of the QP resources.  Note: we have a few
2030          * different resources to free up depending on whether the QP is a
2031          * special QP or not.  As described above, if any of these fail for
2032          * any reason it is an indication that something (either in HW or SW)
2033          * has gone seriously wrong.  So we print a warning message and
2034          * return.
2035          */
2036         if (qp->qp_is_special) {
2037                 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2038                     IBT_SMI_SQP : IBT_GSI_SQP;
2039 
2040                 /* Free up resources for the special QP */
2041                 status = hermon_special_qp_rsrc_free(state, type, port);
2042                 if (status != DDI_SUCCESS) {
2043                         HERMON_WARNING(state, "failed to free special QP rsrc");
2044                         status = ibc_get_ci_failure(0);
2045                         goto qpfree_fail;
2046                 }
2047 
2048         } else if (qp->qp_rangep) {
                     /* range member: last QP out frees the shared range state */
2049                 int refcnt;
2050                 mutex_enter(&qp->qp_rangep->hqpr_lock);
2051                 refcnt = --qp->qp_rangep->hqpr_refcnt;
2052                 mutex_exit(&qp->qp_rangep->hqpr_lock);
2053                 if (refcnt == 0) {
2054                         mutex_destroy(&qp->qp_rangep->hqpr_lock);
2055                         hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2056                         kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2057                 }
2058                 qp->qp_rangep = NULL;
2059         } else if (qp->qp_qpn_hdl == NULL) {
                     /* no QPN-management entry: free the QP context directly */
2060                 hermon_rsrc_free(state, &qpc);
2061         } else {
2062                 /*
2063                  * Check the flags and determine whether to release the
2064                  * QPN or not, based on their value.
2065                  */
2066                 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2067                         entry = qp->qp_qpn_hdl;
2068                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2069                             HERMON_QPN_FREE_ONLY);
2070                         *qpnh = (ibc_qpn_hdl_t)entry;
2071                 } else {
2072                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2073                             HERMON_QPN_RELEASE);
2074                 }
2075         }
2076 
             /* Destroy the send queue lock (initialized at QP creation) */
2077         mutex_destroy(&qp->qp_sq_lock);
2078 
2079         /* Free the Hermon Queue Pair handle */
2080         hermon_rsrc_free(state, &rsrc);
2081 
2082         /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2083         hermon_cq_refcnt_dec(rq_cq);
2084         hermon_cq_refcnt_dec(sq_cq);
2085         hermon_pd_refcnt_dec(pd);
             /*
              * NOTE(review): qp_srq_en was computed above as the masked
              * IBT_QP_USES_SRQ bit, but here it is compared against
              * HERMON_QP_SRQ_ENABLED.  This only works if the two constants
              * have the same value -- confirm against the driver headers.
              */
2086         if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2087                 hermon_srq_refcnt_dec(srq);
2088         }
2089 
2090         /* Set the qphdl pointer to NULL and return success */
2091         *qphdl = NULL;
2092 
2093         return (DDI_SUCCESS);
2094 
2095 qpfree_fail:
             /* Simply return the failure status set above */
2096         return (status);
2097 }
2098 
2099 
2100 /*
2101  * hermon_qp_query()
2102  *    Context: Can be called from interrupt or base context.
2103  */
int
hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_qp_query_attr_t *attr_p)
{
        ibt_cep_state_t         qp_state;
        ibt_qp_ud_attr_t        *ud;
        ibt_qp_rc_attr_t        *rc;
        ibt_qp_uc_attr_t        *uc;
        ibt_cep_flags_t         enable_flags;
        hermon_hw_addr_path_t   *qpc_path, *qpc_alt_path;
        ibt_cep_path_t          *path_ptr, *alt_path_ptr;
        hermon_hw_qpc_t         *qpc;
        int                     status;
        uint_t                  tmp_sched_q, tmp_alt_sched_q;

        mutex_enter(&qp->qp_lock);

        /*
         * Grab the temporary QPC entry from QP software state
         */
        qpc = &qp->qpc;

        /* Convert the current Hermon QP state to IBTF QP state */
        switch (qp->qp_state) {
        case HERMON_QP_RESET:
                qp_state = IBT_STATE_RESET;             /* "Reset" */
                break;
        case HERMON_QP_INIT:
                qp_state = IBT_STATE_INIT;              /* Initialized */
                break;
        case HERMON_QP_RTR:
                qp_state = IBT_STATE_RTR;               /* Ready to Receive */
                break;
        case HERMON_QP_RTS:
                qp_state = IBT_STATE_RTS;               /* Ready to Send */
                break;
        case HERMON_QP_SQERR:
                qp_state = IBT_STATE_SQE;               /* Send Queue Error */
                break;
        case HERMON_QP_SQD:
                /*
                 * SQD is reported as two distinct IBTF states depending
                 * on whether the drain has actually completed yet.
                 */
                if (qp->qp_sqd_still_draining) {
                        qp_state = IBT_STATE_SQDRAIN;   /* SQ Draining */
                } else {
                        qp_state = IBT_STATE_SQD;       /* SQ Drained */
                }
                break;
        case HERMON_QP_ERR:
                qp_state = IBT_STATE_ERROR;             /* Error */
                break;
        default:
                /* Unknown software state: fail the query */
                mutex_exit(&qp->qp_lock);
                return (ibc_get_ci_failure(0));
        }
        attr_p->qp_info.qp_state = qp_state;

        /* SRQ Hook. */
        attr_p->qp_srq = NULL;

        /*
         * The following QP information is always returned, regardless of
         * the current QP state.  Note: Some special handling is necessary
         * for calculating the QP number on special QP (QP0 and QP1).
         */
        attr_p->qp_sq_cq    =
            (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
        attr_p->qp_rq_cq    =
            (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
        if (qp->qp_is_special) {
                /* Special QPs report the architected QPN: 0 (SMI) or 1 (GSI) */
                attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
        } else {
                attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
        }
        attr_p->qp_sq_sgl   = qp->qp_sq_sgl;
        attr_p->qp_rq_sgl   = qp->qp_rq_sgl;
        /* SQ depth excludes the internal "header" WQEs reserved by driver */
        attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
        attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;

        /*
         * If QP is currently in the "Reset" state, then only the above are
         * returned
         */
        if (qp_state == IBT_STATE_RESET) {
                mutex_exit(&qp->qp_lock);
                return (DDI_SUCCESS);
        }

        /*
         * Post QUERY_QP command to firmware
         *
         * We do a HERMON_NOSLEEP here because we are holding the "qp_lock".
         * Since we may be in the interrupt context (or subsequently raised
         * to interrupt level by priority inversion), we do not want to block
         * in this routine waiting for success.
         */
        /*
         * Save the cached sched_q fields (which encode the port number,
         * see the "(sched_q >> 6) & 0x01" extraction below) so they can
         * be restored after QUERY_QP overwrites the software QPC copy.
         */
        tmp_sched_q = qpc->pri_addr_path.sched_q;
        tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
        status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
            qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                mutex_exit(&qp->qp_lock);
                cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
                    "command failed: %08x\n", state->hs_instance, status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                return (ibc_get_ci_failure(0));
        }
        qpc->pri_addr_path.sched_q = tmp_sched_q;
        qpc->alt_addr_path.sched_q = tmp_alt_sched_q;

        /*
         * Fill in the additional QP info based on the QP's transport type.
         *
         * NOTE(review): the UD case keys off qp->qp_type while the RC/UC
         * cases key off qp->qp_serv_type; presumably every UD-class
         * service type (including FEXCH/FCMND/RFCI, handled inside this
         * branch) carries qp_type == IBT_UD_RQP -- confirm against the
         * allocation path.
         */
        if (qp->qp_type == IBT_UD_RQP) {

                /* Fill in the UD-specific info */
                ud = &attr_p->qp_info.qp_transport.ud;
                ud->ud_qkey  = (ib_qkey_t)qpc->qkey;
                ud->ud_sq_psn        = qpc->next_snd_psn;
                ud->ud_pkey_ix       = qpc->pri_addr_path.pkey_indx;
                /* port+1 for port 1/2 */
                ud->ud_port  =
                    (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);

                attr_p->qp_info.qp_trans = IBT_UD_SRV;

                if (qp->qp_serv_type == HERMON_QP_FEXCH) {
                        ibt_pmr_desc_t *pmr;
                        uint64_t heart_beat;

                        /*
                         * FEXCH QPs additionally report the FC memory
                         * region descriptors and a heart-beat status.
                         */
                        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr))
                        pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
                        pmr->pmd_iova = 0;
                        pmr->pmd_lkey = pmr->pmd_rkey =
                            hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
                        pmr->pmd_phys_buf_list_sz =
                            state->hs_fcoib.hfc_mtts_per_mpt;
                        pmr->pmd_sync_required = 0;

                        /* Bidirectional descriptor is reported as unused */
                        pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
                        pmr->pmd_iova = 0;
                        pmr->pmd_lkey = 0;
                        pmr->pmd_rkey = 0;
                        pmr->pmd_phys_buf_list_sz = 0;
                        pmr->pmd_sync_required = 0;

                        /* Heart beat OK only if the FW query succeeds AND reads 0 */
                        attr_p->qp_query_fexch.fq_flags =
                            ((hermon_get_heart_beat_rq_cmd_post(state,
                            qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
                            (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
                            IBT_FEXCH_NO_FLAGS;

                        ud->ud_fc = qp->qp_fc_attr;
                } else if (qp->qp_serv_type == HERMON_QP_FCMND ||
                    qp->qp_serv_type == HERMON_QP_RFCI) {
                        ud->ud_fc = qp->qp_fc_attr;
                }

        } else if (qp->qp_serv_type == HERMON_QP_RC) {

                /* Fill in the RC-specific info */
                rc = &attr_p->qp_info.qp_transport.rc;
                rc->rc_sq_psn        = qpc->next_snd_psn;
                rc->rc_rq_psn        = qpc->next_rcv_psn;
                rc->rc_dst_qpn       = qpc->rem_qpn;

                /* Grab the path migration state information */
                if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
                        rc->rc_mig_state = IBT_STATE_MIGRATED;
                } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
                        rc->rc_mig_state = IBT_STATE_REARMED;
                } else {
                        rc->rc_mig_state = IBT_STATE_ARMED;
                }
                /* sra_max/rra_max are log2 values; convert to counts */
                rc->rc_rdma_ra_out = (1 << qpc->sra_max);
                rc->rc_rdma_ra_in  = (1 << qpc->rra_max);
                rc->rc_min_rnr_nak = qpc->min_rnr_nak;
                rc->rc_path_mtu         = qpc->mtu;
                rc->rc_retry_cnt   = qpc->retry_cnt;

                /* Get the common primary address path fields */
                qpc_path = &qpc->pri_addr_path;
                path_ptr = &rc->rc_path;
                hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
                    HERMON_ADDRPATH_QP);

                /* Fill in the additional primary address path fields */
                path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
                path_ptr->cep_hca_port_num =
                    path_ptr->cep_adds_vect.av_port_num =
                    (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
                path_ptr->cep_timeout           = qpc_path->ack_timeout;

                /* Get the common alternate address path fields */
                qpc_alt_path = &qpc->alt_addr_path;
                alt_path_ptr = &rc->rc_alt_path;
                hermon_get_addr_path(state, qpc_alt_path,
                    &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);

                /* Fill in the additional alternate address path fields */
                alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
                alt_path_ptr->cep_hca_port_num       =
                    alt_path_ptr->cep_adds_vect.av_port_num =
                    (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
                alt_path_ptr->cep_timeout    = qpc_alt_path->ack_timeout;

                /* Get the RNR retry time from primary path */
                rc->rc_rnr_retry_cnt = qpc->rnr_retry;

                /* Set the enable flags based on RDMA/Atomic enable bits */
                enable_flags = IBT_CEP_NO_FLAGS;
                enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
                enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
                enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
                attr_p->qp_info.qp_flags = enable_flags;

                attr_p->qp_info.qp_trans = IBT_RC_SRV;

        } else if (qp->qp_serv_type == HERMON_QP_UC) {

                /* Fill in the UC-specific info */
                uc = &attr_p->qp_info.qp_transport.uc;
                uc->uc_sq_psn        = qpc->next_snd_psn;
                uc->uc_rq_psn        = qpc->next_rcv_psn;
                uc->uc_dst_qpn       = qpc->rem_qpn;

                /* Grab the path migration state information */
                if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
                        uc->uc_mig_state = IBT_STATE_MIGRATED;
                } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
                        uc->uc_mig_state = IBT_STATE_REARMED;
                } else {
                        uc->uc_mig_state = IBT_STATE_ARMED;
                }
                uc->uc_path_mtu = qpc->mtu;

                /* Get the common primary address path fields */
                qpc_path = &qpc->pri_addr_path;
                path_ptr = &uc->uc_path;
                hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
                    HERMON_ADDRPATH_QP);

                /* Fill in the additional primary address path fields */
                path_ptr->cep_pkey_ix           = qpc_path->pkey_indx;
                path_ptr->cep_hca_port_num =
                    path_ptr->cep_adds_vect.av_port_num =
                    (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);

                /* Get the common alternate address path fields */
                qpc_alt_path = &qpc->alt_addr_path;
                alt_path_ptr = &uc->uc_alt_path;
                hermon_get_addr_path(state, qpc_alt_path,
                    &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);

                /* Fill in the additional alternate address path fields */
                alt_path_ptr->cep_pkey_ix    = qpc_alt_path->pkey_indx;
                alt_path_ptr->cep_hca_port_num       =
                    alt_path_ptr->cep_adds_vect.av_port_num =
                    (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);

                /*
                 * Set the enable flags based on RDMA enable bits (by
                 * definition UC doesn't support Atomic or RDMA Read)
                 */
                enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
                attr_p->qp_info.qp_flags = enable_flags;

                attr_p->qp_info.qp_trans = IBT_UC_SRV;

        } else {
                HERMON_WARNING(state, "unexpected QP transport type");
                mutex_exit(&qp->qp_lock);
                return (ibc_get_ci_failure(0));
        }

        /*
         * Under certain circumstances it is possible for the Hermon hardware
         * to transition to one of the error states without software directly
         * knowing about it.  The QueryQP() call is the one place where we
         * have an opportunity to sample and update our view of the QP state.
         */
        if (qpc->state == HERMON_QP_SQERR) {
                attr_p->qp_info.qp_state = IBT_STATE_SQE;
                qp->qp_state = HERMON_QP_SQERR;
                HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
        }
        if (qpc->state == HERMON_QP_ERR) {
                attr_p->qp_info.qp_state = IBT_STATE_ERROR;
                qp->qp_state = HERMON_QP_ERR;
                HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
        }
        mutex_exit(&qp->qp_lock);

        return (DDI_SUCCESS);
}
2399 
2400 
2401 /*
2402  * hermon_qp_create_qpn()
2403  *    Context: Can be called from interrupt or base context.
2404  */
static int
hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
    hermon_rsrc_t *qpc)
{
        hermon_qpn_entry_t      query;
        hermon_qpn_entry_t      *entry;
        avl_index_t             where;

        /*
         * Build a query (for the AVL tree lookup) and attempt to find
         * a previously added entry that has a matching QPC index.  If
         * no matching entry is found, then allocate, initialize, and
         * add an entry to the AVL tree.
         * If a matching entry is found, then increment its QPN counter
         * and reference counter.
         */
        query.qpn_indx = qpc->hr_indx;
        mutex_enter(&state->hs_qpn_avl_lock);
        entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
            &query, &where);
        if (entry == NULL) {
                /*
                 * Allocate and initialize a QPN entry, then insert
                 * it into the AVL tree.
                 * KM_NOSLEEP because this may run in interrupt context
                 * (and with the AVL lock held); failure is simply
                 * propagated to the caller.
                 */
                entry = (hermon_qpn_entry_t *)kmem_zalloc(
                    sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
                if (entry == NULL) {
                        mutex_exit(&state->hs_qpn_avl_lock);
                        return (DDI_FAILURE);
                }
                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))

                entry->qpn_indx         = qpc->hr_indx;
                entry->qpn_refcnt  = 0;
                entry->qpn_counter = 0;

                avl_insert(&state->hs_qpn_avl, entry, where);
        }

        /*
         * Make the AVL tree entry point to the QP context resource that
         * it will be responsible for tracking
         */
        entry->qpn_qpc = qpc;

        /*
         * Setup the QP handle to point to the AVL tree entry.  Then
         * generate the new QP number from the entry's QPN counter value
         * and the hardware's QP context table index.
         *
         * The QPN layout is: the low cp_log_num_qp bits are the QPC
         * table index (constrained), and the bits above them are the
         * entry's reuse counter (unconstrained), masked to the maximum
         * legal QP number.  See hermon_qphdl_from_qpnum() for the
         * reverse mapping and the rationale.
         */
        qp->qp_qpn_hdl       = entry;
        qp->qp_qpnum = ((entry->qpn_counter <<
            state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
            HERMON_QP_MAXNUMBER_MSK;
        /* qp_ring is the QPN shifted up 8 bits -- presumably the */
        /* doorbell-ring encoding expected by hardware; TODO confirm */
        qp->qp_ring = qp->qp_qpnum << 8;

        /*
         * Increment the reference counter and QPN counter.  The QPN
         * counter always indicates the next available number for use.
         */
        entry->qpn_counter++;
        entry->qpn_refcnt++;

        mutex_exit(&state->hs_qpn_avl_lock);

        return (DDI_SUCCESS);
}
2473 
2474 
2475 /*
2476  * hermon_qp_release_qpn()
2477  *    Context: Can be called only from user or kernel context.
2478  */
void
hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
    int flags)
{
        ASSERT(entry != NULL);

        /* All refcnt/counter manipulation is done under the QPN AVL lock */
        mutex_enter(&state->hs_qpn_avl_lock);

        /*
         * If we are releasing the QP number here, then we decrement the
         * reference count and check for zero references.  If there are
         * zero references, then we free the QPC context (if it hadn't
         * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
         * reuse with another similar QP number) and remove the tracking
         * structure from the QP number AVL tree and free the structure.
         * If we are not releasing the QP number here, then, as long as we
         * have not exhausted the usefulness of the QPC context (that is,
         * re-used it too many times without the reference count having
         * gone to zero), we free up the QPC context for use by another
         * thread (which will use it to construct a different QP number
         * from the same QPC table index).
         */
        if (flags == HERMON_QPN_RELEASE) {
                entry->qpn_refcnt--;

                /*
                 * If the reference count is zero, then we free the QPC
                 * context (if it hadn't already been freed in an early
                 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
                 * tracking structure from the QP number AVL tree.
                 */
                if (entry->qpn_refcnt == 0) {
                        if (entry->qpn_qpc != NULL) {
                                hermon_rsrc_free(state, &entry->qpn_qpc);
                        }

                        /*
                         * If the current entry has served it's useful
                         * purpose (i.e. been reused the maximum allowable
                         * number of times), then remove it from QP number
                         * AVL tree and free it up.
                         * The limit is 2^(24 - cp_log_num_qp): the number
                         * of distinct reuse-counter values that fit in the
                         * unconstrained bits of a 24-bit QPN.
                         */
                        if (entry->qpn_counter >= (1 <<
                            (24 - state->hs_cfg_profile->cp_log_num_qp))) {
                                avl_remove(&state->hs_qpn_avl, entry);
                                kmem_free(entry, sizeof (hermon_qpn_entry_t));
                        }
                }

        } else if (flags == HERMON_QPN_FREE_ONLY) {
                /*
                 * Even if we are not freeing the QP number, that will not
                 * always prevent us from releasing the QPC context.  In fact,
                 * since the QPC context only forms part of the whole QPN,
                 * we want to free it up for use by other consumers.  But
                 * if the reference count is non-zero (which it will always
                 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
                 * has reached its maximum value, then we cannot reuse the
                 * QPC context until the reference count eventually reaches
                 * zero (in HERMON_QPN_RELEASE, above).
                 */
                if (entry->qpn_counter < (1 <<
                    (24 - state->hs_cfg_profile->cp_log_num_qp))) {
                        hermon_rsrc_free(state, &entry->qpn_qpc);
                }
        }
        mutex_exit(&state->hs_qpn_avl_lock);
}
2547 
2548 
2549 /*
2550  * hermon_qpn_avl_compare()
2551  *    Context: Can be called from user or kernel context.
2552  */
2553 static int
2554 hermon_qpn_avl_compare(const void *q, const void *e)
2555 {
2556         hermon_qpn_entry_t      *entry, *query;
2557 
2558         entry = (hermon_qpn_entry_t *)e;
2559         query = (hermon_qpn_entry_t *)q;
2560 
2561         if (query->qpn_indx < entry->qpn_indx) {
2562                 return (-1);
2563         } else if (query->qpn_indx > entry->qpn_indx) {
2564                 return (+1);
2565         } else {
2566                 return (0);
2567         }
2568 }
2569 
2570 
2571 /*
2572  * hermon_qpn_avl_init()
2573  *    Context: Only called from attach() path context
2574  */
2575 void
2576 hermon_qpn_avl_init(hermon_state_t *state)
2577 {
2578         /* Initialize the lock used for QP number (QPN) AVL tree access */
2579         mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
2580             DDI_INTR_PRI(state->hs_intrmsi_pri));
2581 
2582         /* Initialize the AVL tree for the QP number (QPN) storage */
2583         avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
2584             sizeof (hermon_qpn_entry_t),
2585             offsetof(hermon_qpn_entry_t, qpn_avlnode));
2586 }
2587 
2588 
2589 /*
2590  * hermon_qpn_avl_fini()
2591  *    Context: Only called from attach() and/or detach() path contexts
2592  */
2593 void
2594 hermon_qpn_avl_fini(hermon_state_t *state)
2595 {
2596         hermon_qpn_entry_t      *entry;
2597         void                    *cookie;
2598 
2599         /*
2600          * Empty all entries (if necessary) and destroy the AVL tree
2601          * that was used for QP number (QPN) tracking.
2602          */
2603         cookie = NULL;
2604         while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2605             &state->hs_qpn_avl, &cookie)) != NULL) {
2606                 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2607         }
2608         avl_destroy(&state->hs_qpn_avl);
2609 
2610         /* Destroy the lock used for QP number (QPN) AVL tree access */
2611         mutex_destroy(&state->hs_qpn_avl_lock);
2612 }
2613 
2614 
2615 /*
2616  * hermon_qphdl_from_qpnum()
2617  *    Context: Can be called from interrupt or base context.
2618  *
2619  *    This routine is important because changing the unconstrained
2620  *    portion of the QP number is critical to the detection of a
2621  *    potential race condition in the QP event handler code (i.e. the case
2622  *    where a QP is freed and alloc'd again before an event for the
2623  *    "old" QP can be handled).
2624  *
2625  *    While this is not a perfect solution (not sure that one exists)
2626  *    it does help to mitigate the chance that this race condition will
2627  *    cause us to deliver a "stale" event to the new QP owner.  Note:
2628  *    this solution does not scale well because the number of constrained
2629  *    bits increases (and, hence, the number of unconstrained bits
2630  *    decreases) as the number of supported QPs grows.  For small and
2631  *    intermediate values, it should hopefully provide sufficient
2632  *    protection.
2633  */
2634 hermon_qphdl_t
2635 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2636 {
2637         uint_t  qpindx, qpmask;
2638 
2639         /* Calculate the QP table index from the qpnum */
2640         qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2641         qpindx = qpnum & qpmask;
2642         return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2643 }
2644 
2645 
2646 /*
2647  * hermon_special_qp_rsrc_alloc
2648  *    Context: Can be called from interrupt or base context.
2649  */
static int
hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
    uint_t port, hermon_rsrc_t **qp_rsrc)
{
        uint_t          mask, flags;
        int             status;

        /*
         * The hs_spec_qplock protects both the per-port allocation flags
         * (hs_spec_qpflags) and the posting of CONF_SPECIAL_QP firmware
         * commands; it is held across the command post.
         */
        mutex_enter(&state->hs_spec_qplock);
        flags = state->hs_spec_qpflags;
        if (type == IBT_SMI_SQP) {
                /*
                 * Check here to see if the driver has been configured
                 * to instruct the Hermon firmware to handle all incoming
                 * SMP messages (i.e. messages sent to SMA).  If so,
                 * then we will treat QP0 as if it has already been
                 * allocated (for internal use).  Otherwise, if we allow
                 * the allocation to happen, it will cause unexpected
                 * behaviors (e.g. Hermon SMA becomes unresponsive).
                 */
                if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
                        mutex_exit(&state->hs_spec_qplock);
                        return (IBT_QP_IN_USE);
                }

                /*
                 * If this is the first QP0 allocation, then post
                 * a CONF_SPECIAL_QP firmware command
                 */
                if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
                        status = hermon_conf_special_qp_cmd_post(state,
                            state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
                            HERMON_CMD_NOSLEEP_SPIN,
                            HERMON_CMD_SPEC_QP_OPMOD(
                            state->hs_cfg_profile->cp_qp0_agents_in_fw,
                            state->hs_cfg_profile->cp_qp1_agents_in_fw));
                        if (status != HERMON_CMD_SUCCESS) {
                                mutex_exit(&state->hs_spec_qplock);
                                cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
                                    "command failed: %08x\n",
                                    state->hs_instance, status);
                                return (IBT_INSUFF_RESOURCE);
                        }
                }

                /*
                 * Now check (and, if necessary, modify) the flags to indicate
                 * whether the allocation was successful
                 */
                mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
                if (flags & mask) {
                        /* QP0 for this port is already allocated */
                        mutex_exit(&state->hs_spec_qplock);
                        return (IBT_QP_IN_USE);
                }
                state->hs_spec_qpflags |= mask;
                *qp_rsrc = state->hs_spec_qp0;

        } else {
                /*
                 * GSI (QP1) allocation: same flow as QP0 above, but
                 * without the agents-in-fw short-circuit.
                 *
                 * If this is the first QP1 allocation, then post
                 * a CONF_SPECIAL_QP firmware command
                 */
                if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
                        status = hermon_conf_special_qp_cmd_post(state,
                            state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
                            HERMON_CMD_NOSLEEP_SPIN,
                            HERMON_CMD_SPEC_QP_OPMOD(
                            state->hs_cfg_profile->cp_qp0_agents_in_fw,
                            state->hs_cfg_profile->cp_qp1_agents_in_fw));
                        if (status != HERMON_CMD_SUCCESS) {
                                mutex_exit(&state->hs_spec_qplock);
                                cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
                                    "command failed: %08x\n",
                                    state->hs_instance, status);
                                return (IBT_INSUFF_RESOURCE);
                        }
                }

                /*
                 * Now check (and, if necessary, modify) the flags to indicate
                 * whether the allocation was successful
                 */
                mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
                if (flags & mask) {
                        /* QP1 for this port is already allocated */
                        mutex_exit(&state->hs_spec_qplock);
                        return (IBT_QP_IN_USE);
                }
                state->hs_spec_qpflags |= mask;
                *qp_rsrc = state->hs_spec_qp1;
        }

        mutex_exit(&state->hs_spec_qplock);
        return (DDI_SUCCESS);
}
2743 
2744 
2745 /*
2746  * hermon_special_qp_rsrc_free
2747  *    Context: Can be called from interrupt or base context.
2748  */
static int
hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
    uint_t port)
{
        uint_t          mask, flags;
        int             status;

        mutex_enter(&state->hs_spec_qplock);
        if (type == IBT_SMI_SQP) {
                /* Clear this port's QP0 allocation flag */
                mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
                state->hs_spec_qpflags &= ~mask;
                flags = state->hs_spec_qpflags;

                /*
                 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
                 * NOW, If this is the last Special QP free, then post a
                 * CONF_SPECIAL_QP firmware command - it'll stop them all
                 *
                 * NOTE(review): the comment above says "last free", but
                 * the condition below posts the command when flags is
                 * NON-zero (i.e. some special QP is still allocated),
                 * skipping it when flags reaches zero -- confirm the
                 * intended condition against the firmware interface.
                 */
                if (flags) {
                        status = hermon_conf_special_qp_cmd_post(state, 0,
                            HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
                        if (status != HERMON_CMD_SUCCESS) {
                                mutex_exit(&state->hs_spec_qplock);
                                cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
                                    "command failed: %08x\n",
                                    state->hs_instance, status);
                                if (status == HERMON_CMD_INVALID_STATUS) {
                                        hermon_fm_ereport(state, HCA_SYS_ERR,
                                            HCA_ERR_SRV_LOST);
                                }
                                return (ibc_get_ci_failure(0));
                        }
                }
        } else {
                /* Clear this port's QP1 allocation flag */
                mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
                state->hs_spec_qpflags &= ~mask;
                flags = state->hs_spec_qpflags;

                /*
                 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
                 * NOW, if this is the last special QP free, then post a
                 * CONF_SPECIAL_QP firmware command - it'll stop them all
                 *
                 * NOTE(review): same comment/condition mismatch as the
                 * QP0 arm above -- the command is posted while flags is
                 * non-zero, not when it becomes zero.
                 */
                if (flags) {
                        status = hermon_conf_special_qp_cmd_post(state, 0,
                            HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
                        if (status != HERMON_CMD_SUCCESS) {
                                mutex_exit(&state->hs_spec_qplock);
                                cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
                                    "command failed: %08x\n",
                                    state->hs_instance, status);
                                if (status == HERMON_CMD_INVALID_STATUS) {
                                        hermon_fm_ereport(state, HCA_SYS_ERR,
                                            HCA_ERR_SRV_LOST);
                                }
                                return (ibc_get_ci_failure(0));
                        }
                }
        }

        mutex_exit(&state->hs_spec_qplock);
        return (DDI_SUCCESS);
}
2812 
2813 
2814 /*
2815  * hermon_qp_sgl_to_logwqesz()
2816  *    Context: Can be called from interrupt or base context.
2817  */
2818 static void
2819 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2820     uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2821     uint_t *logwqesz, uint_t *max_sgl)
2822 {
2823         uint_t  max_size, log2, actual_sgl;
2824 
2825         switch (wq_type) {
2826         case HERMON_QP_WQ_TYPE_SENDQ_UD:
2827                 /*
2828                  * Use requested maximum SGL to calculate max descriptor size
2829                  * (while guaranteeing that the descriptor size is a
2830                  * power-of-2 cachelines).
2831                  */
2832                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2833                 log2 = highbit(max_size);
2834                 if ((max_size & (max_size - 1)) == 0) {
2835                         log2 = log2 - 1;
2836                 }
2837 
2838                 /* Make sure descriptor is at least the minimum size */
2839                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2840 
2841                 /* Calculate actual number of SGL (given WQE size) */
2842                 actual_sgl = ((1 << log2) -
2843                     sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2844                 break;
2845 
2846         case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2847                 /*
2848                  * Use requested maximum SGL to calculate max descriptor size
2849                  * (while guaranteeing that the descriptor size is a
2850                  * power-of-2 cachelines).
2851                  */
2852                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2853                 log2 = highbit(max_size);
2854                 if ((max_size & (max_size - 1)) == 0) {
2855                         log2 = log2 - 1;
2856                 }
2857 
2858                 /* Make sure descriptor is at least the minimum size */
2859                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2860 
2861                 /* Calculate actual number of SGL (given WQE size) */
2862                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2863                 break;
2864 
2865         case HERMON_QP_WQ_TYPE_RECVQ:
2866                 /*
2867                  * Same as above (except for Recv WQEs)
2868                  */
2869                 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2870                 log2 = highbit(max_size);
2871                 if ((max_size & (max_size - 1)) == 0) {
2872                         log2 = log2 - 1;
2873                 }
2874 
2875                 /* Make sure descriptor is at least the minimum size */
2876                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2877 
2878                 /* Calculate actual number of SGL (given WQE size) */
2879                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2880                 break;
2881 
2882         case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2883                 /*
2884                  * Same as above (except for MLX transport WQEs).  For these
2885                  * WQEs we have to account for the space consumed by the
2886                  * "inline" packet headers.  (This is smaller than for QP1
2887                  * below because QP0 is not allowed to send packets with a GRH.
2888                  */
2889                 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2890                 log2 = highbit(max_size);
2891                 if ((max_size & (max_size - 1)) == 0) {
2892                         log2 = log2 - 1;
2893                 }
2894 
2895                 /* Make sure descriptor is at least the minimum size */
2896                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2897 
2898                 /* Calculate actual number of SGL (given WQE size) */
2899                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2900                 break;
2901 
2902         case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2903                 /*
2904                  * Same as above.  For these WQEs we again have to account for
2905                  * the space consumed by the "inline" packet headers.  (This
2906                  * is larger than for QP0 above because we have to account for
2907                  * the possibility of a GRH in each packet - and this
2908                  * introduces an alignment issue that causes us to consume
2909                  * an additional 8 bytes).
2910                  */
2911                 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2912                 log2 = highbit(max_size);
2913                 if ((max_size & (max_size - 1)) == 0) {
2914                         log2 = log2 - 1;
2915                 }
2916 
2917                 /* Make sure descriptor is at least the minimum size */
2918                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2919 
2920                 /* Calculate actual number of SGL (given WQE size) */
2921                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2922                 break;
2923 
2924         default:
2925                 HERMON_WARNING(state, "unexpected work queue type");
2926                 break;
2927         }
2928 
2929         /* Fill in the return values */
2930         *logwqesz = log2;
2931         *max_sgl  = min(real_max_sgl, actual_sgl);
2932 }