1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 #include <sys/types.h>
  25 #include <sys/stream.h>
  26 #include <sys/dlpi.h>
  27 #include <sys/stropts.h>
  28 #include <sys/strsun.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/strlog.h>
  31 #include <sys/ddi.h>
  32 #include <sys/cmn_err.h>
  33 #include <sys/socket.h>
  34 #include <net/if.h>
  35 #include <net/if_types.h>
  36 #include <netinet/in.h>
  37 #include <sys/ethernet.h>
  38 #include <inet/arp.h>
  39 #include <inet/ip.h>
  40 #include <inet/ip6.h>
  41 #include <inet/ip_ire.h>
  42 #include <inet/ip_if.h>
  43 #include <inet/ip_ftable.h>
  44 
  45 #include <sys/sunddi.h>
  46 #include <sys/ksynch.h>
  47 
  48 #include <sys/rds.h>
  49 #include <sys/socket.h>
  50 #include <sys/socketvar.h>
  51 #include <sys/sockio.h>
  52 #include <sys/sysmacros.h>
  53 #include <inet/common.h>
  54 #include <inet/ip.h>
  55 #include <net/if_types.h>
  56 
  57 #include <sys/ib/clients/rdsv3/rdsv3.h>
  58 #include <sys/ib/clients/rdsv3/rdma.h>
  59 #include <sys/ib/clients/rdsv3/ib.h>
  60 #include <sys/ib/clients/rdsv3/rdsv3_impl.h>
  61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
  62 
  63 #include <sys/dls.h>
  64 #include <sys/mac.h>
  65 #include <sys/mac_client.h>
  66 #include <sys/mac_provider.h>
  67 #include <sys/mac_client_priv.h>
  68 
/*
 * Task queue used to run rdsv3_worker_thread(); it is created in
 * rdsv3_create_task_workqueue() and destroyed (and reset to NULL) in
 * rdsv3_destroy_task_workqueue().
 */
ddi_taskq_t             *rdsv3_taskq = NULL;
/*
 * Object cache that rsock structures are allocated from in
 * rdsv3_sk_alloc() and returned to in rdsv3_sock_exit_data().
 * Defined outside this file.
 */
extern kmem_cache_t     *rdsv3_alloc_cache;

/*
 * Checksum routine defined outside this file; no caller is visible in
 * this part of the file.
 */
extern unsigned int     ip_ocsum(ushort_t *address, int halfword_count,
    unsigned int sum);
  74 
  75 /*
  76  * Check if the IP interface named by `lifrp' is RDS-capable.
  77  */
  78 boolean_t
  79 rdsv3_capable_interface(struct lifreq *lifrp)
  80 {
  81         char    ifname[LIFNAMSIZ];
  82         char    drv[MAXLINKNAMELEN];
  83         uint_t  ppa;
  84         char    *cp;
  85 
  86         RDSV3_DPRINTF4("rdsv3_capable_interface", "Enter");
  87 
  88         if (lifrp->lifr_type == IFT_IB)
  89                 return (B_TRUE);
  90 
  91         /*
  92          * Strip off the logical interface portion before getting
  93          * intimate with the name.
  94          */
  95         (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
  96         if ((cp = strchr(ifname, ':')) != NULL)
  97                 *cp = '\0';
  98 
  99         if (strcmp("lo0", ifname) == 0) {
 100                 /*
 101                  * loopback is considered RDS-capable
 102                  */
 103                 return (B_TRUE);
 104         }
 105 
 106         return (ddi_parse(ifname, drv, &ppa) == DDI_SUCCESS &&
 107             rdsv3_if_lookup_by_name(drv));
 108 }
 109 
 110 int
 111 rdsv3_do_ip_ioctl(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
 112 {
 113         struct lifnum           lifn;
 114         struct lifconf          lifc;
 115         struct lifreq           *lp, *rlp, lifr;
 116         int                     rval = 0;
 117         int                     numifs;
 118         int                     bufsize, rbufsize;
 119         void                    *buf, *rbuf;
 120         int                     i, j, n, rc;
 121 
 122         *ipaddrs = NULL;
 123         *size = 0;
 124         *nifs = 0;
 125 
 126         RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Enter");
 127 
 128 retry_count:
 129         /* snapshot the current number of interfaces */
 130         lifn.lifn_family = PF_UNSPEC;
 131         lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 132         lifn.lifn_count = 0;
 133         rval = ksocket_ioctl(so4, SIOCGLIFNUM, (intptr_t)&lifn, &rval,
 134             CRED());
 135         if (rval != 0) {
 136                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
 137                     "ksocket_ioctl returned: %d", rval);
 138                 return (rval);
 139         }
 140 
 141         numifs = lifn.lifn_count;
 142         if (numifs <= 0) {
 143                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No interfaces found");
 144                 return (0);
 145         }
 146 
 147         /* allocate extra room in case more interfaces appear */
 148         numifs += 10;
 149 
 150         /* get the interface names and ip addresses */
 151         bufsize = numifs * sizeof (struct lifreq);
 152         buf = kmem_alloc(bufsize, KM_SLEEP);
 153 
 154         lifc.lifc_family = AF_UNSPEC;
 155         lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 156         lifc.lifc_len = bufsize;
 157         lifc.lifc_buf = buf;
 158         rc = ksocket_ioctl(so4, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
 159         if (rc != 0) {
 160                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "SIOCGLIFCONF failed");
 161                 kmem_free(buf, bufsize);
 162                 return (rc);
 163         }
 164         /* if our extra room is used up, try again */
 165         if (bufsize <= lifc.lifc_len) {
 166                 kmem_free(buf, bufsize);
 167                 buf = NULL;
 168                 goto retry_count;
 169         }
 170         /* calc actual number of ifconfs */
 171         n = lifc.lifc_len / sizeof (struct lifreq);
 172 
 173         /*
 174          * Count the RDS interfaces
 175          */
 176         for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 177 
 178                 /*
 179                  * Copy as the SIOCGLIFFLAGS ioctl is destructive
 180                  */
 181                 bcopy(lp, &lifr, sizeof (struct lifreq));
 182                 /*
 183                  * fetch the flags using the socket of the correct family
 184                  */
 185                 switch (lifr.lifr_addr.ss_family) {
 186                 case AF_INET:
 187                         rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
 188                             &rval, CRED());
 189                         break;
 190                 default:
 191                         continue;
 192                 }
 193 
 194                 if (rc != 0) continue;
 195 
 196                 /*
 197                  * If we got the flags, skip uninteresting
 198                  * interfaces based on flags
 199                  */
 200                 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
 201                         continue;
 202                 if (lifr.lifr_flags &
 203                     (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 204                         continue;
 205                 if (!rdsv3_capable_interface(&lifr))
 206                         continue;
 207                 j++;
 208         }
 209 
 210         if (j <= 0) {
 211                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl", "No RDS interfaces");
 212                 kmem_free(buf, bufsize);
 213                 return (rval);
 214         }
 215 
 216         numifs = j;
 217 
 218         /* This is the buffer we pass back */
 219         rbufsize = numifs * sizeof (struct lifreq);
 220         rbuf = kmem_alloc(rbufsize, KM_SLEEP);
 221         rlp = (struct lifreq *)rbuf;
 222 
 223         /*
 224          * Examine the array of interfaces and filter uninteresting ones
 225          */
 226         for (i = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 227 
 228                 /*
 229                  * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
 230                  */
 231                 bcopy(lp, &lifr, sizeof (struct lifreq));
 232                 /*
 233                  * fetch the flags using the socket of the correct family
 234                  */
 235                 switch (lifr.lifr_addr.ss_family) {
 236                 case AF_INET:
 237                         rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)&lifr,
 238                             &rval, CRED());
 239                         break;
 240                 default:
 241                         continue;
 242                 }
 243 
 244 
 245                 if (rc != 0) {
 246                         RDSV3_DPRINTF2("rdsv3_do_ip_ioctl",
 247                             "ksocket_ioctl failed" " for %s", lifr.lifr_name);
 248                         continue;
 249                 }
 250 
 251                 /*
 252                  * If we got the flags, skip uninteresting
 253                  * interfaces based on flags
 254                  */
 255                 if ((lifr.lifr_flags & IFF_UP) != IFF_UP)
 256                         continue;
 257                 if (lifr.lifr_flags &
 258                     (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 259                         continue;
 260                 if (!rdsv3_capable_interface(&lifr))
 261                         continue;
 262 
 263                 /* save the record */
 264                 bcopy(lp, rlp, sizeof (struct lifreq));
 265                 rlp->lifr_addr.ss_family = AF_INET_OFFLOAD;
 266                 rlp++;
 267         }
 268 
 269         kmem_free(buf, bufsize);
 270 
 271         *ipaddrs = rbuf;
 272         *size = rbufsize;
 273         *nifs = numifs;
 274 
 275         RDSV3_DPRINTF4("rdsv3_do_ip_ioctl", "Return");
 276 
 277         return (rval);
 278 }
 279 
 280 /*
 281  * Check if the IP interface named by `ifrp' is RDS-capable.
 282  */
 283 boolean_t
 284 rdsv3_capable_interface_old(struct ifreq *ifrp)
 285 {
 286         char    ifname[IFNAMSIZ];
 287         char    drv[MAXLINKNAMELEN];
 288         uint_t  ppa;
 289         char    *cp;
 290 
 291         RDSV3_DPRINTF4("rdsv3_capable_interface_old", "Enter");
 292 
 293         /*
 294          * Strip off the logical interface portion before getting
 295          * intimate with the name.
 296          */
 297         (void) strlcpy(ifname, ifrp->ifr_name, IFNAMSIZ);
 298         if ((cp = strchr(ifname, ':')) != NULL)
 299                 *cp = '\0';
 300 
 301         RDSV3_DPRINTF4("rdsv3_capable_interface_old", "ifname: %s", ifname);
 302 
 303         if ((strcmp("lo0", ifname) == 0) ||
 304             (strncmp("ibd", ifname, 3) == 0)) {
 305                 /*
 306                  * loopback and IB are considered RDS-capable
 307                  */
 308                 return (B_TRUE);
 309         }
 310 
 311         return (ddi_parse(ifname, drv, &ppa) == DDI_SUCCESS &&
 312             rdsv3_if_lookup_by_name(drv));
 313 }
 314 
 315 int
 316 rdsv3_do_ip_ioctl_old(ksocket_t so4, void **ipaddrs, int *size, int *nifs)
 317 {
 318         uint_t                  ifn;
 319         struct ifconf           ifc;
 320         struct ifreq            *lp, *rlp, ifr;
 321         int                     rval = 0;
 322         int                     numifs;
 323         int                     bufsize, rbufsize;
 324         void                    *buf, *rbuf;
 325         int                     i, j, n, rc;
 326 
 327         *ipaddrs = NULL;
 328         *size = 0;
 329         *nifs = 0;
 330 
 331         RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Enter");
 332 
 333 retry_count:
 334         rval = ksocket_ioctl(so4, SIOCGIFNUM, (intptr_t)&ifn, &rval,
 335             CRED());
 336         if (rval != 0) {
 337                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 338                     "ksocket_ioctl(SIOCGIFNUM) returned: %d", rval);
 339                 return (rval);
 340         }
 341 
 342         numifs = ifn;
 343         if (numifs <= 0) {
 344                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No interfaces found");
 345                 return (0);
 346         }
 347 
 348         /* allocate extra room in case more interfaces appear */
 349         numifs += 10;
 350 
 351         /* get the interface names and ip addresses */
 352         bufsize = numifs * sizeof (struct ifreq);
 353         buf = kmem_alloc(bufsize, KM_SLEEP);
 354 
 355         ifc.ifc_len = bufsize;
 356         ifc.ifc_buf = buf;
 357         rc = ksocket_ioctl(so4, SIOCGIFCONF, (intptr_t)&ifc, &rval, CRED());
 358         if (rc != 0) {
 359                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 360                     "SIOCGLIFCONF failed: %d", rc);
 361                 kmem_free(buf, bufsize);
 362                 return (rc);
 363         }
 364         /* if our extra room is used up, try again */
 365         if (bufsize <= ifc.ifc_len) {
 366                 kmem_free(buf, bufsize);
 367                 buf = NULL;
 368                 goto retry_count;
 369         }
 370         /* calc actual number of ifconfs */
 371         n = ifc.ifc_len / sizeof (struct ifreq);
 372 
 373         /*
 374          * Count the RDS interfaces
 375          */
 376         for (i = 0, j = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
 377 
 378                 /*
 379                  * Copy as the SIOCGIFFLAGS ioctl is destructive
 380                  */
 381                 bcopy(lp, &ifr, sizeof (struct ifreq));
 382                 /*
 383                  * fetch the flags using the socket of the correct family
 384                  */
 385                 switch (ifr.ifr_addr.sa_family) {
 386                 case AF_INET:
 387                         rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
 388                             &rval, CRED());
 389                         break;
 390                 default:
 391                         continue;
 392                 }
 393 
 394                 if (rc != 0) continue;
 395 
 396                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 397                     "1. ifr_name: %s, flags: %d", ifr.ifr_name,
 398                     (ushort_t)ifr.ifr_flags);
 399 
 400                 /*
 401                  * If we got the flags, skip uninteresting
 402                  * interfaces based on flags
 403                  */
 404                 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
 405                         continue;
 406                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 407                     "2. ifr_name: %s, flags: %d", ifr.ifr_name,
 408                     (ushort_t)ifr.ifr_flags);
 409                 if (((ushort_t)ifr.ifr_flags) &
 410                     (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 411                         continue;
 412                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 413                     "3. ifr_name: %s, flags: %d", ifr.ifr_name,
 414                     (ushort_t)ifr.ifr_flags);
 415                 if (!rdsv3_capable_interface_old(&ifr))
 416                         continue;
 417                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 418                     "4. ifr_name: %s, flags: %d", ifr.ifr_name,
 419                     (ushort_t)ifr.ifr_flags);
 420                 j++;
 421         }
 422 
 423         if (j <= 0) {
 424                 RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old", "No RDS interfaces");
 425                 kmem_free(buf, bufsize);
 426                 return (rval);
 427         }
 428 
 429         numifs = j;
 430 
 431         /* This is the buffer we pass back */
 432         rbufsize = numifs * sizeof (struct ifreq);
 433         rbuf = kmem_alloc(rbufsize, KM_SLEEP);
 434         rlp = (struct ifreq *)rbuf;
 435 
 436         /*
 437          * Examine the array of interfaces and filter uninteresting ones
 438          */
 439         for (i = 0, lp = ifc.ifc_req; i < n; i++, lp++) {
 440 
 441                 /*
 442                  * Copy the address as the SIOCGIFFLAGS ioctl is destructive
 443                  */
 444                 bcopy(lp, &ifr, sizeof (struct ifreq));
 445                 /*
 446                  * fetch the flags using the socket of the correct family
 447                  */
 448                 switch (ifr.ifr_addr.sa_family) {
 449                 case AF_INET:
 450                         rc = ksocket_ioctl(so4, SIOCGIFFLAGS, (intptr_t)&ifr,
 451                             &rval, CRED());
 452                         break;
 453                 default:
 454                         continue;
 455                 }
 456 
 457 
 458                 if (rc != 0) {
 459                         RDSV3_DPRINTF2("rdsv3_do_ip_ioctl_old",
 460                             "ksocket_ioctl failed: %d for %s",
 461                             rc, ifr.ifr_name);
 462                         continue;
 463                 }
 464 
 465                 /*
 466                  * If we got the flags, skip uninteresting
 467                  * interfaces based on flags
 468                  */
 469                 if ((((ushort_t)ifr.ifr_flags) & IFF_UP) != IFF_UP)
 470                         continue;
 471                 if (((ushort_t)ifr.ifr_flags) &
 472                     (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 473                         continue;
 474                 if (!rdsv3_capable_interface_old(&ifr))
 475                         continue;
 476 
 477                 /* save the record */
 478                 bcopy(lp, rlp, sizeof (struct ifreq));
 479                 rlp->ifr_addr.sa_family = AF_INET_OFFLOAD;
 480                 rlp++;
 481         }
 482 
 483         kmem_free(buf, bufsize);
 484 
 485         *ipaddrs = rbuf;
 486         *size = rbufsize;
 487         *nifs = numifs;
 488 
 489         RDSV3_DPRINTF4("rdsv3_do_ip_ioctl_old", "Return");
 490 
 491         return (rval);
 492 }
 493 
 494 boolean_t
 495 rdsv3_isloopback(ipaddr_t addr)
 496 {
 497         ip_stack_t *ipst;
 498 
 499         ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
 500         ASSERT(ipst != NULL);
 501         if (ip_type_v4(addr, ipst) != IRE_LOOPBACK) {
 502                 netstack_rele(ipst->ips_netstack);
 503                 return (B_FALSE);
 504         }
 505         netstack_rele(ipst->ips_netstack);
 506         return (B_TRUE);
 507 }
 508 
/*
 * Work Queue Implementation
 *
 * The values below are stored in wq_state (protected by wq_lock):
 *
 * IDLE     - no worker is dispatched; rdsv3_queue_work() dispatches one
 *            and moves to RUNNING.
 * RUNNING  - a worker has been dispatched; new work is only appended.
 * FLUSHING - set by rdsv3_flush_workqueue() while it waits for the
 *            worker to finish; rdsv3_queue_work() waits as well.
 * EXITING  - set by rdsv3_destroy_task_workqueue(); new work is dropped.
 */

#define RDSV3_WQ_THREAD_IDLE            0
#define RDSV3_WQ_THREAD_RUNNING         1
#define RDSV3_WQ_THREAD_FLUSHING        2
#define RDSV3_WQ_THREAD_EXITING         3
 517 
 518 /* worker thread */
 519 void
 520 rdsv3_worker_thread(void *arg)
 521 {
 522         rdsv3_workqueue_struct_t *wq = arg;
 523         rdsv3_work_t *work;
 524 
 525         RDSV3_DPRINTF4("rdsv3_worker_thread", "Enter(wq: 0x%p)", wq);
 526 
 527         mutex_enter(&wq->wq_lock);
 528         work = list_remove_head(&wq->wq_queue);
 529         while (work) {
 530                 mutex_exit(&wq->wq_lock);
 531 
 532                 /* process work */
 533                 work->func(work);
 534 
 535                 mutex_enter(&wq->wq_lock);
 536                 work = list_remove_head(&wq->wq_queue);
 537         }
 538 
 539         /* No more work, go home, until called again */
 540         if (wq->wq_state != RDSV3_WQ_THREAD_EXITING) {
 541                 wq->wq_state = RDSV3_WQ_THREAD_IDLE;
 542         }
 543         mutex_exit(&wq->wq_lock);
 544 
 545         RDSV3_DPRINTF4("rdsv3_worker_thread", "Return(wq: 0x%p)", wq);
 546 }
 547 
 548 /* XXX */
 549 void
 550 rdsv3_flush_workqueue(rdsv3_workqueue_struct_t *wq)
 551 {
 552         RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Enter(wq: %p)", wq);
 553 
 554         mutex_enter(&wq->wq_lock);
 555         switch (wq->wq_state) {
 556         case RDSV3_WQ_THREAD_IDLE:
 557                 /* nothing to do */
 558                 ASSERT(list_is_empty(&wq->wq_queue));
 559                 break;
 560 
 561         case RDSV3_WQ_THREAD_RUNNING:
 562                 wq->wq_state = RDSV3_WQ_THREAD_FLUSHING;
 563                 /* FALLTHRU */
 564         case RDSV3_WQ_THREAD_FLUSHING:
 565                 /* already flushing, wait until the flushing is complete */
 566                 do {
 567                         mutex_exit(&wq->wq_lock);
 568                         delay(drv_sectohz(1));
 569                         mutex_enter(&wq->wq_lock);
 570                 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
 571                 break;
 572         case RDSV3_WQ_THREAD_EXITING:
 573                 mutex_exit(&wq->wq_lock);
 574                 rdsv3_worker_thread(wq);
 575                 return;
 576         }
 577         mutex_exit(&wq->wq_lock);
 578 
 579         RDSV3_DPRINTF4("rdsv3_flush_workqueue", "Return(wq: %p)", wq);
 580 }
 581 
 582 void
 583 rdsv3_queue_work(rdsv3_workqueue_struct_t *wq, rdsv3_work_t *wp)
 584 {
 585         RDSV3_DPRINTF4("rdsv3_queue_work", "Enter(wq: %p, wp: %p)", wq, wp);
 586 
 587         mutex_enter(&wq->wq_lock);
 588 
 589         if (list_link_active(&wp->work_item)) {
 590                 /* This is already in the queue, ignore this call */
 591                 mutex_exit(&wq->wq_lock);
 592                 RDSV3_DPRINTF3("rdsv3_queue_work", "already queued: %p", wp);
 593                 return;
 594         }
 595 
 596         switch (wq->wq_state) {
 597         case RDSV3_WQ_THREAD_RUNNING:
 598                 list_insert_tail(&wq->wq_queue, wp);
 599                 mutex_exit(&wq->wq_lock);
 600                 break;
 601 
 602         case RDSV3_WQ_THREAD_FLUSHING:
 603                 do {
 604                         mutex_exit(&wq->wq_lock);
 605                         delay(drv_sectohz(1));
 606                         mutex_enter(&wq->wq_lock);
 607                 } while (wq->wq_state == RDSV3_WQ_THREAD_FLUSHING);
 608 
 609                 if (wq->wq_state == RDSV3_WQ_THREAD_RUNNING) {
 610                         list_insert_tail(&wq->wq_queue, wp);
 611                         mutex_exit(&wq->wq_lock);
 612                         break;
 613                 }
 614                 /* FALLTHRU */
 615 
 616         case RDSV3_WQ_THREAD_IDLE:
 617                 list_insert_tail(&wq->wq_queue, wp);
 618                 wq->wq_state = RDSV3_WQ_THREAD_RUNNING;
 619                 mutex_exit(&wq->wq_lock);
 620 
 621                 (void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_worker_thread, wq,
 622                     DDI_SLEEP);
 623                 break;
 624 
 625         case RDSV3_WQ_THREAD_EXITING:
 626                 mutex_exit(&wq->wq_lock);
 627                 break;
 628         }
 629 
 630         RDSV3_DPRINTF4("rdsv3_queue_work", "Return(wq: %p, wp: %p)", wq, wp);
 631 }
 632 
 633 /* timeout handler for delayed work queuing */
 634 void
 635 rdsv3_work_timeout_handler(void *arg)
 636 {
 637         rdsv3_delayed_work_t *dwp = (rdsv3_delayed_work_t *)arg;
 638 
 639         RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
 640             "Enter(wq: %p, wp: %p)", dwp->wq, &dwp->work);
 641 
 642         mutex_enter(&dwp->lock);
 643         dwp->timeid = 0;
 644         mutex_exit(&dwp->lock);
 645 
 646         mutex_enter(&dwp->wq->wq_lock);
 647         dwp->wq->wq_pending--;
 648         if (dwp->wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
 649                 mutex_exit(&dwp->wq->wq_lock);
 650                 return;
 651         }
 652         mutex_exit(&dwp->wq->wq_lock);
 653 
 654         rdsv3_queue_work(dwp->wq, &dwp->work);
 655 
 656         RDSV3_DPRINTF4("rdsv3_work_timeout_handler",
 657             "Return(wq: %p, wp: %p)", dwp->wq, &dwp->work);
 658 }
 659 
 660 void
 661 rdsv3_queue_delayed_work(rdsv3_workqueue_struct_t *wq,
 662     rdsv3_delayed_work_t *dwp, uint_t delay)
 663 {
 664         RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
 665             "Enter(wq: %p, wp: %p)", wq, dwp);
 666 
 667         if (delay == 0) {
 668                 rdsv3_queue_work(wq, &dwp->work);
 669                 return;
 670         }
 671 
 672         mutex_enter(&wq->wq_lock);
 673         if (wq->wq_state == RDSV3_WQ_THREAD_EXITING) {
 674                 mutex_exit(&wq->wq_lock);
 675                 RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
 676                     "WQ exiting - don't queue (wq: %p, wp: %p)", wq, dwp);
 677                 return;
 678         }
 679         wq->wq_pending++;
 680         mutex_exit(&wq->wq_lock);
 681 
 682         mutex_enter(&dwp->lock);
 683         if (dwp->timeid == 0) {
 684                 dwp->wq = wq;
 685                 dwp->timeid = timeout(rdsv3_work_timeout_handler, dwp,
 686                     jiffies + (delay * rdsv3_one_sec_in_hz));
 687                 mutex_exit(&dwp->lock);
 688         } else {
 689                 mutex_exit(&dwp->lock);
 690                 RDSV3_DPRINTF4("rdsv3_queue_delayed_work", "Already queued: %p",
 691                     dwp);
 692                 mutex_enter(&wq->wq_lock);
 693                 wq->wq_pending--;
 694                 mutex_exit(&wq->wq_lock);
 695         }
 696 
 697         RDSV3_DPRINTF4("rdsv3_queue_delayed_work",
 698             "Return(wq: %p, wp: %p)", wq, dwp);
 699 }
 700 
 701 void
 702 rdsv3_cancel_delayed_work(rdsv3_delayed_work_t *dwp)
 703 {
 704         RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
 705             "Enter(wq: %p, dwp: %p)", dwp->wq, dwp);
 706 
 707         mutex_enter(&dwp->lock);
 708         if (dwp->timeid != 0) {
 709                 (void) untimeout(dwp->timeid);
 710                 dwp->timeid = 0;
 711         } else {
 712                 RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
 713                     "Nothing to cancel (wq: %p, dwp: %p)", dwp->wq, dwp);
 714                 mutex_exit(&dwp->lock);
 715                 return;
 716         }
 717         mutex_exit(&dwp->lock);
 718 
 719         mutex_enter(&dwp->wq->wq_lock);
 720         dwp->wq->wq_pending--;
 721         mutex_exit(&dwp->wq->wq_lock);
 722 
 723         RDSV3_DPRINTF4("rdsv3_cancel_delayed_work",
 724             "Return(wq: %p, dwp: %p)", dwp->wq, dwp);
 725 }
 726 
 727 void
 728 rdsv3_destroy_task_workqueue(rdsv3_workqueue_struct_t *wq)
 729 {
 730         RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Enter");
 731 
 732         ASSERT(wq);
 733 
 734         mutex_enter(&wq->wq_lock);
 735         wq->wq_state = RDSV3_WQ_THREAD_EXITING;
 736 
 737         while (wq->wq_pending > 0) {
 738                 mutex_exit(&wq->wq_lock);
 739                 delay(drv_sectohz(1));
 740                 mutex_enter(&wq->wq_lock);
 741         };
 742         mutex_exit(&wq->wq_lock);
 743 
 744         rdsv3_flush_workqueue(wq);
 745 
 746         list_destroy(&wq->wq_queue);
 747         mutex_destroy(&wq->wq_lock);
 748         kmem_free(wq, sizeof (rdsv3_workqueue_struct_t));
 749 
 750         ASSERT(rdsv3_taskq);
 751         ddi_taskq_destroy(rdsv3_taskq);
 752 
 753         wq = NULL;
 754         rdsv3_taskq = NULL;
 755 
 756         RDSV3_DPRINTF2("rdsv3_destroy_workqueue", "Return");
 757 }
 758 
/*
 * Calls rdsv3_rdma_init().  The function takes a work item, presumably
 * so that it can be installed as a work function (see work->func in
 * rdsv3_worker_thread()); the argument itself is ignored.
 */
/* ARGSUSED */
void
rdsv3_rdma_init_worker(struct rdsv3_work_s *work)
{
        rdsv3_rdma_init();
}
 765 
 766 #define RDSV3_NUM_TASKQ_THREADS 1
 767 rdsv3_workqueue_struct_t *
 768 rdsv3_create_task_workqueue(char *name)
 769 {
 770         rdsv3_workqueue_struct_t        *wq;
 771 
 772         RDSV3_DPRINTF2("create_singlethread_workqueue", "Enter (dip: %p)",
 773             rdsv3_dev_info);
 774 
 775         rdsv3_taskq = ddi_taskq_create(rdsv3_dev_info, name,
 776             RDSV3_NUM_TASKQ_THREADS, TASKQ_DEFAULTPRI, 0);
 777         if (rdsv3_taskq == NULL) {
 778                 RDSV3_DPRINTF2(__FILE__,
 779                     "ddi_taskq_create failed for rdsv3_taskq");
 780                 return (NULL);
 781         }
 782 
 783         wq = kmem_zalloc(sizeof (rdsv3_workqueue_struct_t), KM_NOSLEEP);
 784         if (wq == NULL) {
 785                 RDSV3_DPRINTF2(__FILE__, "kmem_zalloc failed for wq");
 786                 ddi_taskq_destroy(rdsv3_taskq);
 787                 return (NULL);
 788         }
 789 
 790         list_create(&wq->wq_queue, sizeof (struct rdsv3_work_s),
 791             offsetof(struct rdsv3_work_s, work_item));
 792         mutex_init(&wq->wq_lock, NULL, MUTEX_DRIVER, NULL);
 793         wq->wq_state = RDSV3_WQ_THREAD_IDLE;
 794         wq->wq_pending = 0;
 795         rdsv3_one_sec_in_hz = drv_sectohz(1);
 796 
 797         RDSV3_DPRINTF2("create_singlethread_workqueue", "Return");
 798 
 799         return (wq);
 800 }
 801 
 802 /*
 803  * Implementation for struct sock
 804  */
 805 
 806 void
 807 rdsv3_sock_exit_data(struct rsock *sk)
 808 {
 809         struct rdsv3_sock *rs = sk->sk_protinfo;
 810 
 811         RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
 812 
 813         ASSERT(rs != NULL);
 814         ASSERT(rdsv3_sk_sock_flag(sk, SOCK_DEAD));
 815 
 816         rs->rs_sk = NULL;
 817 
 818         list_destroy(&rs->rs_send_queue);
 819         list_destroy(&rs->rs_notify_queue);
 820         list_destroy(&rs->rs_recv_queue);
 821 
 822         rw_destroy(&rs->rs_recv_lock);
 823         mutex_destroy(&rs->rs_lock);
 824 
 825         mutex_destroy(&rs->rs_rdma_lock);
 826         avl_destroy(&rs->rs_rdma_keys);
 827 
 828         mutex_destroy(&rs->rs_conn_lock);
 829         mutex_destroy(&rs->rs_congested_lock);
 830         cv_destroy(&rs->rs_congested_cv);
 831 
 832         rdsv3_exit_waitqueue(sk->sk_sleep);
 833         kmem_free(sk->sk_sleep, sizeof (rdsv3_wait_queue_t));
 834         mutex_destroy(&sk->sk_lock);
 835 
 836         kmem_cache_free(rdsv3_alloc_cache, sk);
 837         RDSV3_DPRINTF4("rdsv3_sock_exit_data", "rs: %p sk: %p", rs, sk);
 838 }
 839 
 840 /* XXX - figure out right values */
 841 #define RDSV3_RECV_HIWATER      (256 * 1024)
 842 #define RDSV3_RECV_LOWATER      128
 843 #define RDSV3_XMIT_HIWATER      (256 * 1024)
 844 #define RDSV3_XMIT_LOWATER      1024
 845 
 846 struct rsock *
 847 rdsv3_sk_alloc()
 848 {
 849         struct rsock *sk;
 850 
 851         sk = kmem_cache_alloc(rdsv3_alloc_cache, KM_SLEEP);
 852         if (sk == NULL) {
 853                 RDSV3_DPRINTF2("rdsv3_create", "kmem_cache_alloc failed");
 854                 return (NULL);
 855         }
 856 
 857         bzero(sk, sizeof (struct rsock) + sizeof (struct rdsv3_sock));
 858         return (sk);
 859 }
 860 
 861 void
 862 rdsv3_sock_init_data(struct rsock *sk)
 863 {
 864         sk->sk_sleep = kmem_zalloc(sizeof (rdsv3_wait_queue_t), KM_SLEEP);
 865         rdsv3_init_waitqueue(sk->sk_sleep);
 866 
 867         mutex_init(&sk->sk_lock, NULL, MUTEX_DRIVER, NULL);
 868         sk->sk_refcount = 1;
 869         sk->sk_protinfo = (struct rdsv3_sock *)(sk + 1);
 870         sk->sk_sndbuf = RDSV3_XMIT_HIWATER;
 871         sk->sk_rcvbuf = RDSV3_RECV_HIWATER;
 872 }
 873 
 874 /*
 875  * Connection cache
 876  */
 877 /* ARGSUSED */
 878 int
 879 rdsv3_conn_constructor(void *buf, void *arg, int kmflags)
 880 {
 881         struct rdsv3_connection *conn = buf;
 882 
 883         bzero(conn, sizeof (struct rdsv3_connection));
 884 
 885         conn->c_next_tx_seq = 1;
 886         mutex_init(&conn->c_lock, NULL, MUTEX_DRIVER, NULL);
 887         mutex_init(&conn->c_send_lock, NULL, MUTEX_DRIVER, NULL);
 888         conn->c_send_generation = 1;
 889         conn->c_senders = 0;
 890 
 891         list_create(&conn->c_send_queue, sizeof (struct rdsv3_message),
 892             offsetof(struct rdsv3_message, m_conn_item));
 893         list_create(&conn->c_retrans, sizeof (struct rdsv3_message),
 894             offsetof(struct rdsv3_message, m_conn_item));
 895         return (0);
 896 }
 897 
 898 /* ARGSUSED */
 899 void
 900 rdsv3_conn_destructor(void *buf, void *arg)
 901 {
 902         struct rdsv3_connection *conn = buf;
 903 
 904         ASSERT(list_is_empty(&conn->c_send_queue));
 905         ASSERT(list_is_empty(&conn->c_retrans));
 906         list_destroy(&conn->c_send_queue);
 907         list_destroy(&conn->c_retrans);
 908         mutex_destroy(&conn->c_send_lock);
 909         mutex_destroy(&conn->c_lock);
 910 }
 911 
 912 int
 913 rdsv3_conn_compare(const void *conn1, const void *conn2)
 914 {
 915         uint32_be_t     laddr1, faddr1, laddr2, faddr2;
 916 
 917         laddr1 = ((rdsv3_conn_info_t *)conn1)->c_laddr;
 918         laddr2 = ((struct rdsv3_connection *)conn2)->c_laddr;
 919 
 920         if (laddr1 == laddr2) {
 921                 faddr1 = ((rdsv3_conn_info_t *)conn1)->c_faddr;
 922                 faddr2 = ((struct rdsv3_connection *)conn2)->c_faddr;
 923                 if (faddr1 == faddr2)
 924                         return (0);
 925                 if (faddr1 < faddr2)
 926                         return (-1);
 927                 return (1);
 928         }
 929 
 930         if (laddr1 < laddr2)
 931                 return (-1);
 932 
 933         return (1);
 934 }
 935 
 936 /* rdsv3_ib_incoming cache */
 937 /* ARGSUSED */
 938 int
 939 rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags)
 940 {
 941         list_create(&((struct rdsv3_ib_incoming *)buf)->ii_frags,
 942             sizeof (struct rdsv3_page_frag),
 943             offsetof(struct rdsv3_page_frag, f_item));
 944 
 945         return (0);
 946 }
 947 
 948 /* ARGSUSED */
 949 void
 950 rdsv3_ib_inc_destructor(void *buf, void *arg)
 951 {
 952         list_destroy(&((struct rdsv3_ib_incoming *)buf)->ii_frags);
 953 }
 954 
 955 /* ib_frag_slab cache */
 956 /* ARGSUSED */
 957 int
 958 rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags)
 959 {
 960         struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
 961         struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
 962         ibt_iov_attr_t iov_attr;
 963         ibt_iov_t iov_arr[1];
 964         ibt_all_wr_t wr;
 965 
 966         bzero(frag, sizeof (struct rdsv3_page_frag));
 967         list_link_init(&frag->f_item);
 968 
 969         frag->f_page = kmem_alloc(PAGE_SIZE, kmflags);
 970         if (frag->f_page == NULL) {
 971                 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
 972                     "kmem_alloc for %d failed", PAGE_SIZE);
 973                 return (-1);
 974         }
 975         frag->f_offset = 0;
 976 
 977         iov_attr.iov_as = NULL;
 978         iov_attr.iov = &iov_arr[0];
 979         iov_attr.iov_buf = NULL;
 980         iov_attr.iov_list_len = 1;
 981         iov_attr.iov_wr_nds = 1;
 982         iov_attr.iov_lso_hdr_sz = 0;
 983         iov_attr.iov_flags = IBT_IOV_SLEEP | IBT_IOV_RECV;
 984 
 985         iov_arr[0].iov_addr = frag->f_page;
 986         iov_arr[0].iov_len = PAGE_SIZE;
 987 
 988         wr.recv.wr_nds = 1;
 989         wr.recv.wr_sgl = &frag->f_sge;
 990 
 991         if (ibt_map_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
 992             &iov_attr, &wr, &frag->f_mapped) != IBT_SUCCESS) {
 993                 RDSV3_DPRINTF2("rdsv3_ib_frag_constructor",
 994                     "ibt_map_mem_iov failed");
 995                 kmem_free(frag->f_page, PAGE_SIZE);
 996                 return (-1);
 997         }
 998 
 999         return (0);
1000 }
1001 
1002 /* ARGSUSED */
1003 void
1004 rdsv3_ib_frag_destructor(void *buf, void *arg)
1005 {
1006         struct rdsv3_page_frag *frag = (struct rdsv3_page_frag *)buf;
1007         struct rdsv3_ib_device *rds_ibdev = (struct rdsv3_ib_device *)arg;
1008 
1009         /* unmap the page */
1010         if (ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(rds_ibdev->dev),
1011             frag->f_mapped) != IBT_SUCCESS)
1012                 RDSV3_DPRINTF2("rdsv3_ib_frag_destructor",
1013                     "ibt_unmap_mem_iov failed");
1014 
1015         /* free the page */
1016         kmem_free(frag->f_page, PAGE_SIZE);
1017 }
1018 
/* loop.c */
/*
 * Loopback connection list and its lock.  Both are only declared here;
 * rdsv3_loop_init() initializes them.
 */
extern kmutex_t loop_conns_lock;
extern list_t loop_conns;

/*
 * Element type of the loop_conns list: rdsv3_loop_init() creates the list
 * with this structure's size and the offset of loop_node.
 */
struct rdsv3_loop_connection
{
        /* list linkage whose offset is handed to list_create() */
        struct list_node loop_node;
        /* connection associated with this list entry */
        struct rdsv3_connection *conn;
};
1028 
1029 void
1030 rdsv3_loop_init(void)
1031 {
1032         list_create(&loop_conns, sizeof (struct rdsv3_loop_connection),
1033             offsetof(struct rdsv3_loop_connection, loop_node));
1034         mutex_init(&loop_conns_lock, NULL, MUTEX_DRIVER, NULL);
1035 }
1036 
1037 /* rdma.c */
1038 /* IB Rkey is used here for comparison */
1039 int
1040 rdsv3_mr_compare(const void *mr1, const void *mr2)
1041 {
1042         uint32_t key1 = *(uint32_t *)mr1;
1043         uint32_t key2 = ((struct rdsv3_mr *)mr2)->r_key;
1044 
1045         if (key1 < key2)
1046                 return (-1);
1047         if (key1 > key2)
1048                 return (1);
1049         return (0);
1050 }
1051 
/* transport.c */
/* Transport table; rdsv3_trans_exit() scans its RDS_TRANS_COUNT slots. */
extern struct rdsv3_transport *transports[];
/* Lock taken as reader while transports[] is scanned. */
extern krwlock_t                trans_sem;
1055 
1056 void
1057 rdsv3_trans_exit(void)
1058 {
1059         struct rdsv3_transport *trans;
1060         int i;
1061 
1062         RDSV3_DPRINTF2("rdsv3_trans_exit", "Enter");
1063 
1064         /* currently, only IB transport */
1065         rw_enter(&trans_sem, RW_READER);
1066         trans = NULL;
1067         for (i = 0; i < RDS_TRANS_COUNT; i++) {
1068                 if (transports[i]) {
1069                         trans = transports[i];
1070                         break;
1071                 }
1072         }
1073         rw_exit(&trans_sem);
1074 
1075         /* trans->exit() will remove the trans from the list */
1076         if (trans)
1077                 trans->exit();
1078 
1079         rw_destroy(&trans_sem);
1080 
1081         RDSV3_DPRINTF2("rdsv3_trans_exit", "Return");
1082 }
1083 
1084 void
1085 rdsv3_trans_init()
1086 {
1087         RDSV3_DPRINTF2("rdsv3_trans_init", "Enter");
1088 
1089         rw_init(&trans_sem, NULL, RW_DRIVER, NULL);
1090 
1091         RDSV3_DPRINTF2("rdsv3_trans_init", "Return");
1092 }
1093 
1094 int
1095 rdsv3_put_cmsg(struct nmsghdr *msg, int level, int type, size_t size,
1096         void *payload)
1097 {
1098         struct cmsghdr *cp;
1099         char *bp;
1100         size_t cmlen;
1101         size_t cmspace;
1102         size_t bufsz;
1103 
1104         RDSV3_DPRINTF4("rdsv3_put_cmsg",
1105             "Enter(msg: %p level: %d type: %d sz: %d)",
1106             msg, level, type, size);
1107 
1108         if (msg == NULL || msg->msg_controllen == 0) {
1109                 return (0);
1110         }
1111         /* check for first cmsg or this is another cmsg to be appended */
1112         if (msg->msg_control == NULL)
1113                 msg->msg_controllen = 0;
1114 
1115         cmlen = CMSG_LEN(size);
1116         cmspace = CMSG_SPACE(size);
1117         bufsz = msg->msg_controllen + cmspace;
1118 
1119         /* extend the existing cmsg to append the next cmsg */
1120         bp = kmem_alloc(bufsz, KM_SLEEP);
1121         if (msg->msg_control) {
1122                 bcopy(msg->msg_control, bp, msg->msg_controllen);
1123                 kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
1124         }
1125 
1126         /* assign payload the proper cmsg location */
1127         cp = (struct cmsghdr *)(bp + msg->msg_controllen);
1128         cp->cmsg_len = cmlen;
1129         cp->cmsg_level = level;
1130         cp->cmsg_type = type;
1131 
1132         bcopy(payload, CMSG_DATA(cp), cmlen -
1133             (unsigned int)_CMSG_DATA_ALIGN(sizeof (struct cmsghdr)));
1134 
1135         msg->msg_control = bp;
1136         msg->msg_controllen = bufsz;
1137 
1138         RDSV3_DPRINTF4("rdsv3_put_cmsg", "Return(cmsg_len: %d)", cp->cmsg_len);
1139 
1140         return (0);
1141 }
1142 
1143 /* ARGSUSED */
1144 int
1145 rdsv3_verify_bind_address(ipaddr_t addr)
1146 {
1147         return (1);
1148 }
1149 
1150 /* checksum */
1151 uint16_t
1152 rdsv3_ip_fast_csum(void *hdr, size_t length)
1153 {
1154         return (0xffff &
1155             (uint16_t)(~ip_ocsum((ushort_t *)hdr, (int)length <<1, 0)));
1156 }
1157 
1158 /* scatterlist implementation */
1159 /* ARGSUSED */
1160 caddr_t
1161 rdsv3_ib_sg_dma_address(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1162     uint_t offset)
1163 {
1164         return (0);
1165 }
1166 
1167 uint_t
1168 rdsv3_ib_dma_map_sg(struct ib_device *dev, struct rdsv3_scatterlist *scat,
1169     uint_t num)
1170 {
1171         struct rdsv3_scatterlist *s, *first;
1172         ibt_iov_t *iov;
1173         ibt_wr_ds_t *sgl;
1174         ibt_iov_attr_t iov_attr;
1175         ibt_send_wr_t swr;
1176         uint_t i;
1177 
1178         RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg", "scat %p, num: %d", scat, num);
1179 
1180         s = first = &scat[0];
1181         ASSERT(first->mihdl == NULL);
1182 
1183         iov = kmem_alloc(num * sizeof (ibt_iov_t), KM_SLEEP);
1184         sgl = kmem_zalloc((num * 2) *  sizeof (ibt_wr_ds_t), KM_SLEEP);
1185 
1186         for (i = 0; i < num; i++, s++) {
1187                 iov[i].iov_addr = s->vaddr;
1188                 iov[i].iov_len = s->length;
1189         }
1190 
1191         iov_attr.iov_as = NULL;
1192         iov_attr.iov = iov;
1193         iov_attr.iov_buf = NULL;
1194         iov_attr.iov_list_len = num;
1195         iov_attr.iov_wr_nds = num * 2;
1196         iov_attr.iov_lso_hdr_sz = 0;
1197         iov_attr.iov_flags = IBT_IOV_SLEEP;
1198 
1199         swr.wr_sgl = sgl;
1200 
1201         i = ibt_map_mem_iov(ib_get_ibt_hca_hdl(dev),
1202             &iov_attr, (ibt_all_wr_t *)&swr, &first->mihdl);
1203         kmem_free(iov, num * sizeof (ibt_iov_t));
1204         if (i != IBT_SUCCESS) {
1205                 RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg",
1206                     "ibt_map_mem_iov returned: %d", i);
1207                 return (0);
1208         }
1209 
1210         s = first;
1211         for (i = 0; i < num; i++, s++, sgl++) {
1212                 s->sgl = sgl;
1213         }
1214 
1215         return (num);
1216 }
1217 
1218 void
1219 rdsv3_ib_dma_unmap_sg(ib_device_t *dev, struct rdsv3_scatterlist *scat,
1220     uint_t num)
1221 {
1222         /* Zero length messages have no scatter gather entries */
1223         if (num != 0) {
1224                 ASSERT(scat->mihdl != NULL);
1225                 ASSERT(scat->sgl != NULL);
1226 
1227                 (void) ibt_unmap_mem_iov(ib_get_ibt_hca_hdl(dev), scat->mihdl);
1228 
1229                 kmem_free(scat->sgl, (num * 2)  * sizeof (ibt_wr_ds_t));
1230                 scat->sgl = NULL;
1231                 scat->mihdl = NULL;
1232         }
1233 }
1234 
1235 int
1236 rdsv3_ib_alloc_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1237 {
1238         caddr_t addr;
1239         size_t size;
1240         ibt_mr_attr_t mr_attr;
1241         ibt_mr_desc_t mr_desc;
1242         ibt_mr_hdl_t mr_hdl;
1243         int ret;
1244 
1245         RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Enter(dev: %p)", dev);
1246 
1247         ASSERT(ic->i_mr == NULL);
1248 
1249         size = (ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr + 1) *
1250             sizeof (struct rdsv3_header);
1251 
1252         addr = kmem_zalloc(size, KM_NOSLEEP);
1253         if (addr == NULL)
1254                 return (-1);
1255 
1256         mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)addr;
1257         mr_attr.mr_len = size;
1258         mr_attr.mr_as = NULL;
1259         mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1260         ret = ibt_register_mr(ib_get_ibt_hca_hdl(dev), RDSV3_PD2PDHDL(ic->i_pd),
1261             &mr_attr, &mr_hdl, &mr_desc);
1262         if (ret != IBT_SUCCESS) {
1263                 RDSV3_DPRINTF2("rdsv3_ib_alloc_hdrs",
1264                     "ibt_register_mr returned: " "%d", ret);
1265                 return (-1);
1266         }
1267 
1268         ic->i_mr =
1269             (struct rdsv3_hdrs_mr *)kmem_alloc(sizeof (struct rdsv3_hdrs_mr),
1270             KM_SLEEP);
1271         ic->i_mr->addr = addr;
1272         ic->i_mr->size = size;
1273         ic->i_mr->hdl =   mr_hdl;
1274         ic->i_mr->lkey = mr_desc.md_lkey;
1275 
1276         ic->i_send_hdrs = (struct rdsv3_header *)addr;
1277         ic->i_send_hdrs_dma = (uint64_t)(uintptr_t)addr;
1278 
1279         ic->i_recv_hdrs = (struct rdsv3_header *)(addr +
1280             (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1281         ic->i_recv_hdrs_dma = (uint64_t)(uintptr_t)(addr +
1282             (ic->i_send_ring.w_nr * sizeof (struct rdsv3_header)));
1283 
1284         ic->i_ack = (struct rdsv3_header *)(addr +
1285             ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1286             sizeof (struct rdsv3_header)));
1287         ic->i_ack_dma = (uint64_t)(uintptr_t)(addr +
1288             ((ic->i_send_ring.w_nr + ic->i_recv_ring.w_nr) *
1289             sizeof (struct rdsv3_header)));
1290 
1291         RDSV3_DPRINTF4("rdsv3_ib_alloc_hdrs", "Return(dev: %p)", dev);
1292 
1293         return (0);
1294 }
1295 
1296 void
1297 rdsv3_ib_free_hdrs(ib_device_t *dev, struct rdsv3_ib_connection *ic)
1298 {
1299         RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Enter(dev: %p)", dev);
1300         ASSERT(ic->i_mr != NULL);
1301 
1302         ic->i_send_hdrs = NULL;
1303         ic->i_send_hdrs_dma = NULL;
1304 
1305         ic->i_recv_hdrs = NULL;
1306         ic->i_recv_hdrs_dma = NULL;
1307 
1308         ic->i_ack = NULL;
1309         ic->i_ack_dma = NULL;
1310 
1311         (void) ibt_deregister_mr(ib_get_ibt_hca_hdl(dev), ic->i_mr->hdl);
1312 
1313         kmem_free(ic->i_mr->addr, ic->i_mr->size);
1314         kmem_free(ic->i_mr, sizeof (struct rdsv3_hdrs_mr));
1315 
1316         ic->i_mr = NULL;
1317         RDSV3_DPRINTF4("rdsv3_ib_free_hdrs", "Return(dev: %p)", dev);
1318 }
1319 
1320 /*
1321  * atomic_add_unless - add unless the number is a given value
1322  * @v: pointer of type atomic_t
1323  * @a: the amount to add to v...
1324  * @u: ...unless v is equal to u.
1325  *
1326  * Atomically adds @a to @v, so long as it was not @u.
1327  * Returns non-zero if @v was not @u, and zero otherwise.
1328  */
1329 int
1330 atomic_add_unless(atomic_t *v, uint_t a, ulong_t u)
1331 {
1332         uint_t c, old;
1333 
1334         c = *v;
1335         while (c != u && (old = atomic_cas_uint(v, c, c + a)) != c) {
1336                 c = old;
1337         }
1338         return ((ulong_t)c != u);
1339 }