1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/errno.h>
  28 #include <sys/debug.h>
  29 #include <sys/time.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/systm.h>
  32 #include <sys/user.h>
  33 #include <sys/stropts.h>
  34 #include <sys/stream.h>
  35 #include <sys/strlog.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/cpu.h>
  39 #include <sys/kmem.h>
  40 #include <sys/conf.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/ksynch.h>
  44 #include <sys/stat.h>
  45 #include <sys/kstat.h>
  46 #include <sys/vtrace.h>
  47 #include <sys/strsun.h>
  48 #include <sys/dlpi.h>
  49 #include <sys/ethernet.h>
  50 #include <net/if.h>
  51 #include <sys/varargs.h>
  52 #include <sys/machsystm.h>
  53 #include <sys/modctl.h>
  54 #include <sys/modhash.h>
  55 #include <sys/mac.h>
  56 #include <sys/mac_ether.h>
  57 #include <sys/taskq.h>
  58 #include <sys/note.h>
  59 #include <sys/mach_descrip.h>
  60 #include <sys/mdeg.h>
  61 #include <sys/ldc.h>
  62 #include <sys/vsw_fdb.h>
  63 #include <sys/vsw.h>
  64 #include <sys/vio_mailbox.h>
  65 #include <sys/vnet_mailbox.h>
  66 #include <sys/vnet_common.h>
  67 #include <sys/vio_util.h>
  68 #include <sys/sdt.h>
  69 #include <sys/atomic.h>
  70 #include <sys/vlan.h>
  71 
  72 /* Switching setup routines */
  73 void vsw_setup_switching_thread(void *arg);
  74 int vsw_setup_switching_start(vsw_t *vswp);
  75 void vsw_setup_switching_stop(vsw_t *vswp);
  76 int vsw_setup_switching(vsw_t *);
  77 void vsw_setup_switching_post_process(vsw_t *vswp);
  78 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
  79     vsw_port_t *port, mac_resource_handle_t mrh);
  80 static  int vsw_setup_layer2(vsw_t *);
  81 static  int vsw_setup_layer3(vsw_t *);
  82 
  83 /* Switching/data transmit routines */
  84 static  void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
  85     vsw_port_t *port, mac_resource_handle_t);
  86 static  void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
  87         vsw_port_t *port, mac_resource_handle_t);
  88 static  void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
  89         vsw_port_t *port, mac_resource_handle_t);
  90 static  int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
  91         int caller, vsw_port_t *port);
  92 static  int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
  93     int caller, vsw_port_t *port);
  94 
  95 /* VLAN routines */
  96 void vsw_create_vlans(void *arg, int type);
  97 void vsw_destroy_vlans(void *arg, int type);
  98 void vsw_vlan_add_ids(void *arg, int type);
  99 void vsw_vlan_remove_ids(void *arg, int type);
 100 static  void vsw_vlan_create_hash(void *arg, int type);
 101 static  void vsw_vlan_destroy_hash(void *arg, int type);
 102 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
 103         uint16_t *vidp);
 104 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
 105 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
 106 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
 107 
 108 /* Forwarding database (FDB) routines */
 109 void vsw_fdbe_add(vsw_t *vswp, void *port);
 110 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
 111 static  vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
 112 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
 113 
 114 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
 115 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
 116 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
 117 void vsw_del_mcst_vsw(vsw_t *);
 118 
 119 /* Support functions */
 120 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
 121 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp);
 122 
 123 
 124 /*
 125  * Functions imported from other files.
 126  */
 127 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *);
 128 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
 129 extern int vsw_mac_open(vsw_t *vswp);
 130 extern void vsw_mac_close(vsw_t *vswp);
 131 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
 132     mblk_t *mp, vsw_macrx_flags_t flags);
 133 extern void vsw_set_addrs(vsw_t *vswp);
 134 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp);
 135 extern void vsw_hio_init(vsw_t *vswp);
 136 extern void vsw_hio_start_ports(vsw_t *vswp);
 137 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port,
 138     mcst_addr_t *mcst_p, int type);
 139 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port,
 140     mcst_addr_t *mcst_p, int type);
 141 extern void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
 142 extern void vsw_physlink_update_ports(vsw_t *vswp);
 143 
 144 /*
 145  * Tunables used in this file.
 146  */
 147 extern  int vsw_setup_switching_delay;
 148 extern  uint32_t vsw_vlan_nchains;
 149 extern  uint32_t vsw_fdbe_refcnt_delay;
 150 
 151 #define VSW_FDBE_REFHOLD(p)                                             \
 152 {                                                                       \
 153         atomic_inc_32(&(p)->refcnt);                                     \
 154         ASSERT((p)->refcnt != 0);                                    \
 155 }
 156 
 157 #define VSW_FDBE_REFRELE(p)                                             \
 158 {                                                                       \
 159         ASSERT((p)->refcnt != 0);                                    \
 160         atomic_dec_32(&(p)->refcnt);                                     \
 161 }
 162 
 163 /*
 164  * Thread to setup switching mode. This thread is created during vsw_attach()
 165  * initially. It invokes vsw_setup_switching() and keeps retrying while the
 166  * returned value is EAGAIN. The thread exits when the switching mode setup is
 167  * done successfully or when the error returned is not EAGAIN. This thread may
 168  * also get created from vsw_update_md_prop() if the switching mode needs to be
 169  * updated.
 170  */
 171 void
 172 vsw_setup_switching_thread(void *arg)
 173 {
 174         callb_cpr_t     cprinfo;
 175         vsw_t           *vswp =  (vsw_t *)arg;
 176         clock_t         wait_time;
 177         clock_t         xwait;
 178         clock_t         wait_rv;
 179         int             rv;
 180 
 181         /* wait time used on successive retries */
 182         xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC);
 183 
 184         CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr,
 185             "vsw_setup_sw_thread");
 186 
 187         mutex_enter(&vswp->sw_thr_lock);
 188 
 189         while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
 190 
 191                 CALLB_CPR_SAFE_BEGIN(&cprinfo);
 192 
 193                 /* Wait for sometime before (re)trying setup_switching() */
 194                 wait_time = ddi_get_lbolt() + xwait;
 195                 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
 196                         wait_rv = cv_timedwait(&vswp->sw_thr_cv,
 197                             &vswp->sw_thr_lock, wait_time);
 198                         if (wait_rv == -1) {    /* timed out */
 199                                 break;
 200                         }
 201                 }
 202 
 203                 CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock)
 204 
 205                 if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) {
 206                         /*
 207                          * If there is a stop request, process that first and
 208                          * exit the loop. Continue to hold the mutex which gets
 209                          * released in CALLB_CPR_EXIT().
 210                          */
 211                         break;
 212                 }
 213 
 214                 mutex_exit(&vswp->sw_thr_lock);
 215                 rv = vsw_setup_switching(vswp);
 216                 if (rv == 0) {
 217                         vsw_setup_switching_post_process(vswp);
 218                 }
 219                 mutex_enter(&vswp->sw_thr_lock);
 220                 if (rv != EAGAIN) {
 221                         break;
 222                 }
 223 
 224         }
 225 
 226         vswp->sw_thr_flags &= ~VSW_SWTHR_STOP;
 227         vswp->sw_thread = NULL;
 228         CALLB_CPR_EXIT(&cprinfo);
 229         thread_exit();
 230 }
 231 
 232 /*
 233  * Create a thread to setup the switching mode.
 234  * Returns 0 on success; 1 on failure.
 235  */
 236 int
 237 vsw_setup_switching_start(vsw_t *vswp)
 238 {
 239         mutex_enter(&vswp->sw_thr_lock);
 240 
 241         vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
 242             vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri);
 243 
 244         if (vswp->sw_thread == NULL) {
 245                 mutex_exit(&vswp->sw_thr_lock);
 246                 return (1);
 247         }
 248 
 249         mutex_exit(&vswp->sw_thr_lock);
 250         return (0);
 251 }
 252 
 253 /*
 254  * Stop the thread to setup switching mode.
 255  */
 256 void
 257 vsw_setup_switching_stop(vsw_t *vswp)
 258 {
 259         kt_did_t        tid = 0;
 260 
 261         /*
 262          * Signal the setup_switching thread to stop and wait until it stops.
 263          */
 264         mutex_enter(&vswp->sw_thr_lock);
 265 
 266         if (vswp->sw_thread != NULL) {
 267                 tid = vswp->sw_thread->t_did;
 268                 vswp->sw_thr_flags |= VSW_SWTHR_STOP;
 269                 cv_signal(&vswp->sw_thr_cv);
 270         }
 271 
 272         mutex_exit(&vswp->sw_thr_lock);
 273 
 274         if (tid != 0)
 275                 thread_join(tid);
 276 
 277         (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
 278 
 279         vswp->mac_open_retries = 0;
 280 }
 281 
 282 /*
 283  * Setup the required switching mode.
 284  * Returns:
 285  *  0 on success.
 286  *  EAGAIN if retry is needed.
 287  *  1 on all other failures.
 288  */
 289 int
 290 vsw_setup_switching(vsw_t *vswp)
 291 {
 292         int     rv = 1;
 293 
 294         D1(vswp, "%s: enter", __func__);
 295 
 296         /*
 297          * Select best switching mode.
 298          * This is done as this routine can be called from the timeout
 299          * handler to retry setting up a specific mode. Currently only
 300          * the function which sets up layer2/promisc mode returns EAGAIN
 301          * if the underlying network device is not available yet, causing
 302          * retries.
 303          */
 304         if (vswp->smode & VSW_LAYER2) {
 305                 rv = vsw_setup_layer2(vswp);
 306         } else if (vswp->smode & VSW_LAYER3) {
 307                 rv = vsw_setup_layer3(vswp);
 308         } else {
 309                 DERR(vswp, "unknown switch mode");
 310                 rv = 1;
 311         }
 312 
 313         if (rv && (rv != EAGAIN)) {
 314                 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
 315                     "switching mode", vswp->instance);
 316         } else if (rv == 0) {
 317                 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
 318         }
 319 
 320         D2(vswp, "%s: Operating in mode %d", __func__,
 321             vswp->smode);
 322 
 323         D1(vswp, "%s: exit", __func__);
 324 
 325         return (rv);
 326 }
 327 
 328 /*
 329  * Setup for layer 2 switching.
 330  *
 331  * Returns:
 332  *  0 on success.
 333  *  EAGAIN if retry is needed.
 334  *  EIO on all other failures.
 335  */
 336 static int
 337 vsw_setup_layer2(vsw_t *vswp)
 338 {
 339         int     rv;
 340 
 341         D1(vswp, "%s: enter", __func__);
 342 
 343         /*
 344          * Until the network device is successfully opened,
 345          * set the switching to use vsw_switch_l2_frame.
 346          */
 347         vswp->vsw_switch_frame = vsw_switch_l2_frame;
 348         vswp->mac_cl_switching = B_FALSE;
 349 
 350         rv = strlen(vswp->physname);
 351         if (rv == 0) {
 352                 /*
 353                  * Physical device name is NULL, which is
 354                  * required for layer 2.
 355                  */
 356                 cmn_err(CE_WARN, "!vsw%d: no network device name specified",
 357                     vswp->instance);
 358                 return (EIO);
 359         }
 360 
 361         mutex_enter(&vswp->mac_lock);
 362 
 363         rv = vsw_mac_open(vswp);
 364         if (rv != 0) {
 365                 if (rv != EAGAIN) {
 366                         cmn_err(CE_WARN, "!vsw%d: Unable to open network "
 367                             "device: %s\n", vswp->instance, vswp->physname);
 368                 }
 369                 mutex_exit(&vswp->mac_lock);
 370                 return (rv);
 371         }
 372 
 373         /*
 374          * Now we can use the mac client switching, so set the switching
 375          * function to use vsw_switch_l2_frame_mac_client(), which simply
 376          * sends the packets to MAC layer for switching.
 377          */
 378         vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client;
 379         vswp->mac_cl_switching = B_TRUE;
 380 
 381         D1(vswp, "%s: exit", __func__);
 382 
 383         /* Initialize HybridIO related stuff */
 384         vsw_hio_init(vswp);
 385 
 386         mutex_exit(&vswp->mac_lock);
 387         return (0);
 388 
 389 exit_error:
 390         vsw_mac_close(vswp);
 391         mutex_exit(&vswp->mac_lock);
 392         return (EIO);
 393 }
 394 
 395 static int
 396 vsw_setup_layer3(vsw_t *vswp)
 397 {
 398         D1(vswp, "%s: enter", __func__);
 399 
 400         D2(vswp, "%s: operating in layer 3 mode", __func__);
 401         vswp->vsw_switch_frame = vsw_switch_l3_frame;
 402 
 403         D1(vswp, "%s: exit", __func__);
 404 
 405         return (0);
 406 }
 407 
 408 /* ARGSUSED */
 409 void
 410 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port,
 411                         mac_resource_handle_t mrh)
 412 {
 413         freemsgchain(mp);
 414 }
 415 
 416 /*
 417  * Use mac client for layer 2 switching .
 418  */
 419 static void
 420 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
 421     vsw_port_t *port, mac_resource_handle_t mrh)
 422 {
 423         _NOTE(ARGUNUSED(mrh))
 424 
 425         mblk_t          *ret_m;
 426 
 427         /*
 428          * This switching function is expected to be called by
 429          * the ports or the interface only. The packets from
 430          * physical interface already switched.
 431          */
 432         ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV));
 433 
 434         if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) {
 435                 DERR(vswp, "%s: drop mblks to "
 436                     "phys dev", __func__);
 437                 freemsgchain(ret_m);
 438         }
 439 }
 440 
 441 /*
 442  * Switch the given ethernet frame when operating in layer 2 mode.
 443  *
 444  * vswp: pointer to the vsw instance
 445  * mp: pointer to chain of ethernet frame(s) to be switched
 446  * caller: identifies the source of this frame as:
 447  *              1. VSW_VNETPORT - a vsw port (connected to a vnet).
 448  *              2. VSW_PHYSDEV - the physical ethernet device
 449  *              3. VSW_LOCALDEV - vsw configured as a virtual interface
 450  * arg: argument provided by the caller.
 451  *              1. for VNETPORT - pointer to the corresponding vsw_port_t.
 452  *              2. for PHYSDEV - NULL
 453  *              3. for LOCALDEV - pointer to to this vsw_t(self)
 454  */
 455 void
 456 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
 457                         vsw_port_t *arg, mac_resource_handle_t mrh)
 458 {
 459         struct ether_header     *ehp;
 460         mblk_t                  *bp, *ret_m;
 461         vsw_fdbe_t              *fp;
 462 
 463         D1(vswp, "%s: enter (caller %d)", __func__, caller);
 464 
 465         /*
 466          * PERF: rather than breaking up the chain here, scan it
 467          * to find all mblks heading to same destination and then
 468          * pass that sub-chain to the lower transmit functions.
 469          */
 470 
 471         /* process the chain of packets */
 472         bp = mp;
 473         while (bp) {
 474                 ehp = (struct ether_header *)bp->b_rptr;
 475                 mp = vsw_get_same_dest_list(ehp, &bp);
 476                 ASSERT(mp != NULL);
 477 
 478                 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
 479                     __func__, MBLKSIZE(mp), MBLKL(mp));
 480 
 481                 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
 482                         /*
 483                          * If destination is VSW_LOCALDEV (vsw as an eth
 484                          * interface) and if the device is up & running,
 485                          * send the packet up the stack on this host.
 486                          * If the virtual interface is down, drop the packet.
 487                          */
 488                         if (caller != VSW_LOCALDEV) {
 489                                 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG);
 490                         } else {
 491                                 freemsgchain(mp);
 492                         }
 493                         continue;
 494                 }
 495 
 496                 /*
 497                  * Find fdb entry for the destination
 498                  * and hold a reference to it.
 499                  */
 500                 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
 501                 if (fp != NULL) {
 502 
 503                         /*
 504                          * If plumbed and in promisc mode then copy msg
 505                          * and send up the stack.
 506                          */
 507                         vsw_mac_rx(vswp, mrh, mp,
 508                             VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
 509 
 510                         /*
 511                          * If the destination is in FDB, the packet
 512                          * should be forwarded to the correponding
 513                          * vsw_port (connected to a vnet device -
 514                          * VSW_VNETPORT)
 515                          */
 516                         (void) vsw_portsend(fp->portp, mp);
 517 
 518                         /* Release the reference on the fdb entry */
 519                         VSW_FDBE_REFRELE(fp);
 520                 } else {
 521                         /*
 522                          * Destination not in FDB.
 523                          *
 524                          * If the destination is broadcast or
 525                          * multicast forward the packet to all
 526                          * (VNETPORTs, PHYSDEV, LOCALDEV),
 527                          * except the caller.
 528                          */
 529                         if (IS_BROADCAST(ehp)) {
 530                                 D2(vswp, "%s: BROADCAST pkt", __func__);
 531                                 (void) vsw_forward_all(vswp, mp, caller, arg);
 532                         } else if (IS_MULTICAST(ehp)) {
 533                                 D2(vswp, "%s: MULTICAST pkt", __func__);
 534                                 (void) vsw_forward_grp(vswp, mp, caller, arg);
 535                         } else {
 536                                 /*
 537                                  * If the destination is unicast, and came
 538                                  * from either a logical network device or
 539                                  * the switch itself when it is plumbed, then
 540                                  * send it out on the physical device and also
 541                                  * up the stack if the logical interface is
 542                                  * in promiscious mode.
 543                                  *
 544                                  * NOTE:  The assumption here is that if we
 545                                  * cannot find the destination in our fdb, its
 546                                  * a unicast address, and came from either a
 547                                  * vnet or down the stack (when plumbed) it
 548                                  * must be destinded for an ethernet device
 549                                  * outside our ldoms.
 550                                  */
 551                                 if (caller == VSW_VNETPORT) {
 552                                         /* promisc check copy etc */
 553                                         vsw_mac_rx(vswp, mrh, mp,
 554                                             VSW_MACRX_PROMISC |
 555                                             VSW_MACRX_COPYMSG);
 556 
 557                                         if ((ret_m = vsw_tx_msg(vswp, mp,
 558                                             caller, arg)) != NULL) {
 559                                                 DERR(vswp, "%s: drop mblks to "
 560                                                     "phys dev", __func__);
 561                                                 freemsgchain(ret_m);
 562                                         }
 563 
 564                                 } else if (caller == VSW_PHYSDEV) {
 565                                         /*
 566                                          * Pkt seen because card in promisc
 567                                          * mode. Send up stack if plumbed in
 568                                          * promisc mode, else drop it.
 569                                          */
 570                                         vsw_mac_rx(vswp, mrh, mp,
 571                                             VSW_MACRX_PROMISC |
 572                                             VSW_MACRX_FREEMSG);
 573 
 574                                 } else if (caller == VSW_LOCALDEV) {
 575                                         /*
 576                                          * Pkt came down the stack, send out
 577                                          * over physical device.
 578                                          */
 579                                         if ((ret_m = vsw_tx_msg(vswp, mp,
 580                                             caller, NULL)) != NULL) {
 581                                                 DERR(vswp, "%s: drop mblks to "
 582                                                     "phys dev", __func__);
 583                                                 freemsgchain(ret_m);
 584                                         }
 585                                 }
 586                         }
 587                 }
 588         }
 589         D1(vswp, "%s: exit\n", __func__);
 590 }
 591 
 592 /*
 593  * Switch ethernet frame when in layer 3 mode (i.e. using IP
 594  * layer to do the routing).
 595  *
 596  * There is a large amount of overlap between this function and
 597  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
 598  * both these functions.
 599  */
 600 void
 601 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
 602                         vsw_port_t *arg, mac_resource_handle_t mrh)
 603 {
 604         struct ether_header     *ehp;
 605         mblk_t                  *bp = NULL;
 606         vsw_fdbe_t              *fp;
 607 
 608         D1(vswp, "%s: enter (caller %d)", __func__, caller);
 609 
 610         /*
 611          * In layer 3 mode should only ever be switching packets
 612          * between IP layer and vnet devices. So make sure thats
 613          * who is invoking us.
 614          */
 615         if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
 616                 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
 617                 freemsgchain(mp);
 618                 return;
 619         }
 620 
 621         /* process the chain of packets */
 622         bp = mp;
 623         while (bp) {
 624                 ehp = (struct ether_header *)bp->b_rptr;
 625                 mp = vsw_get_same_dest_list(ehp, &bp);
 626                 ASSERT(mp != NULL);
 627 
 628                 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
 629                     __func__, MBLKSIZE(mp), MBLKL(mp));
 630 
 631                 /*
 632                  * Find fdb entry for the destination
 633                  * and hold a reference to it.
 634                  */
 635                 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
 636                 if (fp != NULL) {
 637 
 638                         D2(vswp, "%s: sending to target port", __func__);
 639                         (void) vsw_portsend(fp->portp, mp);
 640 
 641                         /* Release the reference on the fdb entry */
 642                         VSW_FDBE_REFRELE(fp);
 643                 } else {
 644                         /*
 645                          * Destination not in FDB
 646                          *
 647                          * If the destination is broadcast or
 648                          * multicast forward the packet to all
 649                          * (VNETPORTs, PHYSDEV, LOCALDEV),
 650                          * except the caller.
 651                          */
 652                         if (IS_BROADCAST(ehp)) {
 653                                 D2(vswp, "%s: BROADCAST pkt", __func__);
 654                                 (void) vsw_forward_all(vswp, mp, caller, arg);
 655                         } else if (IS_MULTICAST(ehp)) {
 656                                 D2(vswp, "%s: MULTICAST pkt", __func__);
 657                                 (void) vsw_forward_grp(vswp, mp, caller, arg);
 658                         } else {
 659                                 /*
 660                                  * Unicast pkt from vnet that we don't have
 661                                  * an FDB entry for, so must be destinded for
 662                                  * the outside world. Attempt to send up to the
 663                                  * IP layer to allow it to deal with it.
 664                                  */
 665                                 if (caller == VSW_VNETPORT) {
 666                                         vsw_mac_rx(vswp, mrh,
 667                                             mp, VSW_MACRX_FREEMSG);
 668                                 }
 669                         }
 670                 }
 671         }
 672 
 673         D1(vswp, "%s: exit", __func__);
 674 }
 675 
 676 /*
 677  * Additional initializations that are needed for the specific switching mode.
 678  */
 679 void
 680 vsw_setup_switching_post_process(vsw_t *vswp)
 681 {
 682         link_state_t    link_state = LINK_STATE_UP;
 683 
 684         if (vswp->smode & VSW_LAYER2) {
 685                 /*
 686                  * Program unicst, mcst addrs of vsw
 687                  * interface and ports in the physdev.
 688                  */
 689                 vsw_set_addrs(vswp);
 690 
 691                 /* Start HIO for ports that have already connected */
 692                 vsw_hio_start_ports(vswp);
 693 
 694                 if (vswp->pls_update == B_TRUE) {
 695                         link_state = vswp->phys_link_state;
 696                 }
 697 
 698                 /* Update physical link info to any ports already connected */
 699                 vsw_physlink_update_ports(vswp);
 700         }
 701 
 702         vsw_mac_link_update(vswp, link_state);
 703 }
 704 
 705 /*
 706  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
 707  * except the caller (port on which frame arrived).
 708  */
 709 static int
 710 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
 711 {
 712         vsw_port_list_t *plist = &vswp->plist;
 713         vsw_port_t      *portp;
 714         mblk_t          *nmp = NULL;
 715         mblk_t          *ret_m = NULL;
 716         int             skip_port = 0;
 717 
 718         D1(vswp, "vsw_forward_all: enter\n");
 719 
 720         /*
 721          * Broadcast message from inside ldoms so send to outside
 722          * world if in either of layer 2 modes.
 723          */
 724         if ((vswp->smode & VSW_LAYER2) &&
 725             ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
 726 
 727                 nmp = vsw_dupmsgchain(mp);
 728                 if (nmp) {
 729                         if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
 730                             != NULL) {
 731                                 DERR(vswp, "%s: dropping pkt(s) "
 732                                     "consisting of %ld bytes of data for"
 733                                     " physical device", __func__, MBLKL(ret_m));
 734                                 freemsgchain(ret_m);
 735                         }
 736                 }
 737         }
 738 
 739         if (caller == VSW_VNETPORT)
 740                 skip_port = 1;
 741 
 742         /*
 743          * Broadcast message from other vnet (layer 2 or 3) or outside
 744          * world (layer 2 only), send up stack if plumbed.
 745          */
 746         if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
 747                 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG);
 748         }
 749 
 750         /* send it to all VNETPORTs */
 751         READ_ENTER(&plist->lockrw);
 752         for (portp = plist->head; portp != NULL; portp = portp->p_next) {
 753                 D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
 754                 /*
 755                  * Caution ! - don't reorder these two checks as arg
 756                  * will be NULL if the caller is PHYSDEV. skip_port is
 757                  * only set if caller is VNETPORT.
 758                  */
 759                 if ((skip_port) && (portp == arg)) {
 760                         continue;
 761                 } else {
 762                         nmp = vsw_dupmsgchain(mp);
 763                         if (nmp) {
 764                                 /*
 765                                  * The plist->lockrw is protecting the
 766                                  * portp from getting destroyed here.
 767                                  * So, no ref_cnt is incremented here.
 768                                  */
 769                                 (void) vsw_portsend(portp, nmp);
 770                         } else {
 771                                 DERR(vswp, "vsw_forward_all: nmp NULL");
 772                         }
 773                 }
 774         }
 775         RW_EXIT(&plist->lockrw);
 776 
 777         freemsgchain(mp);
 778 
 779         D1(vswp, "vsw_forward_all: exit\n");
 780         return (0);
 781 }
 782 
 783 /*
 784  * Forward pkts to any devices or interfaces which have registered
 785  * an interest in them (i.e. multicast groups).
 786  */
 787 static int
 788 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
 789 {
 790         struct ether_header     *ehp = (struct ether_header *)mp->b_rptr;
 791         mfdb_ent_t              *entp = NULL;
 792         mfdb_ent_t              *tpp = NULL;
 793         vsw_port_t              *port;
 794         uint64_t                key = 0;
 795         mblk_t                  *nmp = NULL;
 796         mblk_t                  *ret_m = NULL;
 797         boolean_t               check_if = B_TRUE;
 798 
 799         /*
 800          * Convert address to hash table key
 801          */
 802         KEY_HASH(key, &ehp->ether_dhost);
 803 
 804         D1(vswp, "%s: key 0x%llx", __func__, key);
 805 
 806         /*
 807          * If pkt came from either a vnet or down the stack (if we are
 808          * plumbed) and we are in layer 2 mode, then we send the pkt out
 809          * over the physical adapter, and then check to see if any other
 810          * vnets are interested in it.
 811          */
 812         if ((vswp->smode & VSW_LAYER2) &&
 813             ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
 814                 nmp = vsw_dupmsgchain(mp);
 815                 if (nmp) {
 816                         if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
 817                             != NULL) {
 818                                 DERR(vswp, "%s: dropping pkt(s) consisting of "
 819                                     "%ld bytes of data for physical device",
 820                                     __func__, MBLKL(ret_m));
 821                                 freemsgchain(ret_m);
 822                         }
 823                 }
 824         }
 825 
 826         READ_ENTER(&vswp->mfdbrw);
 827         if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
 828             (mod_hash_val_t *)&entp) != 0) {
 829                 D3(vswp, "%s: no table entry found for addr 0x%llx",
 830                     __func__, key);
 831         } else {
 832                 /*
 833                  * Send to list of devices associated with this address...
 834                  */
 835                 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
 836 
 837                         /* dont send to ourselves */
 838                         if ((caller == VSW_VNETPORT) &&
 839                             (tpp->d_addr == (void *)arg)) {
 840                                 port = (vsw_port_t *)tpp->d_addr;
 841                                 D3(vswp, "%s: not sending to ourselves"
 842                                     " : port %d", __func__, port->p_instance);
 843                                 continue;
 844 
 845                         } else if ((caller == VSW_LOCALDEV) &&
 846                             (tpp->d_type == VSW_LOCALDEV)) {
 847                                 D2(vswp, "%s: not sending back up stack",
 848                                     __func__);
 849                                 continue;
 850                         }
 851 
 852                         if (tpp->d_type == VSW_VNETPORT) {
 853                                 port = (vsw_port_t *)tpp->d_addr;
 854                                 D3(vswp, "%s: sending to port %ld for addr "
 855                                     "0x%llx", __func__, port->p_instance, key);
 856 
 857                                 nmp = vsw_dupmsgchain(mp);
 858                                 if (nmp) {
 859                                         /*
 860                                          * The vswp->mfdbrw is protecting the
 861                                          * portp from getting destroyed here.
 862                                          * So, no ref_cnt is incremented here.
 863                                          */
 864                                         (void) vsw_portsend(port, nmp);
 865                                 }
 866                         } else {
 867                                 vsw_mac_rx(vswp, NULL,
 868                                     mp, VSW_MACRX_COPYMSG);
 869                                 D2(vswp, "%s: sending up stack"
 870                                     " for addr 0x%llx", __func__, key);
 871                                 check_if = B_FALSE;
 872                         }
 873                 }
 874         }
 875 
 876         RW_EXIT(&vswp->mfdbrw);
 877 
 878         /*
 879          * If the pkt came from either a vnet or from physical device,
 880          * and if we havent already sent the pkt up the stack then we
 881          * check now if we can/should (i.e. the interface is plumbed
 882          * and in promisc mode).
 883          */
 884         if ((check_if) &&
 885             ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
 886                 vsw_mac_rx(vswp, NULL, mp,
 887                     VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
 888         }
 889 
 890         freemsgchain(mp);
 891 
 892         D1(vswp, "%s: exit", __func__);
 893 
 894         return (0);
 895 }
 896 
 897 /*
 898  * This function creates the vlan id hash table for the given vsw device or
 899  * port. It then adds each vlan that the device or port has been assigned,
 900  * into this hash table.
 901  * Arguments:
 902  *   arg:  vsw device or port.
 903  *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 904  */
 905 void
 906 vsw_create_vlans(void *arg, int type)
 907 {
 908         /* create vlan hash table */
 909         vsw_vlan_create_hash(arg, type);
 910 
 911         /* add vlan ids of the vsw device into its hash table */
 912         vsw_vlan_add_ids(arg, type);
 913 }
 914 
 915 /*
 916  * This function removes the vlan ids of the vsw device or port from its hash
 917  * table. It then destroys the vlan hash table.
 918  * Arguments:
 919  *   arg:  vsw device or port.
 920  *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 921  */
 922 void
 923 vsw_destroy_vlans(void *arg, int type)
 924 {
 925         /* remove vlan ids from the hash table */
 926         vsw_vlan_remove_ids(arg, type);
 927 
 928         /* destroy vlan-hash-table */
 929         vsw_vlan_destroy_hash(arg, type);
 930 }
 931 
 932 /*
 933  * Create a vlan-id hash table for the given vsw device or port.
 934  */
 935 static void
 936 vsw_vlan_create_hash(void *arg, int type)
 937 {
 938         char            hashname[MAXNAMELEN];
 939 
 940         if (type == VSW_LOCALDEV) {
 941                 vsw_t           *vswp = (vsw_t *)arg;
 942 
 943                 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash",
 944                     vswp->instance);
 945 
 946                 vswp->vlan_nchains = vsw_vlan_nchains;
 947                 vswp->vlan_hashp = mod_hash_create_idhash(hashname,
 948                     vswp->vlan_nchains, mod_hash_null_valdtor);
 949 
 950         } else if (type == VSW_VNETPORT) {
 951                 vsw_port_t      *portp = (vsw_port_t *)arg;
 952 
 953                 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
 954                     portp->p_instance);
 955 
 956                 portp->vlan_nchains = vsw_vlan_nchains;
 957                 portp->vlan_hashp = mod_hash_create_idhash(hashname,
 958                     portp->vlan_nchains, mod_hash_null_valdtor);
 959 
 960         } else {
 961                 return;
 962         }
 963 }
 964 
 965 /*
 966  * Destroy the vlan-id hash table for the given vsw device or port.
 967  */
 968 static void
 969 vsw_vlan_destroy_hash(void *arg, int type)
 970 {
 971         if (type == VSW_LOCALDEV) {
 972                 vsw_t           *vswp = (vsw_t *)arg;
 973 
 974                 mod_hash_destroy_hash(vswp->vlan_hashp);
 975                 vswp->vlan_nchains = 0;
 976         } else if (type == VSW_VNETPORT) {
 977                 vsw_port_t      *portp = (vsw_port_t *)arg;
 978 
 979                 mod_hash_destroy_hash(portp->vlan_hashp);
 980                 portp->vlan_nchains = 0;
 981         } else {
 982                 return;
 983         }
 984 }
 985 
 986 /*
 987  * Add vlan ids of the given vsw device or port into its hash table.
 988  */
 989 void
 990 vsw_vlan_add_ids(void *arg, int type)
 991 {
 992         int     rv;
 993         int     i;
 994 
 995         if (type == VSW_LOCALDEV) {
 996                 vsw_t           *vswp = (vsw_t *)arg;
 997 
 998                 rv = mod_hash_insert(vswp->vlan_hashp,
 999                     (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
1000                     (mod_hash_val_t)B_TRUE);
1001                 if (rv != 0) {
1002                         cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
1003                             "the interface", vswp->instance, vswp->pvid);
1004                 }
1005 
1006                 for (i = 0; i < vswp->nvids; i++) {
1007                         rv = mod_hash_insert(vswp->vlan_hashp,
1008                             (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid),
1009                             (mod_hash_val_t)B_TRUE);
1010                         if (rv != 0) {
1011                                 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
1012                                     " for the interface", vswp->instance,
1013                                     vswp->pvid);
1014                         }
1015                 }
1016 
1017         } else if (type == VSW_VNETPORT) {
1018                 vsw_port_t      *portp = (vsw_port_t *)arg;
1019                 vsw_t           *vswp = portp->p_vswp;
1020 
1021                 rv = mod_hash_insert(portp->vlan_hashp,
1022                     (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1023                     (mod_hash_val_t)B_TRUE);
1024                 if (rv != 0) {
1025                         cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
1026                             "the port(%d)", vswp->instance, vswp->pvid,
1027                             portp->p_instance);
1028                 }
1029 
1030                 for (i = 0; i < portp->nvids; i++) {
1031                         rv = mod_hash_insert(portp->vlan_hashp,
1032                             (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid),
1033                             (mod_hash_val_t)B_TRUE);
1034                         if (rv != 0) {
1035                                 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
1036                                     " for the port(%d)", vswp->instance,
1037                                     vswp->pvid, portp->p_instance);
1038                         }
1039                 }
1040 
1041         }
1042 }
1043 
1044 /*
1045  * Remove vlan ids of the given vsw device or port from its hash table.
1046  */
1047 void
1048 vsw_vlan_remove_ids(void *arg, int type)
1049 {
1050         mod_hash_val_t  vp;
1051         int             rv;
1052         int             i;
1053 
1054         if (type == VSW_LOCALDEV) {
1055                 vsw_t           *vswp = (vsw_t *)arg;
1056 
1057                 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid);
1058                 if (rv == B_TRUE) {
1059                         rv = mod_hash_remove(vswp->vlan_hashp,
1060                             (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
1061                             (mod_hash_val_t *)&vp);
1062                         ASSERT(rv == 0);
1063                 }
1064 
1065                 for (i = 0; i < vswp->nvids; i++) {
1066                         rv = vsw_vlan_lookup(vswp->vlan_hashp,
1067                             vswp->vids[i].vl_vid);
1068                         if (rv == B_TRUE) {
1069                                 rv = mod_hash_remove(vswp->vlan_hashp,
1070                                     (mod_hash_key_t)VLAN_ID_KEY(
1071                                     vswp->vids[i].vl_vid),
1072                                     (mod_hash_val_t *)&vp);
1073                                 ASSERT(rv == 0);
1074                         }
1075                 }
1076 
1077         } else if (type == VSW_VNETPORT) {
1078                 vsw_port_t      *portp = (vsw_port_t *)arg;
1079 
1080                 portp = (vsw_port_t *)arg;
1081                 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid);
1082                 if (rv == B_TRUE) {
1083                         rv = mod_hash_remove(portp->vlan_hashp,
1084                             (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1085                             (mod_hash_val_t *)&vp);
1086                         ASSERT(rv == 0);
1087                 }
1088 
1089                 for (i = 0; i < portp->nvids; i++) {
1090                         rv = vsw_vlan_lookup(portp->vlan_hashp,
1091                             portp->vids[i].vl_vid);
1092                         if (rv == B_TRUE) {
1093                                 rv = mod_hash_remove(portp->vlan_hashp,
1094                                     (mod_hash_key_t)VLAN_ID_KEY(
1095                                     portp->vids[i].vl_vid),
1096                                     (mod_hash_val_t *)&vp);
1097                                 ASSERT(rv == 0);
1098                         }
1099                 }
1100 
1101         } else {
1102                 return;
1103         }
1104 }
1105 
1106 /*
1107  * Find the given vlan id in the hash table.
1108  * Return: B_TRUE if the id is found; B_FALSE if not found.
1109  */
1110 boolean_t
1111 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1112 {
1113         int             rv;
1114         mod_hash_val_t  vp;
1115 
1116         rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1117 
1118         if (rv != 0)
1119                 return (B_FALSE);
1120 
1121         return (B_TRUE);
1122 }
1123 
1124 /*
1125  * Add an entry into FDB for the given vsw.
1126  */
1127 void
1128 vsw_fdbe_add(vsw_t *vswp, void *port)
1129 {
1130         uint64_t        addr = 0;
1131         vsw_port_t      *portp;
1132         vsw_fdbe_t      *fp;
1133         int             rv;
1134 
1135         portp = (vsw_port_t *)port;
1136         KEY_HASH(addr, &portp->p_macaddr);
1137 
1138         fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP);
1139         fp->portp = port;
1140 
1141         /*
1142          * Note: duplicate keys will be rejected by mod_hash.
1143          */
1144         rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr,
1145             (mod_hash_val_t)fp);
1146         if (rv != 0) {
1147                 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for "
1148                     "the port(%d)", vswp->instance,
1149                     ether_sprintf(&portp->p_macaddr), portp->p_instance);
1150                 kmem_free(fp, sizeof (*fp));
1151         }
1152 }
1153 
1154 /*
1155  * Remove an entry from FDB.
1156  */
1157 void
1158 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr)
1159 {
1160         uint64_t        addr = 0;
1161         vsw_fdbe_t      *fp;
1162         int             rv;
1163 
1164         KEY_HASH(addr, eaddr);
1165 
1166         /*
1167          * Remove the entry from fdb hash table.
1168          * This prevents further references to this fdb entry.
1169          */
1170         rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr,
1171             (mod_hash_val_t *)&fp);
1172         if (rv != 0) {
1173                 /* invalid key? */
1174                 return;
1175         }
1176 
1177         /*
1178          * If there are threads already ref holding before the entry was
1179          * removed from hash table, then wait for ref count to drop to zero.
1180          */
1181         while (fp->refcnt != 0) {
1182                 delay(drv_usectohz(vsw_fdbe_refcnt_delay));
1183         }
1184 
1185         kmem_free(fp, sizeof (*fp));
1186 }
1187 
1188 /*
1189  * Search fdb for a given mac address. If an entry is found, hold
1190  * a reference to it and return the entry, else returns NULL.
1191  */
1192 static vsw_fdbe_t *
1193 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp)
1194 {
1195         uint64_t        key = 0;
1196         vsw_fdbe_t      *fp;
1197         int             rv;
1198 
1199         KEY_HASH(key, addrp);
1200 
1201         rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key,
1202             (mod_hash_val_t *)&fp, vsw_fdbe_find_cb);
1203 
1204         if (rv != 0)
1205                 return (NULL);
1206 
1207         return (fp);
1208 }
1209 
1210 /*
1211  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1212  * entry corresponding to the key (macaddr), this callback will be invoked by
1213  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1214  * entry before returning the found entry.
1215  */
1216 static void
1217 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1218 {
1219         _NOTE(ARGUNUSED(key))
1220         VSW_FDBE_REFHOLD((vsw_fdbe_t *)val);
1221 }
1222 
1223 /*
1224  * A given frame must be always tagged with the appropriate vlan id (unless it
1225  * is in the default-vlan) before the mac address switching function is called.
1226  * Otherwise, after switching function determines the destination, we cannot
1227  * figure out if the destination belongs to the the same vlan that the frame
1228  * originated from and if it needs tag/untag. Frames which are inbound from
1229  * the external(physical) network over a vlan trunk link are always tagged.
1230  * However frames which are received from a vnet-port over ldc or frames which
1231  * are coming down the stack on the service domain over vsw interface may be
1232  * untagged. These frames must be tagged with the appropriate pvid of the
1233  * sender (vnet-port or vsw device), before invoking the switching function.
1234  *
1235  * Arguments:
1236  *   arg:    caller of the function.
1237  *   type:   type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1238  *   mp:     frame(s) to be tagged.
1239  */
1240 mblk_t *
1241 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp)
1242 {
1243         vsw_t                   *vswp;
1244         vsw_port_t              *portp;
1245         struct ether_header     *ehp;
1246         mblk_t                  *bp;
1247         mblk_t                  *bpt;
1248         mblk_t                  *bph;
1249         mblk_t                  *bpn;
1250         uint16_t                pvid;
1251 
1252         ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1253 
1254         if (type == VSW_LOCALDEV) {
1255                 vswp = (vsw_t *)arg;
1256                 pvid = vswp->pvid;
1257                 portp = NULL;
1258         } else {
1259                 /* VSW_VNETPORT */
1260                 portp = (vsw_port_t *)arg;
1261                 pvid = portp->pvid;
1262                 vswp = portp->p_vswp;
1263         }
1264 
1265         bpn = bph = bpt = NULL;
1266 
1267         for (bp = mp; bp != NULL; bp = bpn) {
1268 
1269                 bpn = bp->b_next;
1270                 bp->b_next = bp->b_prev = NULL;
1271 
1272                 /* Determine if it is an untagged frame */
1273                 ehp = (struct ether_header *)bp->b_rptr;
1274 
1275                 if (ehp->ether_type != ETHERTYPE_VLAN) {     /* untagged */
1276 
1277                         /* no need to tag if the frame is in default vlan */
1278                         if (pvid != vswp->default_vlan_id) {
1279                                 bp = vnet_vlan_insert_tag(bp, pvid);
1280                                 if (bp == NULL) {
1281                                         continue;
1282                                 }
1283                         }
1284                 }
1285 
1286                 /* build a chain of processed packets */
1287                 if (bph == NULL) {
1288                         bph = bpt = bp;
1289                 } else {
1290                         bpt->b_next = bp;
1291                         bpt = bp;
1292                 }
1293 
1294         }
1295 
1296         return (bph);
1297 }
1298 
1299 /*
1300  * Frames destined to a vnet-port or to the local vsw interface, must be
1301  * untagged if necessary before sending. This function first checks that the
1302  * frame can be sent to the destination in the vlan identified by the frame
1303  * tag. Note that when this function is invoked the frame must have been
1304  * already tagged (unless it is in the default-vlan). Because, this function is
1305  * called when the switching function determines the destination and invokes
1306  * its send function (vnet-port or vsw interface) and all frames would have
1307  * been tagged by this time (see comments in vsw_vlan_frame_pretag()).
1308  *
1309  * Arguments:
1310  *   arg:    destination device.
1311  *   type:   type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1312  *   np:     head of pkt chain to be validated and untagged.
1313  *   npt:    tail of pkt chain to be validated and untagged.
1314  *
1315  * Returns:
1316  *   np:     head of updated chain of packets
1317  *   npt:    tail of updated chain of packets
1318  *   rv:     count of the packets in the returned list
1319  */
1320 uint32_t
1321 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt)
1322 {
1323         mblk_t                  *bp;
1324         mblk_t                  *bpt;
1325         mblk_t                  *bph;
1326         mblk_t                  *bpn;
1327         vsw_port_t              *portp;
1328         vsw_t                   *vswp;
1329         uint32_t                count;
1330         struct ether_header     *ehp;
1331         boolean_t               is_tagged;
1332         boolean_t               rv;
1333         uint16_t                vlan_id;
1334         uint16_t                pvid;
1335         mod_hash_t              *vlan_hashp;
1336 
1337         ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1338 
1339 
1340         if (type == VSW_LOCALDEV) {
1341                 vswp = (vsw_t *)arg;
1342                 pvid = vswp->pvid;
1343                 vlan_hashp = vswp->vlan_hashp;
1344                 portp = NULL;
1345         } else {
1346                 /* type == VSW_VNETPORT */
1347                 portp = (vsw_port_t *)arg;
1348                 vswp = portp->p_vswp;
1349                 vlan_hashp = portp->vlan_hashp;
1350                 pvid = portp->pvid;
1351         }
1352 
1353         /*
1354          * If the MAC layer switching in place, then
1355          * untagging required only if the pvid is not
1356          * the same as default_vlan_id. This is because,
1357          * the MAC layer will send packets for the
1358          * registered vlans only.
1359          */
1360         if ((vswp->mac_cl_switching == B_TRUE) &&
1361             (pvid == vswp->default_vlan_id)) {
1362                 /* simply count and set the tail */
1363                 count = 1;
1364                 bp = *np;
1365                 ASSERT(bp != NULL);
1366                 while (bp->b_next != NULL) {
1367                         bp = bp->b_next;
1368                         count++;
1369                 }
1370                 *npt = bp;
1371                 return (count);
1372         }
1373 
1374         bpn = bph = bpt = NULL;
1375         count = 0;
1376 
1377         for (bp = *np; bp != NULL; bp = bpn) {
1378 
1379                 bpn = bp->b_next;
1380                 bp->b_next = bp->b_prev = NULL;
1381 
1382                 /*
1383                  * Determine the vlan id that the frame belongs to.
1384                  */
1385                 ehp = (struct ether_header *)bp->b_rptr;
1386                 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id);
1387 
1388                 /*
1389                  * If MAC layer switching in place, then we
1390                  * need to untag only if the tagged packet has
1391                  * vlan-id same as the pvid.
1392                  */
1393                 if (vswp->mac_cl_switching == B_TRUE) {
1394 
1395                         /* only tagged packets expected here */
1396                         ASSERT(is_tagged == B_TRUE);
1397                         if (vlan_id == pvid) {
1398                                 bp = vnet_vlan_remove_tag(bp);
1399                                 if (bp == NULL) {
1400                                         /* packet dropped */
1401                                         continue;
1402                                 }
1403                         }
1404                 } else { /* No MAC layer switching */
1405 
1406                         /*
1407                          * Check the frame header if tag/untag is  needed.
1408                          */
1409                         if (is_tagged == B_FALSE) {
1410                                 /*
1411                                  * Untagged frame. We shouldn't have an
1412                                  * untagged packet at this point, unless
1413                                  * the destination's  vlan id is
1414                                  * default-vlan-id; if it is not the
1415                                  * default-vlan-id, we drop the packet.
1416                                  */
1417                                 if (vlan_id != vswp->default_vlan_id) {
1418                                         /* drop the packet */
1419                                         freemsg(bp);
1420                                         continue;
1421                                 }
1422                         } else {        /* Tagged */
1423                                 /*
1424                                  * Tagged frame, untag if it's the
1425                                  * destination's pvid.
1426                                  */
1427                                 if (vlan_id == pvid) {
1428 
1429                                         bp = vnet_vlan_remove_tag(bp);
1430                                         if (bp == NULL) {
1431                                                 /* packet dropped */
1432                                                 continue;
1433                                         }
1434                                 } else {
1435 
1436                                         /*
1437                                          * Check if the destination is in the
1438                                          * same vlan.
1439                                          */
1440                                         rv = vsw_vlan_lookup(vlan_hashp,
1441                                             vlan_id);
1442                                         if (rv == B_FALSE) {
1443                                                 /* drop the packet */
1444                                                 freemsg(bp);
1445                                                 continue;
1446                                         }
1447                                 }
1448 
1449                         }
1450                 }
1451 
1452                 /* build a chain of processed packets */
1453                 if (bph == NULL) {
1454                         bph = bpt = bp;
1455                 } else {
1456                         bpt->b_next = bp;
1457                         bpt = bp;
1458                 }
1459                 count++;
1460         }
1461 
1462         *np = bph;
1463         *npt = bpt;
1464         return (count);
1465 }
1466 
1467 /*
1468  * Lookup the vlan id of the given frame. If it is a vlan-tagged frame,
1469  * then the vlan-id is available in the tag; otherwise, its vlan id is
1470  * implicitly obtained based on the caller (destination of the frame:
1471  * VSW_VNETPORT or VSW_LOCALDEV).
1472  * The vlan id determined is returned in vidp.
1473  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1474  */
1475 boolean_t
1476 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
1477         uint16_t *vidp)
1478 {
1479         struct ether_vlan_header        *evhp;
1480         vsw_t                           *vswp;
1481         vsw_port_t                      *portp;
1482 
1483         /* If it's a tagged frame, get the vid from vlan header */
1484         if (ehp->ether_type == ETHERTYPE_VLAN) {
1485 
1486                 evhp = (struct ether_vlan_header *)ehp;
1487                 *vidp = VLAN_ID(ntohs(evhp->ether_tci));
1488                 return (B_TRUE);
1489         }
1490 
1491         /* Untagged frame; determine vlan id based on caller */
1492         switch (caller) {
1493 
1494         case VSW_VNETPORT:
1495                 /*
1496                  * packet destined to a vnet; vlan-id is pvid of vnet-port.
1497                  */
1498                 portp = (vsw_port_t *)arg;
1499                 *vidp = portp->pvid;
1500                 break;
1501 
1502         case VSW_LOCALDEV:
1503 
1504                 /*
1505                  * packet destined to vsw interface;
1506                  * vlan-id is port-vlan-id of vsw device.
1507                  */
1508                 vswp = (vsw_t *)arg;
1509                 *vidp = vswp->pvid;
1510                 break;
1511         }
1512 
1513         return (B_FALSE);
1514 }
1515 
1516 /*
1517  * Add or remove multicast address(es).
1518  *
1519  * Returns 0 on success, 1 on failure.
1520  */
1521 int
1522 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
1523 {
1524         mcst_addr_t             *mcst_p = NULL;
1525         vsw_t                   *vswp = port->p_vswp;
1526         uint64_t                addr = 0x0;
1527         int                     i;
1528 
1529         D1(vswp, "%s: enter", __func__);
1530 
1531         D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
1532 
1533         for (i = 0; i < mcst_pkt->count; i++) {
1534                 /*
1535                  * Convert address into form that can be used
1536                  * as hash table key.
1537                  */
1538                 KEY_HASH(addr, &(mcst_pkt->mca[i]));
1539 
1540                 /*
1541                  * Add or delete the specified address/port combination.
1542                  */
1543                 if (mcst_pkt->set == 0x1) {
1544                         D3(vswp, "%s: adding multicast address 0x%llx for "
1545                             "port %ld", __func__, addr, port->p_instance);
1546                         if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1547                                 /*
1548                                  * Update the list of multicast
1549                                  * addresses contained within the
1550                                  * port structure to include this new
1551                                  * one.
1552                                  */
1553                                 mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
1554                                     KM_NOSLEEP);
1555                                 if (mcst_p == NULL) {
1556                                         DERR(vswp, "%s: unable to alloc mem",
1557                                             __func__);
1558                                         (void) vsw_del_mcst(vswp,
1559                                             VSW_VNETPORT, addr, port);
1560                                         return (1);
1561                                 }
1562 
1563                                 mcst_p->nextp = NULL;
1564                                 mcst_p->addr = addr;
1565                                 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
1566 
1567                                 /*
1568                                  * Program the address into HW. If the addr
1569                                  * has already been programmed then the MAC
1570                                  * just increments a ref counter (which is
1571                                  * used when the address is being deleted)
1572                                  */
1573                                 if (vsw_mac_multicast_add(vswp, port, mcst_p,
1574                                     VSW_VNETPORT)) {
1575                                         (void) vsw_del_mcst(vswp,
1576                                             VSW_VNETPORT, addr, port);
1577                                         kmem_free(mcst_p, sizeof (*mcst_p));
1578                                         return (1);
1579                                 }
1580 
1581                                 mutex_enter(&port->mca_lock);
1582                                 mcst_p->nextp = port->mcap;
1583                                 port->mcap = mcst_p;
1584                                 mutex_exit(&port->mca_lock);
1585 
1586                         } else {
1587                                 DERR(vswp, "%s: error adding multicast "
1588                                     "address 0x%llx for port %ld",
1589                                     __func__, addr, port->p_instance);
1590                                 return (1);
1591                         }
1592                 } else {
1593                         /*
1594                          * Delete an entry from the multicast hash
1595                          * table and update the address list
1596                          * appropriately.
1597                          */
1598                         if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1599                                 D3(vswp, "%s: deleting multicast address "
1600                                     "0x%llx for port %ld", __func__, addr,
1601                                     port->p_instance);
1602 
1603                                 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
1604                                 ASSERT(mcst_p != NULL);
1605 
1606                                 /*
1607                                  * Remove the address from HW. The address
1608                                  * will actually only be removed once the ref
1609                                  * count within the MAC layer has dropped to
1610                                  * zero. I.e. we can safely call this fn even
1611                                  * if other ports are interested in this
1612                                  * address.
1613                                  */
1614                                 vsw_mac_multicast_remove(vswp, port, mcst_p,
1615                                     VSW_VNETPORT);
1616                                 kmem_free(mcst_p, sizeof (*mcst_p));
1617 
1618                         } else {
1619                                 DERR(vswp, "%s: error deleting multicast "
1620                                     "addr 0x%llx for port %ld",
1621                                     __func__, addr, port->p_instance);
1622                                 return (1);
1623                         }
1624                 }
1625         }
1626         D1(vswp, "%s: exit", __func__);
1627         return (0);
1628 }
1629 
1630 /*
1631  * Add a new multicast entry.
1632  *
1633  * Search hash table based on address. If match found then
1634  * update associated val (which is chain of ports), otherwise
1635  * create new key/val (addr/port) pair and insert into table.
1636  */
1637 int
1638 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1639 {
1640         int             dup = 0;
1641         int             rv = 0;
1642         mfdb_ent_t      *ment = NULL;
1643         mfdb_ent_t      *tmp_ent = NULL;
1644         mfdb_ent_t      *new_ent = NULL;
1645         void            *tgt = NULL;
1646 
1647         if (devtype == VSW_VNETPORT) {
1648                 /*
1649                  * Being invoked from a vnet.
1650                  */
1651                 ASSERT(arg != NULL);
1652                 tgt = arg;
1653                 D2(NULL, "%s: port %d : address 0x%llx", __func__,
1654                     ((vsw_port_t *)arg)->p_instance, addr);
1655         } else {
1656                 /*
1657                  * We are being invoked via the m_multicst mac entry
1658                  * point.
1659                  */
1660                 D2(NULL, "%s: address 0x%llx", __func__, addr);
1661                 tgt = (void *)vswp;
1662         }
1663 
1664         WRITE_ENTER(&vswp->mfdbrw);
1665         if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1666             (mod_hash_val_t *)&ment) != 0) {
1667 
1668                 /* address not currently in table */
1669                 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1670                 ment->d_addr = (void *)tgt;
1671                 ment->d_type = devtype;
1672                 ment->nextp = NULL;
1673 
1674                 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
1675                     (mod_hash_val_t)ment) != 0) {
1676                         DERR(vswp, "%s: hash table insertion failed", __func__);
1677                         kmem_free(ment, sizeof (mfdb_ent_t));
1678                         rv = 1;
1679                 } else {
1680                         D2(vswp, "%s: added initial entry for 0x%llx to "
1681                             "table", __func__, addr);
1682                 }
1683         } else {
1684                 /*
1685                  * Address in table. Check to see if specified port
1686                  * is already associated with the address. If not add
1687                  * it now.
1688                  */
1689                 tmp_ent = ment;
1690                 while (tmp_ent != NULL) {
1691                         if (tmp_ent->d_addr == (void *)tgt) {
1692                                 if (devtype == VSW_VNETPORT) {
1693                                         DERR(vswp, "%s: duplicate port entry "
1694                                             "found for portid %ld and key "
1695                                             "0x%llx", __func__,
1696                                             ((vsw_port_t *)arg)->p_instance,
1697                                             addr);
1698                                 } else {
1699                                         DERR(vswp, "%s: duplicate entry found"
1700                                             "for key 0x%llx", __func__, addr);
1701                                 }
1702                                 rv = 1;
1703                                 dup = 1;
1704                                 break;
1705                         }
1706                         tmp_ent = tmp_ent->nextp;
1707                 }
1708 
1709                 /*
1710                  * Port not on list so add it to end now.
1711                  */
1712                 if (0 == dup) {
1713                         D2(vswp, "%s: added entry for 0x%llx to table",
1714                             __func__, addr);
1715                         new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1716                         new_ent->d_addr = (void *)tgt;
1717                         new_ent->d_type = devtype;
1718                         new_ent->nextp = NULL;
1719 
1720                         tmp_ent = ment;
1721                         while (tmp_ent->nextp != NULL)
1722                                 tmp_ent = tmp_ent->nextp;
1723 
1724                         tmp_ent->nextp = new_ent;
1725                 }
1726         }
1727 
1728         RW_EXIT(&vswp->mfdbrw);
1729         return (rv);
1730 }
1731 
1732 /*
1733  * Remove a multicast entry from the hashtable.
1734  *
1735  * Search hash table based on address. If match found, scan
1736  * list of ports associated with address. If specified port
1737  * found remove it from list.
1738  */
1739 int
1740 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1741 {
1742         mfdb_ent_t      *ment = NULL;
1743         mfdb_ent_t      *curr_p, *prev_p;
1744         void            *tgt = NULL;
1745 
1746         D1(vswp, "%s: enter", __func__);
1747 
1748         if (devtype == VSW_VNETPORT) {
1749                 tgt = (vsw_port_t *)arg;
1750                 D2(vswp, "%s: removing port %d from mFDB for address"
1751                     " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
1752         } else {
1753                 D2(vswp, "%s: removing entry", __func__);
1754                 tgt = (void *)vswp;
1755         }
1756 
1757         WRITE_ENTER(&vswp->mfdbrw);
1758         if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1759             (mod_hash_val_t *)&ment) != 0) {
1760                 D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
1761                 RW_EXIT(&vswp->mfdbrw);
1762                 return (1);
1763         }
1764 
1765         prev_p = curr_p = ment;
1766 
1767         while (curr_p != NULL) {
1768                 if (curr_p->d_addr == (void *)tgt) {
1769                         if (devtype == VSW_VNETPORT) {
1770                                 D2(vswp, "%s: port %d found", __func__,
1771                                     ((vsw_port_t *)tgt)->p_instance);
1772                         } else {
1773                                 D2(vswp, "%s: instance found", __func__);
1774                         }
1775 
1776                         if (prev_p == curr_p) {
1777                                 /*
1778                                  * head of list, if no other element is in
1779                                  * list then destroy this entry, otherwise
1780                                  * just replace it with updated value.
1781                                  */
1782                                 ment = curr_p->nextp;
1783                                 if (ment == NULL) {
1784                                         (void) mod_hash_destroy(vswp->mfdb,
1785                                             (mod_hash_val_t)addr);
1786                                 } else {
1787                                         (void) mod_hash_replace(vswp->mfdb,
1788                                             (mod_hash_key_t)addr,
1789                                             (mod_hash_val_t)ment);
1790                                 }
1791                         } else {
1792                                 /*
1793                                  * Not head of list, no need to do
1794                                  * replacement, just adjust list pointers.
1795                                  */
1796                                 prev_p->nextp = curr_p->nextp;
1797                         }
1798                         break;
1799                 }
1800 
1801                 prev_p = curr_p;
1802                 curr_p = curr_p->nextp;
1803         }
1804 
1805         RW_EXIT(&vswp->mfdbrw);
1806 
1807         D1(vswp, "%s: exit", __func__);
1808 
1809         if (curr_p == NULL)
1810                 return (1);
1811         kmem_free(curr_p, sizeof (mfdb_ent_t));
1812         return (0);
1813 }
1814 
1815 /*
1816  * Port is being deleted, but has registered an interest in one
1817  * or more multicast groups. Using the list of addresses maintained
1818  * within the port structure find the appropriate entry in the hash
1819  * table and remove this port from the list of interested ports.
1820  */
1821 void
1822 vsw_del_mcst_port(vsw_port_t *port)
1823 {
1824         mcst_addr_t     *mcap = NULL;
1825         vsw_t           *vswp = port->p_vswp;
1826 
1827         D1(vswp, "%s: enter", __func__);
1828 
1829         mutex_enter(&port->mca_lock);
1830 
1831         while ((mcap = port->mcap) != NULL) {
1832 
1833                 port->mcap = mcap->nextp;
1834 
1835                 mutex_exit(&port->mca_lock);
1836 
1837                 (void) vsw_del_mcst(vswp, VSW_VNETPORT,
1838                     mcap->addr, port);
1839 
1840                 /*
1841                  * Remove the address from HW. The address
1842                  * will actually only be removed once the ref
1843                  * count within the MAC layer has dropped to
1844                  * zero. I.e. we can safely call this fn even
1845                  * if other ports are interested in this
1846                  * address.
1847                  */
1848                 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT);
1849                 kmem_free(mcap, sizeof (*mcap));
1850 
1851                 mutex_enter(&port->mca_lock);
1852 
1853         }
1854 
1855         mutex_exit(&port->mca_lock);
1856 
1857         D1(vswp, "%s: exit", __func__);
1858 }
1859 
1860 /*
1861  * This vsw instance is detaching, but has registered an interest in one
1862  * or more multicast groups. Using the list of addresses maintained
1863  * within the vsw structure find the appropriate entry in the hash
1864  * table and remove this instance from the list of interested ports.
1865  */
1866 void
1867 vsw_del_mcst_vsw(vsw_t *vswp)
1868 {
1869         mcst_addr_t     *next_p = NULL;
1870 
1871         D1(vswp, "%s: enter", __func__);
1872 
1873         mutex_enter(&vswp->mca_lock);
1874 
1875         while (vswp->mcap != NULL) {
1876                 DERR(vswp, "%s: deleting addr 0x%llx",
1877                     __func__, vswp->mcap->addr);
1878                 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
1879 
1880                 next_p = vswp->mcap->nextp;
1881                 kmem_free(vswp->mcap, sizeof (mcst_addr_t));
1882                 vswp->mcap = next_p;
1883         }
1884 
1885         vswp->mcap = NULL;
1886         mutex_exit(&vswp->mca_lock);
1887 
1888         D1(vswp, "%s: exit", __func__);
1889 }
1890 
1891 mblk_t *
1892 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp)
1893 {
1894         mblk_t                  *bp;
1895         mblk_t                  *nbp;
1896         mblk_t                  *head = NULL;
1897         mblk_t                  *tail = NULL;
1898         mblk_t                  *prev = NULL;
1899         struct ether_header     *behp;
1900 
1901         /* process the chain of packets */
1902         bp = *mpp;
1903         while (bp) {
1904                 nbp = bp->b_next;
1905                 behp = (struct ether_header *)bp->b_rptr;
1906                 bp->b_prev = NULL;
1907                 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
1908                         if (prev == NULL) {
1909                                 *mpp = nbp;
1910                         } else {
1911                                 prev->b_next = nbp;
1912                         }
1913                         bp->b_next =  NULL;
1914                         if (head == NULL) {
1915                                 head = tail = bp;
1916                         } else {
1917                                 tail->b_next = bp;
1918                                 tail = bp;
1919                         }
1920                 } else {
1921                         prev = bp;
1922                 }
1923                 bp = nbp;
1924         }
1925         return (head);
1926 }
1927 
1928 static mblk_t *
1929 vsw_dupmsgchain(mblk_t *mp)
1930 {
1931         mblk_t  *nmp = NULL;
1932         mblk_t  **nmpp = &nmp;
1933 
1934         for (; mp != NULL; mp = mp->b_next) {
1935                 if ((*nmpp = dupmsg(mp)) == NULL) {
1936                         freemsgchain(nmp);
1937                         return (NULL);
1938                 }
1939 
1940                 nmpp = &((*nmpp)->b_next);
1941         }
1942 
1943         return (nmp);
1944 }