Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/dld/dld_str.c
+++ new/usr/src/uts/common/io/dld/dld_str.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Data-Link Driver
27 27 */
28 28
29 29 #include <inet/common.h>
30 30 #include <sys/strsubr.h>
31 31 #include <sys/stropts.h>
32 32 #include <sys/strsun.h>
33 33 #include <sys/vlan.h>
34 34 #include <sys/dld_impl.h>
35 35 #include <sys/cpuvar.h>
36 36 #include <sys/callb.h>
37 37 #include <sys/list.h>
38 38 #include <sys/mac_client.h>
39 39 #include <sys/mac_client_priv.h>
40 40 #include <sys/mac_flow.h>
41 41
42 42 static int str_constructor(void *, void *, int);
43 43 static void str_destructor(void *, void *);
44 44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t);
45 45 static void str_notify_promisc_on_phys(dld_str_t *);
46 46 static void str_notify_promisc_off_phys(dld_str_t *);
47 47 static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *);
48 48 static void str_notify_link_up(dld_str_t *);
49 49 static void str_notify_link_down(dld_str_t *);
50 50 static void str_notify_capab_reneg(dld_str_t *);
51 51 static void str_notify_speed(dld_str_t *, uint32_t);
52 52
53 53 static void ioc_native(dld_str_t *, mblk_t *);
54 54 static void ioc_margin(dld_str_t *, mblk_t *);
55 55 static void ioc_raw(dld_str_t *, mblk_t *);
56 56 static void ioc_fast(dld_str_t *, mblk_t *);
57 57 static void ioc_lowlink(dld_str_t *, mblk_t *);
58 58 static void ioc(dld_str_t *, mblk_t *);
59 59 static void dld_ioc(dld_str_t *, mblk_t *);
60 60 static void dld_wput_nondata(dld_str_t *, mblk_t *);
61 61
62 62 static void str_mdata_raw_put(dld_str_t *, mblk_t *);
63 63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t,
64 64 link_tagmode_t);
65 65 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t);
66 66
static uint32_t		str_count;	/* # of dld_str_t in existence */
static kmem_cache_t	*str_cachep;	/* cache of dld_str_t objects */
static mod_hash_t	*str_hashp;	/* dld_str_t's keyed by clone minor */

#define	STR_HASHSZ		64
#define	STR_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

#define	dld_taskq	system_taskq

/* State for the dld_taskq_dispatch() worker thread */
static kmutex_t		dld_taskq_lock;
static kcondvar_t	dld_taskq_cv;
static list_t		dld_taskq_list;	/* List of dld_str_t */
boolean_t		dld_taskq_quit;	/* ask the worker to exit */
boolean_t		dld_taskq_done;	/* worker has exited */

static void		dld_taskq_dispatch(void);
83 83
84 84 /*
85 85 * Some notes on entry points, flow-control, queueing.
86 86 *
87 87 * This driver exports the traditional STREAMS put entry point as well as
88 88 * the non-STREAMS fast-path transmit routine which is provided to IP via
89 89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
90 90 * and data operations, while the fast-path routine deals only with M_DATA
91 91 * fast-path packets. Regardless of the entry point, all outbound packets
92 92 * will end up in DLD_TX(), where they will be delivered to the MAC layer.
93 93 *
94 94 * The transmit logic operates in the following way: All packets coming
95 95 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
96 96 * happens when the MAC layer indicates the packets couldn't be
97 97 * transmitted due to 1) lack of resources (e.g. running out of
98 98 * descriptors), or 2) reaching the allowed bandwidth limit for this
99 99 * particular flow. The indication comes in the form of a Tx cookie that
100 100 * identifies the blocked ring. In such case, DLD will place a
101 101 * dummy message on its write-side STREAMS queue so that the queue is
102 102 * marked as "full". Any subsequent packets arriving at the driver will
103 103 * still be sent to the MAC layer where it either gets queued in the Tx
104 104 * SRS or discarded it if queue limit is exceeded. The write-side STREAMS
105 105 * queue gets enabled when MAC layer notifies DLD through MAC_NOTE_TX.
106 106 * When the write service procedure runs, it will remove the dummy
107 107 * message from the write-side STREAMS queue; in effect this will trigger
108 108 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
109 109 * respectively, due to the above reasons.
110 110 *
111 111 * All non-data operations, both DLPI and ioctls are single threaded on a per
112 112 * dld_str_t endpoint. This is done using a taskq so that the control operation
113 113 * has kernel context and can cv_wait for resources. In addition all set type
114 114 * operations that involve mac level state modification are serialized on a
115 115 * per mac end point using the perimeter mechanism provided by the mac layer.
116 116 * This serializes all mac clients trying to modify a single mac end point over
117 117 * the entire sequence of mac calls made by that client as an atomic unit. The
118 118 * mac framework locking is described in mac.c. A critical element is that
119 119 * DLD/DLS does not hold any locks across the mac perimeter.
120 120 *
121 121 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
122 122 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
123 123 * match dev_t. If a stream is found and it is attached, its dev_info_t *
124 124 * is returned. If the mac handle is non-null, it can be safely accessed
125 125 * below. The mac handle won't be freed until the mac_unregister which
126 126 * won't happen until the driver detaches. The DDI framework ensures that
127 127 * the detach won't happen while a getinfo is in progress.
128 128 */
/*
 * State carried through the str_hashp walk by dld_finddevinfo() and
 * dld_devt_to_instance(): the dev_t being looked up, and the results
 * (devinfo / instance) filled in by i_dld_str_walker().
 */
typedef struct i_dld_str_state_s {
	major_t		ds_major;	/* major of the dev_t to resolve */
	minor_t		ds_minor;	/* (clone) minor of the dev_t */
	int		ds_instance;	/* result: mac instance, -1 if none */
	dev_info_t	*ds_dip;	/* result: devinfo, NULL if none */
} i_dld_str_state_t;
135 135
/*
 * mod_hash walker callback: match each open stream against the dev_t
 * recorded in the i_dld_str_state_t passed via 'arg'.  On a clone-minor
 * match, record the stream's devinfo and mac instance (when the stream
 * is attached, i.e. ds_mh != NULL) and terminate the walk.
 */
/* ARGSUSED */
static uint_t
i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_dld_str_state_t	*statep = arg;
	dld_str_t		*dsp = (dld_str_t *)val;
	mac_handle_t		mh;

	if (statep->ds_major != dsp->ds_major)
		return (MH_WALK_CONTINUE);

	ASSERT(statep->ds_minor != 0);
	mh = dsp->ds_mh;

	if (statep->ds_minor == dsp->ds_minor) {
		/*
		 * Clone: a clone minor is unique. we can terminate the
		 * walk if we find a matching stream -- even if we fail
		 * to obtain the devinfo.
		 */
		if (mh != NULL) {
			statep->ds_dip = mac_devinfo_get(mh);
			statep->ds_instance = DLS_MINOR2INST(mac_minor(mh));
		}
		return (MH_WALK_TERMINATE);
	}
	return (MH_WALK_CONTINUE);
}
164 164
/*
 * Resolve a dev_t to its dev_info_t: first try the link's own minor
 * node, then fall back to walking the table of open streams.  Returns
 * NULL if no match is found.  (See the block comment above for why the
 * returned devinfo remains valid.)
 */
static dev_info_t *
dld_finddevinfo(dev_t dev)
{
	dev_info_t	*dip;
	i_dld_str_state_t	state;

	/* Minor 0 is the unattached DLPI style-2 node; no devinfo. */
	if (getminor(dev) == 0)
		return (NULL);

	/*
	 * See if it's a minor node of a link
	 */
	if ((dip = dls_link_devinfo(dev)) != NULL)
		return (dip);

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_dip);
}
188 188
/*
 * Map a dev_t to its GLDv3 instance number.  Returns -1 when the dev_t
 * cannot be resolved (e.g. the style-2 unattached node, minor 0).
 */
int
dld_devt_to_instance(dev_t dev)
{
	minor_t	minor;
	i_dld_str_state_t	state;

	/*
	 * GLDv3 numbers DLPI style 1 node as the instance number + 1.
	 * Minor number 0 is reserved for the DLPI style 2 unattached
	 * node.
	 */

	if ((minor = getminor(dev)) == 0)
		return (-1);

	/*
	 * Check for unopened style 1 node.
	 * Note that this doesn't *necessarily* work for legacy
	 * devices, but this code is only called within the
	 * getinfo(9e) implementation for true GLDv3 devices, so it
	 * doesn't matter.
	 */
	if (minor > 0 && minor <= DLS_MAX_MINOR) {
		return (DLS_MINOR2INST(minor));
	}

	/* Clone minor: search the open streams for a match. */
	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_instance);
}
223 223
224 224 /*
225 225 * devo_getinfo: getinfo(9e)
226 226 *
227 227 * NB: This may be called for a provider before the provider's
228 228 * instances are attached. Hence, if a particular provider needs a
229 229 * special mapping (the mac instance != ddi_get_instance()), then it
230 230 * may need to provide its own implmentation using the
231 231 * mac_devt_to_instance() function, and translating the returned mac
232 232 * instance to a devinfo instance. For dev_t's where the minor number
233 233 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
234 234 * function indirectly via the mac_getinfo() function.
235 235 */
/*ARGSUSED*/
int
dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
{
	dev_info_t	*devinfo;
	minor_t		minor = getminor((dev_t)arg);
	int		rc = DDI_FAILURE;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*(dev_info_t **)resp = devinfo;
			rc = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/* Style-1 minors map directly; clone minors need a lookup. */
		if (minor > 0 && minor <= DLS_MAX_MINOR) {
			*resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
			rc = DDI_SUCCESS;
		} else if (minor > DLS_MAX_MINOR &&
		    (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
			rc = DDI_SUCCESS;
		}
		break;
	}
	return (rc);
}
264 264
265 265 void *
266 266 dld_str_private(queue_t *q)
267 267 {
268 268 return (((dld_str_t *)(q->q_ptr))->ds_private);
269 269 }
270 270
/*
 * Common DLPI stream open processing: allocate the dld_str_t, perform
 * the implicit attach for style-1 (non-zero minor) opens, enable the
 * queue procedures and hand back a cloned dev_t.  Returns 0 or errno.
 */
int
dld_str_open(queue_t *rq, dev_t *devp, void *private)
{
	dld_str_t	*dsp;
	major_t		major;
	minor_t		minor;
	int		err;

	major = getmajor(*devp);
	minor = getminor(*devp);

	/*
	 * Create a new dld_str_t for the stream. This will grab a new minor
	 * number that will be handed back in the cloned dev_t. Creation may
	 * fail if we can't allocate the dummy mblk used for flow-control.
	 */
	dsp = dld_str_create(rq, DLD_DLPI, major,
	    ((minor == 0) ? DL_STYLE2 : DL_STYLE1));
	if (dsp == NULL)
		return (ENOSR);

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	dsp->ds_private = private;
	if (minor != 0) {
		/*
		 * Style 1 open
		 */
		if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
			goto failed;

		ASSERT(dsp->ds_dlstate == DL_UNBOUND);
	} else {
		/* Style 2: no instance yet to associate the queue with. */
		(void) qassociate(rq, -1);
	}

	/*
	 * Enable the queue srv(9e) routine.
	 */
	qprocson(rq);

	/*
	 * Construct a cloned dev_t to hand back.
	 */
	*devp = makedevice(getmajor(*devp), dsp->ds_minor);
	return (0);

failed:
	dld_str_destroy(dsp);
	return (err);
}
321 321
/*
 * Common DLPI stream close processing: wait for outstanding control
 * operations, detach from the PPA if still attached, and free the
 * dld_str_t.  Always returns 0.
 */
int
dld_str_close(queue_t *rq)
{
	dld_str_t	*dsp = rq->q_ptr;

	/*
	 * All modules on top have been popped off. So there can't be any
	 * threads from the top.
	 */
	ASSERT(dsp->ds_datathr_cnt == 0);

	/*
	 * Wait until pending DLPI requests are processed.
	 */
	mutex_enter(&dsp->ds_lock);
	while (dsp->ds_dlpi_pending)
		cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock);
	mutex_exit(&dsp->ds_lock);


	/*
	 * This stream was open to a provider node. Check to see
	 * if it has been cleanly shut down.
	 */
	if (dsp->ds_dlstate != DL_UNATTACHED) {
		/*
		 * The stream is either open to a style 1 provider or
		 * this is not clean shutdown. Detach from the PPA.
		 * (This is still ok even in the style 1 case).
		 */
		dld_str_detach(dsp);
	}

	dld_str_destroy(dsp);
	return (0);
}
358 358
359 359 /*
360 360 * qi_qopen: open(9e)
361 361 */
362 362 /*ARGSUSED*/
363 363 int
364 364 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
365 365 {
366 366 if (sflag == MODOPEN)
367 367 return (ENOTSUP);
368 368
369 369 /*
370 370 * This is a cloning driver and therefore each queue should only
371 371 * ever get opened once.
372 372 */
373 373 if (rq->q_ptr != NULL)
374 374 return (EBUSY);
375 375
376 376 return (dld_str_open(rq, devp, NULL));
377 377 }
378 378
379 379 /*
380 380 * qi_qclose: close(9e)
381 381 */
382 382 int
383 383 dld_close(queue_t *rq)
384 384 {
385 385 /*
386 386 * Disable the queue srv(9e) routine.
387 387 */
388 388 qprocsoff(rq);
389 389
390 390 return (dld_str_close(rq));
391 391 }
392 392
/*
 * qi_qputp: put(9e)
 *
 * Write-side put procedure.  M_DATA is dispatched to the fast-path or
 * raw transmit routine (only valid in DL_IDLE state with a data mode
 * negotiated); M_PROTO/M_PCPROTO DLPI primitives and M_IOCTL are routed
 * to the single-threaded non-data path; M_FLUSH clears our flow-control
 * state.  Everything else is dropped.
 */
void
dld_wput(queue_t *wq, mblk_t *mp)
{
	dld_str_t *dsp = (dld_str_t *)wq->q_ptr;
	dld_str_mode_t mode;

	switch (DB_TYPE(mp)) {
	case M_DATA:
		/* Snapshot mode/state under ds_lock before transmitting. */
		mutex_enter(&dsp->ds_lock);
		mode = dsp->ds_mode;
		if ((dsp->ds_dlstate != DL_IDLE) ||
		    (mode != DLD_FASTPATH && mode != DLD_RAW)) {
			mutex_exit(&dsp->ds_lock);
			freemsg(mp);
			break;
		}

		/*
		 * Count this data thread (under the lock) so close can
		 * wait for it; dropped again once the send completes.
		 */
		DLD_DATATHR_INC(dsp);
		mutex_exit(&dsp->ds_lock);
		if (mode == DLD_FASTPATH) {
			/* Reject runt Ethernet frames lacking a full header */
			if (dsp->ds_mip->mi_media == DL_ETHER &&
			    (MBLKL(mp) < sizeof (struct ether_header))) {
				freemsg(mp);
			} else {
				(void) str_mdata_fastpath_put(dsp, mp, 0, 0);
			}
		} else {
			str_mdata_raw_put(dsp, mp);
		}
		DLD_DATATHR_DCR(dsp);
		break;
	case M_PROTO:
	case M_PCPROTO: {
		t_uscalar_t	prim;

		/*
		 * NOTE(review): a message too short to hold a primitive
		 * is dropped here without freemsg() -- looks like a
		 * potential mblk leak; confirm against upstream.
		 */
		if (MBLKL(mp) < sizeof (t_uscalar_t))
			break;

		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;

		/* DL_UNITDATA_REQ is the data path; all else is control. */
		if (prim == DL_UNITDATA_REQ) {
			proto_unitdata_req(dsp, mp);
		} else {
			dld_wput_nondata(dsp, mp);
		}
		break;
	}

	case M_IOCTL:
		dld_wput_nondata(dsp, mp);
		break;

	case M_FLUSH:
		/* Flushing the write side clears our flow-control state. */
		if (*mp->b_rptr & FLUSHW) {
			DLD_CLRQFULL(dsp);
			*mp->b_rptr &= ~FLUSHW;
		}

		if (*mp->b_rptr & FLUSHR) {
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		break;

	default:
		freemsg(mp);
		break;
	}
}
466 466
467 467 /*
468 468 * qi_srvp: srv(9e)
469 469 */
470 470 void
471 471 dld_wsrv(queue_t *wq)
472 472 {
473 473 dld_str_t *dsp = wq->q_ptr;
474 474
475 475 DLD_CLRQFULL(dsp);
476 476 }
477 477
/*
 * Populate a GLDv3 provider's dev_ops with dld's STREAMS entry points:
 * allocates and wires up the streamtab/qinit/module_info structures
 * (freed again by dld_fini_ops) and installs dld_getinfo as the default
 * getinfo(9e) if the provider did not supply one.
 */
void
dld_init_ops(struct dev_ops *ops, const char *name)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP);
	modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP);
	(void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name);
	modinfo->mi_minpsz = 0;
	modinfo->mi_maxpsz = 64*1024;
	/* hiwat=1/lowat=0: see the flow-control notes at the top of file */
	modinfo->mi_hiwat = 1;
	modinfo->mi_lowat = 0;

	rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	rq->qi_qopen = dld_open;
	rq->qi_qclose = dld_close;
	rq->qi_minfo = modinfo;

	wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	wq->qi_putp = (pfi_t)dld_wput;
	wq->qi_srvp = (pfi_t)dld_wsrv;
	wq->qi_minfo = modinfo;

	stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP);
	stream->st_rdinit = rq;
	stream->st_wrinit = wq;
	ops->devo_cb_ops->cb_str = stream;

	if (ops->devo_getinfo == NULL)
		ops->devo_getinfo = &dld_getinfo;
}
511 511
/*
 * Undo dld_init_ops(): free the streamtab, qinit pairs and module_info
 * that were allocated for this provider's dev_ops.
 */
void
dld_fini_ops(struct dev_ops *ops)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	stream = ops->devo_cb_ops->cb_str;
	rq = stream->st_rdinit;
	wq = stream->st_wrinit;
	/* Both queues share one module_info; free it exactly once. */
	modinfo = rq->qi_minfo;
	ASSERT(wq->qi_minfo == modinfo);

	kmem_free(stream, sizeof (struct streamtab));
	kmem_free(wq, sizeof (struct qinit));
	kmem_free(rq, sizeof (struct qinit));
	kmem_free(modinfo->mi_idname, FMNAMESZ);
	kmem_free(modinfo, sizeof (struct module_info));
}
531 531
/*
 * Initialize this module's data structures.
 */
void
dld_str_init(void)
{
	/*
	 * Create dld_str_t object cache.
	 */
	str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t),
	    0, str_constructor, str_destructor, NULL, NULL, NULL, 0);
	ASSERT(str_cachep != NULL);

	/*
	 * Create a hash table for maintaining dld_str_t's.
	 * The ds_minor field (the clone minor number) of a dld_str_t
	 * is used as a key for this hash table because this number is
	 * globally unique (allocated from "dls_minor_arena").
	 */
	str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ,
	    mod_hash_null_valdtor);

	mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL);

	/* Start the worker thread that drains dld_taskq_list. */
	dld_taskq_quit = B_FALSE;
	dld_taskq_done = B_FALSE;
	list_create(&dld_taskq_list, sizeof (dld_str_t),
	    offsetof(dld_str_t, ds_tqlist));
	(void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0,
	    &p0, TS_RUN, minclsyspri);
}
564 564
/*
 * Tear down this module's data structures.  Returns EBUSY (leaving
 * everything intact) while any dld_str_t is still in existence.
 */
int
dld_str_fini(void)
{
	/*
	 * Make sure that there are no objects in use.
	 */
	if (str_count != 0)
		return (EBUSY);

	/*
	 * Ask the dld_taskq thread to quit and wait for it to be done
	 */
	mutex_enter(&dld_taskq_lock);
	dld_taskq_quit = B_TRUE;
	cv_signal(&dld_taskq_cv);
	while (!dld_taskq_done)
		cv_wait(&dld_taskq_cv, &dld_taskq_lock);
	mutex_exit(&dld_taskq_lock);
	list_destroy(&dld_taskq_list);
	/*
	 * Destroy object cache.
	 */
	kmem_cache_destroy(str_cachep);
	mod_hash_destroy_idhash(str_hashp);
	return (0);
}
594 594
595 595 /*
596 596 * Create a new dld_str_t object.
↓ open down ↓ |
596 lines elided |
↑ open up ↑ |
597 597 */
598 598 dld_str_t *
599 599 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
600 600 {
601 601 dld_str_t *dsp;
602 602 int err;
603 603
604 604 /*
605 605 * Allocate an object from the cache.
606 606 */
607 - atomic_add_32(&str_count, 1);
607 + atomic_inc_32(&str_count);
608 608 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);
609 609
610 610 /*
611 611 * Allocate the dummy mblk for flow-control.
612 612 */
613 613 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
614 614 if (dsp->ds_tx_flow_mp == NULL) {
615 615 kmem_cache_free(str_cachep, dsp);
616 - atomic_add_32(&str_count, -1);
616 + atomic_dec_32(&str_count);
617 617 return (NULL);
618 618 }
619 619 dsp->ds_type = type;
620 620 dsp->ds_major = major;
621 621 dsp->ds_style = style;
622 622
623 623 /*
624 624 * Initialize the queue pointers.
625 625 */
626 626 ASSERT(RD(rq) == rq);
627 627 dsp->ds_rq = rq;
628 628 dsp->ds_wq = WR(rq);
629 629 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;
630 630
631 631 /*
632 632 * We want explicit control over our write-side STREAMS queue
633 633 * where the dummy mblk gets added/removed for flow-control.
634 634 */
635 635 noenable(WR(rq));
636 636
637 637 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
638 638 (mod_hash_val_t)dsp);
639 639 ASSERT(err == 0);
640 640 return (dsp);
641 641 }
642 642
643 643 /*
644 644 * Destroy a dld_str_t object.
645 645 */
646 646 void
647 647 dld_str_destroy(dld_str_t *dsp)
648 648 {
649 649 queue_t *rq;
650 650 queue_t *wq;
651 651 mod_hash_val_t val;
652 652
653 653 /*
654 654 * Clear the queue pointers.
655 655 */
656 656 rq = dsp->ds_rq;
657 657 wq = dsp->ds_wq;
658 658 ASSERT(wq == WR(rq));
659 659 rq->q_ptr = wq->q_ptr = NULL;
660 660 dsp->ds_rq = dsp->ds_wq = NULL;
661 661
662 662 ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
663 663 ASSERT(dsp->ds_sap == 0);
664 664 ASSERT(dsp->ds_mh == NULL);
665 665 ASSERT(dsp->ds_mch == NULL);
666 666 ASSERT(dsp->ds_promisc == 0);
667 667 ASSERT(dsp->ds_mph == NULL);
668 668 ASSERT(dsp->ds_mip == NULL);
669 669 ASSERT(dsp->ds_mnh == NULL);
670 670
671 671 ASSERT(dsp->ds_polling == B_FALSE);
672 672 ASSERT(dsp->ds_direct == B_FALSE);
673 673 ASSERT(dsp->ds_lso == B_FALSE);
674 674 ASSERT(dsp->ds_lso_max == 0);
675 675 ASSERT(dsp->ds_passivestate != DLD_ACTIVE);
676 676
677 677 /*
678 678 * Reinitialize all the flags.
679 679 */
680 680 dsp->ds_notifications = 0;
681 681 dsp->ds_passivestate = DLD_UNINITIALIZED;
682 682 dsp->ds_mode = DLD_UNITDATA;
683 683 dsp->ds_native = B_FALSE;
684 684 dsp->ds_nonip = B_FALSE;
685 685
686 686 ASSERT(dsp->ds_datathr_cnt == 0);
687 687 ASSERT(dsp->ds_pending_head == NULL);
688 688 ASSERT(dsp->ds_pending_tail == NULL);
689 689 ASSERT(!dsp->ds_dlpi_pending);
690 690
691 691 ASSERT(dsp->ds_dlp == NULL);
692 692 ASSERT(dsp->ds_dmap == NULL);
693 693 ASSERT(dsp->ds_rx == NULL);
694 694 ASSERT(dsp->ds_rx_arg == NULL);
695 695 ASSERT(dsp->ds_next == NULL);
696 696 ASSERT(dsp->ds_head == NULL);
697 697
698 698 /*
699 699 * Free the dummy mblk if exists.
700 700 */
701 701 if (dsp->ds_tx_flow_mp != NULL) {
702 702 freeb(dsp->ds_tx_flow_mp);
↓ open down ↓ |
76 lines elided |
↑ open up ↑ |
703 703 dsp->ds_tx_flow_mp = NULL;
704 704 }
705 705
706 706 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
707 707 ASSERT(dsp == (dld_str_t *)val);
708 708
709 709 /*
710 710 * Free the object back to the cache.
711 711 */
712 712 kmem_cache_free(str_cachep, dsp);
713 - atomic_add_32(&str_count, -1);
713 + atomic_dec_32(&str_count);
714 714 }
715 715
/*
 * kmem_cache constructor function: see kmem_cache_create(9f).
 * Zeroes the object, reserves a clone minor number (which the object
 * keeps for its whole cache lifetime) and initializes the locks/cvs.
 * Returns -1 if no minor number is available.
 */
/*ARGSUSED*/
static int
str_constructor(void *buf, void *cdrarg, int kmflags)
{
	dld_str_t	*dsp = buf;

	bzero(buf, sizeof (dld_str_t));

	/*
	 * Allocate a new minor number.
	 */
	if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
		return (-1);

	/*
	 * Initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

	mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
	cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);

	return (0);
}
744 744
/*
 * kmem_cache destructor function: release the clone minor held by the
 * constructor and tear down the locks/cvs.
 */
/*ARGSUSED*/
static void
str_destructor(void *buf, void *cdrarg)
{
	dld_str_t	*dsp = buf;

	/*
	 * Release the minor number.
	 */
	mac_minor_rele(dsp->ds_minor);

	/* dld_str_destroy must already have freed the flow-control mblk */
	ASSERT(dsp->ds_tx_flow_mp == NULL);

	mutex_destroy(&dsp->ds_lock);
	cv_destroy(&dsp->ds_datathr_cv);
	cv_destroy(&dsp->ds_dlpi_pending_cv);
}
765 765
/*
 * Update the priority bits and VID (may need to insert a tag if mp
 * points to an untagged packet).
 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
 * Returns NULL (caller must drop the packet) on allocation failure.
 */
static mblk_t *
i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
    link_tagmode_t tagmode)
{
	mblk_t *hmp;
	struct ether_vlan_header *evhp;
	struct ether_header *ehp;
	uint16_t old_tci = 0;
	size_t len;

	ASSERT(pri != 0 || vid != VLAN_ID_NONE);

	evhp = (struct ether_vlan_header *)mp->b_rptr;
	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
		/*
		 * Tagged packet, update the priority bits.
		 */
		len = sizeof (struct ether_vlan_header);

		if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) {
			/*
			 * In case some drivers only check the db_ref
			 * count of the first mblk, we pullup the
			 * message into a single mblk.
			 */
			hmp = msgpullup(mp, -1);
			if ((hmp == NULL) || (MBLKL(hmp) < len)) {
				freemsg(hmp);
				return (NULL);
			} else {
				freemsg(mp);
				mp = hmp;
			}
		}

		/* Re-read the header; pullup may have replaced mp. */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		old_tci = ntohs(evhp->ether_tci);
	} else {
		/*
		 * Untagged packet. Two factors will cause us to insert a
		 * VLAN header:
		 * - This is a VLAN link (vid is specified)
		 * - The link supports user priority tagging and the priority
		 *   is non-zero.
		 */
		if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY)
			return (mp);

		hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
		if (hmp == NULL)
			return (NULL);

		evhp = (struct ether_vlan_header *)hmp->b_rptr;
		ehp = (struct ether_header *)mp->b_rptr;

		/*
		 * Copy the MAC addresses and typelen
		 */
		bcopy(ehp, evhp, (ETHERADDRL * 2));
		evhp->ether_type = ehp->ether_type;
		evhp->ether_tpid = htons(ETHERTYPE_VLAN);

		/* New header in hmp; advance mp past the old header. */
		hmp->b_wptr += sizeof (struct ether_vlan_header);
		mp->b_rptr += sizeof (struct ether_header);

		/*
		 * Free the original message if it's now empty. Link the
		 * rest of the messages to the header message.
		 */
		if (MBLKL(mp) == 0) {
			hmp->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			hmp->b_cont = mp;
		}
		mp = hmp;
	}

	/* Preserve whichever of pri/vid the caller did not override. */
	if (pri == 0)
		pri = VLAN_PRI(old_tci);
	if (vid == VLAN_ID_NONE)
		vid = VLAN_ID(old_tci);
	evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid));
	return (mp);
}
856 856
/*
 * M_DATA put (IP fast-path mode)
 *
 * Apply the stream/packet priority to Ethernet frames if needed and
 * hand the packet to the MAC layer via DLD_TX().  A non-NULL cookie
 * from DLD_TX means the flow is blocked, so flow-control is asserted
 * on the write queue; the cookie is returned to the caller.
 */
mac_tx_cookie_t
str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
    uint16_t flag)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *newmp;
	uint_t pri;
	mac_tx_cookie_t cookie;

	if (is_ethernet) {
		/*
		 * Update the priority bits to the assigned priority.
		 * The packet's own priority, if set, wins over ds_pri.
		 */
		pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp);

		if (pri != 0) {
			newmp = i_dld_ether_header_update_tag(mp, pri,
			    VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode);
			if (newmp == NULL)
				goto discard;
			mp = newmp;
		}
	}

	if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) {
		/* Flow blocked: mark the write queue full. */
		DLD_SETQFULL(dsp);
	}
	return (cookie);

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
	return (NULL);
}
894 894
/*
 * M_DATA put (DLIOCRAW mode)
 *
 * Validate a raw-mode packet (header "cooking" for non-native plugins,
 * fragment-list sanity, SDU/LSO size, VLAN tag checks), fix up the tag
 * if required, and transmit via DLD_TX(); invalid packets are dropped.
 */
static void
str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *bp, *newmp;
	size_t size;
	mac_header_info_t mhi;
	uint_t pri, vid, dvid;
	uint_t max_sdu;

	/*
	 * Certain MAC type plugins provide an illusion for raw DLPI
	 * consumers. They pretend that the MAC layer is something that
	 * it's not for the benefit of observability tools. For example,
	 * mac_wifi pretends that it's Ethernet for such consumers.
	 * Here, unless native mode is enabled, we call into the MAC layer so
	 * that this illusion can be maintained. The plugin will optionally
	 * transform the MAC header here into something that can be passed
	 * down. The header goes from raw mode to "cooked" mode.
	 */
	if (!dsp->ds_native) {
		if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
			goto discard;
		mp = newmp;
	}

	size = MBLKL(mp);

	/*
	 * Check the packet is not too big and that any remaining
	 * fragment list is composed entirely of M_DATA messages. (We
	 * know the first fragment was M_DATA otherwise we could not
	 * have got here).
	 */
	for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto discard;
		size += MBLKL(bp);
	}

	if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
		goto discard;

	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
	/*
	 * If LSO is enabled, check the size against lso_max. Otherwise,
	 * compare the packet size with max_sdu.
	 */
	max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
	if (size > max_sdu + mhi.mhi_hdrsize)
		goto discard;

	if (is_ethernet) {
		dvid = mac_client_vid(dsp->ds_mch);

		/*
		 * Discard the packet if this is a VLAN stream but the VID in
		 * the packet is not correct.
		 */
		vid = VLAN_ID(mhi.mhi_tci);
		if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
			goto discard;

		/*
		 * Discard the packet if this packet is a tagged packet
		 * but both pri and VID are 0.
		 */
		pri = VLAN_PRI(mhi.mhi_tci);
		if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
		    vid == VLAN_ID_NONE)
			goto discard;

		/*
		 * Update the priority bits to the per-stream priority if
		 * priority is not set in the packet. Update the VID for
		 * packets on a VLAN stream.
		 */
		pri = (pri == 0) ? dsp->ds_pri : 0;
		if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
			if ((newmp = i_dld_ether_header_update_tag(mp, pri,
			    dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
				goto discard;
			}
			mp = newmp;
		}
	}

	if (DLD_TX(dsp, mp, 0, 0) != NULL) {
		/* Turn on flow-control for dld */
		DLD_SETQFULL(dsp);
	}
	return;

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
}
995 995
/*
 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
 *
 * Resolves the PPA to a devnet/mac link, enters the mac perimeter,
 * opens a dls channel and registers for MAC notifications.  On any
 * failure the goto-chain releases whatever was acquired, in reverse
 * order.  Returns 0 or errno.
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	dev_t			dev;
	int			err;
	const char		*drvname;
	mac_perim_handle_t	mph = NULL;
	boolean_t		qassociated = B_FALSE;
	dls_link_t		*dlp = NULL;
	dls_dl_handle_t		ddp = NULL;

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
		return (ENOTSUP);

	/*
	 * /dev node access. This will still be supported for backward
	 * compatibility reason.
	 */
	if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
	    (strcmp(drvname, "vnic") != 0)) {
		if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
			return (EINVAL);
		qassociated = B_TRUE;
	}

	dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
	if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
		goto failed;

	if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
		goto failed;

	/*
	 * Open a channel.
	 */
	if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
		goto failed;

	if ((err = dls_open(dlp, ddp, dsp)) != 0)
		goto failed;

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that the we get updates from the MAC.
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
	dsp->ds_dlstate = DL_UNBOUND;
	mac_perim_exit(mph);
	return (0);

failed:
	/* Release resources in the reverse order they were acquired. */
	if (dlp != NULL)
		dls_link_rele(dlp);
	if (mph != NULL)
		mac_perim_exit(mph);
	if (ddp != NULL)
		dls_devnet_rele(ddp);
	if (qassociated)
		(void) qassociate(dsp->ds_wq, -1);

	return (err);
}
1068 1068
1069 1069 /*
1070 1070 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
1071 1071 * from close(2) for style 2.
1072 1072 */
void
dld_str_detach(dld_str_t *dsp)
{
	mac_perim_handle_t	mph;
	int			err;

	/* No data threads may still be active on this stream. */
	ASSERT(dsp->ds_datathr_cnt == 0);

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	/*
	 * Remove the notify function.
	 *
	 * Note that we cannot wait for the notification callback to be removed
	 * since it could cause the deadlock with str_notify() since they both
	 * need the mac perimeter. Continue if we cannot remove the
	 * notification callback right now and wait after we leave the
	 * perimeter.
	 */
	err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
	dsp->ds_mnh = NULL;

	/*
	 * Disable the capabilities
	 */
	dld_capabilities_disable(dsp);

	/*
	 * Clear LSO flags.
	 */
	dsp->ds_lso = B_FALSE;
	dsp->ds_lso_max = 0;

	dls_close(dsp);
	mac_perim_exit(mph);

	/*
	 * Now we leave the mac perimeter. If mac_notify_remove() failed
	 * because the notification callback was in progress, wait for
	 * it to finish before we proceed.
	 */
	if (err != 0)
		mac_notify_remove_wait(dsp->ds_mh);

	/*
	 * An unreferenced tagged (non-persistent) vlan gets destroyed
	 * automatically in the call to dls_devnet_rele.
	 */
	dls_devnet_rele(dsp->ds_ddh);

	/* Drop all cached MAC references; the stream is now unattached. */
	dsp->ds_sap = 0;
	dsp->ds_mh = NULL;
	dsp->ds_mch = NULL;
	dsp->ds_mip = NULL;

	if (dsp->ds_style == DL_STYLE2)
		(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;
}
1135 1135
1136 1136 /*
1137 1137 * This function is only called for VLAN streams. In raw mode, we strip VLAN
1138 1138 * tags before sending packets up to the DLS clients, with the exception of
1139 1139 * special priority tagged packets, in that case, we set the VID to 0.
1140 1140 * mp must be a VLAN tagged packet.
1141 1141 */
1142 1142 static mblk_t *
1143 1143 i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
1144 1144 {
1145 1145 mblk_t *newmp;
1146 1146 struct ether_vlan_header *evhp;
1147 1147 uint16_t tci, new_tci;
1148 1148
1149 1149 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
1150 1150 if (DB_REF(mp) > 1) {
1151 1151 newmp = copymsg(mp);
1152 1152 if (newmp == NULL)
1153 1153 return (NULL);
1154 1154 freemsg(mp);
1155 1155 mp = newmp;
1156 1156 }
1157 1157 evhp = (struct ether_vlan_header *)mp->b_rptr;
1158 1158
1159 1159 tci = ntohs(evhp->ether_tci);
1160 1160 if (VLAN_PRI(tci) == 0 || !keep_pri) {
1161 1161 /*
1162 1162 * Priority is 0, strip the tag.
1163 1163 */
1164 1164 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
1165 1165 mp->b_rptr += VLAN_TAGSZ;
1166 1166 } else {
1167 1167 /*
1168 1168 * Priority is not 0, update the VID to 0.
1169 1169 */
1170 1170 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
1171 1171 evhp->ether_tci = htons(new_tci);
1172 1172 }
1173 1173 return (mp);
1174 1174 }
1175 1175
1176 1176 /*
1177 1177 * Raw mode receive function.
1178 1178 */
1179 1179 /*ARGSUSED*/
void
dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t		*dsp = (dld_str_t *)arg;
	boolean_t		is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t			*next, *newmp;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Certain MAC type plugins provide an illusion for raw
		 * DLPI consumers. They pretend that the MAC layer is
		 * something that it's not for the benefit of observability
		 * tools. For example, mac_wifi pretends that it's Ethernet
		 * for such consumers. Here, unless native mode is enabled,
		 * we call into the MAC layer so that this illusion can be
		 * maintained. The plugin will optionally transform the MAC
		 * header here into something that can be passed up to raw
		 * consumers. The header goes from "cooked" mode to raw mode.
		 */
		if (!dsp->ds_native) {
			newmp = mac_header_uncook(dsp->ds_mh, mp);
			if (newmp == NULL) {
				/* Uncook failed; drop this packet only. */
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Strip the VLAN tag for VLAN streams.
		 */
		if (is_ethernet &&
		    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) {
			/*
			 * The priority should be kept only for VLAN
			 * data-links.
			 */
			newmp = i_dld_ether_header_strip_tag(mp,
			    mac_client_is_vlan_vnic(dsp->ds_mch));
			if (newmp == NULL) {
				/* Copy for the strip failed; drop packet. */
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);

	next:
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}
1256 1256
1257 1257 /*
1258 1258 * Fast-path receive function.
1259 1259 */
1260 1260 /*ARGSUSED*/
1261 1261 void
1262 1262 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
1263 1263 mac_header_info_t *mhip)
1264 1264 {
1265 1265 dld_str_t *dsp = (dld_str_t *)arg;
1266 1266 mblk_t *next;
1267 1267 size_t offset = 0;
1268 1268
1269 1269 /*
1270 1270 * MAC header stripping rules:
1271 1271 * - Tagged packets:
1272 1272 * a. VLAN streams. Strip the whole VLAN header including the tag.
1273 1273 * b. Physical streams
1274 1274 * - VLAN packets (non-zero VID). The stream must be either a
1275 1275 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
1276 1276 * Strip the Ethernet header but keep the VLAN header.
1277 1277 * - Special tagged packets (zero VID)
1278 1278 * * The stream is either a DL_PROMISC_SAP listener or a
1279 1279 * ETHERTYPE_VLAN listener, strip the Ethernet header but
1280 1280 * keep the VLAN header.
1281 1281 * * Otherwise, strip the whole VLAN header.
1282 1282 * - Untagged packets. Strip the whole MAC header.
1283 1283 */
1284 1284 if (mhip->mhi_istagged &&
1285 1285 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
1286 1286 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1287 1287 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1288 1288 offset = VLAN_TAGSZ;
1289 1289 }
1290 1290
1291 1291 ASSERT(mp != NULL);
1292 1292 do {
1293 1293 /*
1294 1294 * Get the pointer to the next packet in the chain and then
1295 1295 * clear b_next before the packet gets passed on.
1296 1296 */
1297 1297 next = mp->b_next;
1298 1298 mp->b_next = NULL;
1299 1299
1300 1300 /*
1301 1301 * Wind back b_rptr to point at the VLAN header.
1302 1302 */
1303 1303 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset);
1304 1304 mp->b_rptr -= offset;
1305 1305
1306 1306 /*
1307 1307 * Pass the packet on.
1308 1308 */
1309 1309 if (canputnext(dsp->ds_rq))
1310 1310 putnext(dsp->ds_rq, mp);
1311 1311 else
1312 1312 freemsg(mp);
1313 1313 /*
1314 1314 * Move on to the next packet in the chain.
1315 1315 */
1316 1316 mp = next;
1317 1317 } while (mp != NULL);
1318 1318 }
1319 1319
1320 1320 /*
1321 1321 * Default receive function (send DL_UNITDATA_IND messages).
1322 1322 */
1323 1323 /*ARGSUSED*/
1324 1324 void
1325 1325 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
1326 1326 mac_header_info_t *mhip)
1327 1327 {
1328 1328 dld_str_t *dsp = (dld_str_t *)arg;
1329 1329 mblk_t *ud_mp;
1330 1330 mblk_t *next;
1331 1331 size_t offset = 0;
1332 1332 boolean_t strip_vlan = B_TRUE;
1333 1333
1334 1334 /*
1335 1335 * See MAC header stripping rules in the dld_str_rx_fastpath() function.
1336 1336 */
1337 1337 if (mhip->mhi_istagged &&
1338 1338 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
1339 1339 ((dsp->ds_sap == ETHERTYPE_VLAN) ||
1340 1340 (dsp->ds_promisc & DLS_PROMISC_SAP))) {
1341 1341 offset = VLAN_TAGSZ;
1342 1342 strip_vlan = B_FALSE;
1343 1343 }
1344 1344
1345 1345 ASSERT(mp != NULL);
1346 1346 do {
1347 1347 /*
1348 1348 * Get the pointer to the next packet in the chain and then
1349 1349 * clear b_next before the packet gets passed on.
1350 1350 */
1351 1351 next = mp->b_next;
1352 1352 mp->b_next = NULL;
1353 1353
1354 1354 /*
1355 1355 * Wind back b_rptr to point at the MAC header.
1356 1356 */
1357 1357 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
1358 1358 mp->b_rptr -= mhip->mhi_hdrsize;
1359 1359
1360 1360 /*
1361 1361 * Create the DL_UNITDATA_IND M_PROTO.
1362 1362 */
1363 1363 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) {
1364 1364 freemsgchain(mp);
1365 1365 return;
1366 1366 }
1367 1367
1368 1368 /*
1369 1369 * Advance b_rptr to point at the payload (or the VLAN header).
1370 1370 */
1371 1371 mp->b_rptr += (mhip->mhi_hdrsize - offset);
1372 1372
1373 1373 /*
1374 1374 * Prepend the DL_UNITDATA_IND.
1375 1375 */
1376 1376 ud_mp->b_cont = mp;
1377 1377
1378 1378 /*
1379 1379 * Send the message.
1380 1380 */
1381 1381 if (canputnext(dsp->ds_rq))
1382 1382 putnext(dsp->ds_rq, ud_mp);
1383 1383 else
1384 1384 freemsg(ud_mp);
1385 1385
1386 1386 /*
1387 1387 * Move on to the next packet in the chain.
1388 1388 */
1389 1389 mp = next;
1390 1390 } while (mp != NULL);
1391 1391 }
1392 1392
1393 1393 /*
1394 1394 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE
1395 1395 */
1396 1396 static void
1397 1397 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu)
1398 1398 {
1399 1399 mblk_t *mp;
1400 1400 dl_notify_ind_t *dlip;
1401 1401
1402 1402 if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2)))
1403 1403 return;
1404 1404
1405 1405 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1406 1406 M_PROTO, 0)) == NULL)
1407 1407 return;
1408 1408
1409 1409 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1410 1410 dlip = (dl_notify_ind_t *)mp->b_rptr;
1411 1411 dlip->dl_primitive = DL_NOTIFY_IND;
1412 1412 if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) {
1413 1413 dlip->dl_notification = DL_NOTE_SDU_SIZE2;
1414 1414 dlip->dl_data1 = max_sdu;
1415 1415 dlip->dl_data2 = multicast_sdu;
1416 1416 } else {
1417 1417 dlip->dl_notification = DL_NOTE_SDU_SIZE;
1418 1418 dlip->dl_data = max_sdu;
1419 1419 }
1420 1420
1421 1421 qreply(dsp->ds_wq, mp);
1422 1422 }
1423 1423
1424 1424 /*
1425 1425 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
1426 1426 * current state of the interface.
1427 1427 */
1428 1428 void
1429 1429 dld_str_notify_ind(dld_str_t *dsp)
1430 1430 {
1431 1431 mac_notify_type_t type;
1432 1432
1433 1433 for (type = 0; type < MAC_NNOTE; type++)
1434 1434 str_notify(dsp, type);
1435 1435 }
1436 1436
/*
 * Layout used to allocate a DL_UNITDATA_IND together with space for the
 * destination and source DLSAP addresses (MAC address followed by a
 * 16-bit SAP) in a single message block.
 */
typedef struct dl_unitdata_ind_wrapper {
	dl_unitdata_ind_t	dl_unitdata;
	uint8_t			dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint8_t			dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
} dl_unitdata_ind_wrapper_t;
1442 1442
1443 1443 /*
1444 1444 * Create a DL_UNITDATA_IND M_PROTO message.
1445 1445 */
static mblk_t *
str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan)
{
	mblk_t				*nmp;
	dl_unitdata_ind_wrapper_t	*dlwp;
	dl_unitdata_ind_t		*dlp;
	mac_header_info_t		mhi;
	uint_t				addr_length;
	uint8_t				*daddr;
	uint8_t				*saddr;

	/*
	 * Get the packet header information.
	 */
	if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
		return (NULL);

	/*
	 * Allocate a message large enough to contain the wrapper structure
	 * defined above.
	 */
	if ((nmp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
	    DL_UNITDATA_IND)) == NULL)
		return (NULL);

	dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;

	/* The primitive sits at the start of the wrapper. */
	dlp = &(dlwp->dl_unitdata);
	ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
	ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);

	/*
	 * Copy in the destination address.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	daddr = dlwp->dl_dest_addr;
	/* Offsets are relative to the start of the primitive. */
	dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
	bcopy(mhi.mhi_daddr, daddr, addr_length);

	/*
	 * Set the destination DLSAP to the SAP value encoded in the packet.
	 */
	if (mhi.mhi_istagged && !strip_vlan)
		*(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
	else
		*(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
	dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);

	/*
	 * If the destination address was multicast or broadcast then the
	 * dl_group_address field should be non-zero.
	 */
	dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
	    (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);

	/*
	 * Copy in the source address if one exists. Some MAC types (DL_IB
	 * for example) may not have access to source information.
	 */
	if (mhi.mhi_saddr == NULL) {
		dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
	} else {
		saddr = dlwp->dl_src_addr;
		dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
		bcopy(mhi.mhi_saddr, saddr, addr_length);

		/*
		 * Set the source DLSAP to the packet ethertype.
		 */
		*(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
		dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
	}

	return (nmp);
}
1522 1522
1523 1523 /*
1524 1524 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
1525 1525 */
1526 1526 static void
1527 1527 str_notify_promisc_on_phys(dld_str_t *dsp)
1528 1528 {
1529 1529 mblk_t *mp;
1530 1530 dl_notify_ind_t *dlip;
1531 1531
1532 1532 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
1533 1533 return;
1534 1534
1535 1535 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1536 1536 M_PROTO, 0)) == NULL)
1537 1537 return;
1538 1538
1539 1539 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1540 1540 dlip = (dl_notify_ind_t *)mp->b_rptr;
1541 1541 dlip->dl_primitive = DL_NOTIFY_IND;
1542 1542 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;
1543 1543
1544 1544 qreply(dsp->ds_wq, mp);
1545 1545 }
1546 1546
1547 1547 /*
1548 1548 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
1549 1549 */
1550 1550 static void
1551 1551 str_notify_promisc_off_phys(dld_str_t *dsp)
1552 1552 {
1553 1553 mblk_t *mp;
1554 1554 dl_notify_ind_t *dlip;
1555 1555
1556 1556 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
1557 1557 return;
1558 1558
1559 1559 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1560 1560 M_PROTO, 0)) == NULL)
1561 1561 return;
1562 1562
1563 1563 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1564 1564 dlip = (dl_notify_ind_t *)mp->b_rptr;
1565 1565 dlip->dl_primitive = DL_NOTIFY_IND;
1566 1566 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;
1567 1567
1568 1568 qreply(dsp->ds_wq, mp);
1569 1569 }
1570 1570
1571 1571 /*
1572 1572 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
1573 1573 */
1574 1574 static void
1575 1575 str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr)
1576 1576 {
1577 1577 mblk_t *mp;
1578 1578 dl_notify_ind_t *dlip;
1579 1579 uint_t addr_length;
1580 1580 uint16_t ethertype;
1581 1581
1582 1582 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
1583 1583 return;
1584 1584
1585 1585 addr_length = dsp->ds_mip->mi_addr_length;
1586 1586 if ((mp = mexchange(dsp->ds_wq, NULL,
1587 1587 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
1588 1588 M_PROTO, 0)) == NULL)
1589 1589 return;
1590 1590
1591 1591 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1592 1592 dlip = (dl_notify_ind_t *)mp->b_rptr;
1593 1593 dlip->dl_primitive = DL_NOTIFY_IND;
1594 1594 dlip->dl_notification = DL_NOTE_PHYS_ADDR;
1595 1595 dlip->dl_data = addr_type;
1596 1596 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1597 1597 dlip->dl_addr_length = addr_length + sizeof (uint16_t);
1598 1598
1599 1599 bcopy(addr, &dlip[1], addr_length);
1600 1600
1601 1601 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap;
1602 1602 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype;
1603 1603
1604 1604 qreply(dsp->ds_wq, mp);
1605 1605 }
1606 1606
1607 1607 /*
1608 1608 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
1609 1609 */
1610 1610 static void
1611 1611 str_notify_link_up(dld_str_t *dsp)
1612 1612 {
1613 1613 mblk_t *mp;
1614 1614 dl_notify_ind_t *dlip;
1615 1615
1616 1616 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
1617 1617 return;
1618 1618
1619 1619 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1620 1620 M_PROTO, 0)) == NULL)
1621 1621 return;
1622 1622
1623 1623 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1624 1624 dlip = (dl_notify_ind_t *)mp->b_rptr;
1625 1625 dlip->dl_primitive = DL_NOTIFY_IND;
1626 1626 dlip->dl_notification = DL_NOTE_LINK_UP;
1627 1627
1628 1628 qreply(dsp->ds_wq, mp);
1629 1629 }
1630 1630
1631 1631 /*
1632 1632 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
1633 1633 */
1634 1634 static void
1635 1635 str_notify_link_down(dld_str_t *dsp)
1636 1636 {
1637 1637 mblk_t *mp;
1638 1638 dl_notify_ind_t *dlip;
1639 1639
1640 1640 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
1641 1641 return;
1642 1642
1643 1643 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1644 1644 M_PROTO, 0)) == NULL)
1645 1645 return;
1646 1646
1647 1647 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1648 1648 dlip = (dl_notify_ind_t *)mp->b_rptr;
1649 1649 dlip->dl_primitive = DL_NOTIFY_IND;
1650 1650 dlip->dl_notification = DL_NOTE_LINK_DOWN;
1651 1651
1652 1652 qreply(dsp->ds_wq, mp);
1653 1653 }
1654 1654
1655 1655 /*
1656 1656 * DL_NOTIFY_IND: DL_NOTE_SPEED
1657 1657 */
1658 1658 static void
1659 1659 str_notify_speed(dld_str_t *dsp, uint32_t speed)
1660 1660 {
1661 1661 mblk_t *mp;
1662 1662 dl_notify_ind_t *dlip;
1663 1663
1664 1664 if (!(dsp->ds_notifications & DL_NOTE_SPEED))
1665 1665 return;
1666 1666
1667 1667 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1668 1668 M_PROTO, 0)) == NULL)
1669 1669 return;
1670 1670
1671 1671 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1672 1672 dlip = (dl_notify_ind_t *)mp->b_rptr;
1673 1673 dlip->dl_primitive = DL_NOTIFY_IND;
1674 1674 dlip->dl_notification = DL_NOTE_SPEED;
1675 1675 dlip->dl_data = speed;
1676 1676
1677 1677 qreply(dsp->ds_wq, mp);
1678 1678 }
1679 1679
1680 1680 /*
1681 1681 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
1682 1682 */
1683 1683 static void
1684 1684 str_notify_capab_reneg(dld_str_t *dsp)
1685 1685 {
1686 1686 mblk_t *mp;
1687 1687 dl_notify_ind_t *dlip;
1688 1688
1689 1689 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
1690 1690 return;
1691 1691
1692 1692 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1693 1693 M_PROTO, 0)) == NULL)
1694 1694 return;
1695 1695
1696 1696 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1697 1697 dlip = (dl_notify_ind_t *)mp->b_rptr;
1698 1698 dlip->dl_primitive = DL_NOTIFY_IND;
1699 1699 dlip->dl_notification = DL_NOTE_CAPAB_RENEG;
1700 1700
1701 1701 qreply(dsp->ds_wq, mp);
1702 1702 }
1703 1703
1704 1704 /*
1705 1705 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
1706 1706 */
1707 1707 static void
1708 1708 str_notify_fastpath_flush(dld_str_t *dsp)
1709 1709 {
1710 1710 mblk_t *mp;
1711 1711 dl_notify_ind_t *dlip;
1712 1712
1713 1713 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
1714 1714 return;
1715 1715
1716 1716 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
1717 1717 M_PROTO, 0)) == NULL)
1718 1718 return;
1719 1719
1720 1720 bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1721 1721 dlip = (dl_notify_ind_t *)mp->b_rptr;
1722 1722 dlip->dl_primitive = DL_NOTIFY_IND;
1723 1723 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;
1724 1724
1725 1725 qreply(dsp->ds_wq, mp);
1726 1726 }
1727 1727
1728 1728 static void
1729 1729 str_notify_allowed_ips(dld_str_t *dsp)
1730 1730 {
1731 1731 mblk_t *mp;
1732 1732 dl_notify_ind_t *dlip;
1733 1733 size_t mp_size;
1734 1734 mac_protect_t *mrp;
1735 1735
1736 1736 if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS))
1737 1737 return;
1738 1738
1739 1739 mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t);
1740 1740 if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL)
1741 1741 return;
1742 1742
1743 1743 mrp = mac_protect_get(dsp->ds_mh);
1744 1744 bzero(mp->b_rptr, mp_size);
1745 1745 dlip = (dl_notify_ind_t *)mp->b_rptr;
1746 1746 dlip->dl_primitive = DL_NOTIFY_IND;
1747 1747 dlip->dl_notification = DL_NOTE_ALLOWED_IPS;
1748 1748 dlip->dl_data = 0;
1749 1749 dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
1750 1750 dlip->dl_addr_length = sizeof (mac_protect_t);
1751 1751 bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t),
1752 1752 sizeof (mac_protect_t));
1753 1753
1754 1754 qreply(dsp->ds_wq, mp);
1755 1755 }
1756 1756
1757 1757 /*
1758 1758 * MAC notification callback.
1759 1759 */
void
str_notify(void *arg, mac_notify_type_t type)
{
	dld_str_t		*dsp = (dld_str_t *)arg;
	queue_t			*q = dsp->ds_wq;
	mac_handle_t		mh = dsp->ds_mh;
	mac_client_handle_t	mch = dsp->ds_mch;
	uint8_t			addr[MAXMACADDRLEN];

	switch (type) {
	case MAC_NOTE_TX:
		/* Re-enable the write-side queue so it is serviced again. */
		qenable(q);
		break;

	case MAC_NOTE_DEVPROMISC:
		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		if (mac_promisc_get(mh))
			str_notify_promisc_on_phys(dsp);
		else
			str_notify_promisc_off_phys(dsp);
		break;

	case MAC_NOTE_UNICST:
		/*
		 * This notification is sent whenever the MAC unicast
		 * address changes.
		 */
		mac_unicast_primary_get(mh, addr);

		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr);
		break;

	case MAC_NOTE_DEST:
		/*
		 * Only send up DL_NOTE_DEST_ADDR if the link has a
		 * destination address.
		 */
		if (mac_dst_get(dsp->ds_mh, addr))
			str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr);
		break;

	case MAC_NOTE_LOWLINK:
	case MAC_NOTE_LINK:
		/*
		 * LOWLINK refers to the actual link status. For links that
		 * are not part of a bridge instance LOWLINK and LINK state
		 * are the same. But for a link part of a bridge instance
		 * LINK state refers to the aggregate link status: "up" when
		 * at least one link part of the bridge is up and is "down"
		 * when all links part of the bridge are down.
		 *
		 * Clients can request to be notified of the LOWLINK state
		 * using the DLIOCLOWLINK ioctl. Clients such as the bridge
		 * daemon request lowlink state changes and upper layer clients
		 * receive notifications of the aggregate link state changes
		 * which is the default when requesting LINK UP/DOWN state
		 * notifications.
		 */

		/*
		 * Check that the notification type matches the one that we
		 * want. If we want lower-level link notifications, and this
		 * is upper, or if we want upper and this is lower, then
		 * ignore.
		 */
		if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink)
			break;
		/*
		 * This notification is sent every time the MAC driver
		 * updates the link state.
		 */
		switch (mac_client_stat_get(mch, dsp->ds_lowlink ?
		    MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) {
		case LINK_STATE_UP: {
			uint64_t speed;
			/*
			 * The link is up so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_up(dsp);

			/* Report the speed in kilobits per second. */
			speed = mac_stat_get(mh, MAC_STAT_IFSPEED);
			str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
			break;
		}
		case LINK_STATE_DOWN:
			/*
			 * The link is down so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_down(dsp);
			break;

		default:
			/* LINK_STATE_UNKNOWN: nothing to report. */
			break;
		}
		break;

	case MAC_NOTE_CAPAB_CHG:
		/*
		 * This notification is sent whenever the MAC resources
		 * change or capabilities change. We need to renegotiate
		 * the capabilities. Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_capab_reneg(dsp);
		break;

	case MAC_NOTE_SDU_SIZE: {
		uint_t  max_sdu;
		uint_t  multicast_sdu;
		mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu);
		str_notify_sdu_size(dsp, max_sdu, multicast_sdu);
		break;
	}

	case MAC_NOTE_FASTPATH_FLUSH:
		str_notify_fastpath_flush(dsp);
		break;

	/* Unused notifications */
	case MAC_NOTE_MARGIN:
		break;

	case MAC_NOTE_ALLOWED_IPS:
		str_notify_allowed_ips(dsp);
		break;

	default:
		/* An unknown notification type indicates a programming bug. */
		ASSERT(B_FALSE);
		break;
	}
}
1897 1897
1898 1898 /*
1899 1899 * This function is called via a taskq mechansim to process all control
1900 1900 * messages on a per 'dsp' end point.
1901 1901 */
static void
dld_wput_nondata_task(void *arg)
{
	dld_str_t	*dsp = arg;
	mblk_t		*mp;

	mutex_enter(&dsp->ds_lock);
	while (dsp->ds_pending_head != NULL) {
		/* Dequeue the head of the pending list under ds_lock. */
		mp = dsp->ds_pending_head;
		dsp->ds_pending_head = mp->b_next;
		mp->b_next = NULL;
		if (dsp->ds_pending_head == NULL)
			dsp->ds_pending_tail = NULL;
		/* Drop the lock while processing; the message is ours now. */
		mutex_exit(&dsp->ds_lock);

		switch (DB_TYPE(mp)) {
		case M_PROTO:
		case M_PCPROTO:
			dld_proto(dsp, mp);
			break;
		case M_IOCTL:
			dld_ioc(dsp, mp);
			break;
		default:
			/* Only the above types are ever queued here. */
			ASSERT(0);
		}

		mutex_enter(&dsp->ds_lock);
	}
	ASSERT(dsp->ds_pending_tail == NULL);
	/* Signal waiters (e.g. dld_wput_nondata) that processing is done. */
	dsp->ds_dlpi_pending = 0;
	cv_broadcast(&dsp->ds_dlpi_pending_cv);
	mutex_exit(&dsp->ds_lock);
}
1936 1936
1937 1937 /*
1938 1938 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This
1939 1939 * thread is started at boot time.
1940 1940 */
static void
dld_taskq_dispatch(void)
{
	callb_cpr_t	cprinfo;
	dld_str_t	*dsp;

	CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr,
	    "dld_taskq_dispatch");
	mutex_enter(&dld_taskq_lock);

	while (!dld_taskq_quit) {
		/* Drain every endpoint whose earlier dispatch failed. */
		dsp = list_head(&dld_taskq_list);
		while (dsp != NULL) {
			list_remove(&dld_taskq_list, dsp);
			mutex_exit(&dld_taskq_lock);
			/* TQ_SLEEP guarantees the dispatch cannot fail. */
			VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task,
			    dsp, TQ_SLEEP) != 0);
			mutex_enter(&dld_taskq_lock);
			dsp = list_head(&dld_taskq_list);
		}

		/* Wait (CPR-safe) until more retry work is queued. */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&dld_taskq_cv, &dld_taskq_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock);
	}

	/* Acknowledge shutdown; CALLB_CPR_EXIT drops dld_taskq_lock. */
	dld_taskq_done = B_TRUE;
	cv_signal(&dld_taskq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1972 1972
1973 1973 /*
1974 1974 * All control operations are serialized on the 'dsp' and are also funneled
1975 1975 * through a taskq mechanism to ensure that subsequent processing has kernel
1976 1976 * context and can safely use cv_wait.
1977 1977 *
1978 1978 * Mechanisms to handle taskq dispatch failures
1979 1979 *
1980 1980 * The only way to be sure that taskq dispatch does not fail is to either
1981 1981 * specify TQ_SLEEP or to use a static taskq and prepopulate it with
1982 1982 * some number of entries and make sure that the number of outstanding requests
1983 1983 * are less than that number. We can't use TQ_SLEEP since we don't know the
1984 1984 * context. Nor can we bound the total number of 'dsp' end points. So we are
1985 1985 * unable to use either of the above schemes, and are forced to deal with
1986 1986 * taskq dispatch failures. Note that even dynamic taskq could fail in
1987 1987 * dispatch if TQ_NOSLEEP is specified, since this flag is translated
1988 1988 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq
1989 1989 * framework.
1990 1990 *
1991 1991 * We maintain a queue of 'dsp's that encountered taskq dispatch failure.
1992 1992 * We also have a single global thread to retry the taskq dispatch. This
1993 1993 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but
1994 1994 * uses TQ_SLEEP to ensure eventual success of the dispatch operation.
1995 1995 */
static void
dld_wput_nondata(dld_str_t *dsp, mblk_t *mp)
{
	ASSERT(mp->b_next == NULL);
	mutex_enter(&dsp->ds_lock);
	if (dsp->ds_pending_head != NULL) {
		/*
		 * Earlier requests are still being processed: append this
		 * message to the per-stream pending queue.  The taskq
		 * thread draining the queue will pick it up in order.
		 */
		ASSERT(dsp->ds_dlpi_pending);
		dsp->ds_pending_tail->b_next = mp;
		dsp->ds_pending_tail = mp;
		mutex_exit(&dsp->ds_lock);
		return;
	}
	/* Queue was empty; this message becomes its only entry. */
	ASSERT(dsp->ds_pending_tail == NULL);
	dsp->ds_pending_head = dsp->ds_pending_tail = mp;
	/*
	 * At this point if ds_dlpi_pending is set, it implies that the taskq
	 * thread is still active and is processing the last message, though
	 * the pending queue has been emptied.  That thread will find the
	 * newly queued message when it finishes, so no new dispatch is
	 * needed.
	 */
	if (dsp->ds_dlpi_pending) {
		mutex_exit(&dsp->ds_lock);
		return;
	}

	dsp->ds_dlpi_pending = 1;
	mutex_exit(&dsp->ds_lock);

	/*
	 * Dispatch a taskq thread to process the request.  TQ_NOSLEEP is
	 * used because this path may lack sleeping context; on dispatch
	 * failure, hand the dsp to the dld_taskq_dispatch retry thread
	 * (see the block comment above), which retries with TQ_SLEEP.
	 */
	if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp,
	    TQ_NOSLEEP) != 0)
		return;

	mutex_enter(&dld_taskq_lock);
	list_insert_tail(&dld_taskq_list, dsp);
	cv_signal(&dld_taskq_cv);
	mutex_exit(&dld_taskq_lock);
}
2032 2032
2033 2033 /*
2034 2034 * Process an M_IOCTL message.
2035 2035 */
2036 2036 static void
2037 2037 dld_ioc(dld_str_t *dsp, mblk_t *mp)
2038 2038 {
2039 2039 uint_t cmd;
2040 2040
2041 2041 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
2042 2042 ASSERT(dsp->ds_type == DLD_DLPI);
2043 2043
2044 2044 switch (cmd) {
2045 2045 case DLIOCNATIVE:
2046 2046 ioc_native(dsp, mp);
2047 2047 break;
2048 2048 case DLIOCMARGININFO:
2049 2049 ioc_margin(dsp, mp);
2050 2050 break;
2051 2051 case DLIOCRAW:
2052 2052 ioc_raw(dsp, mp);
2053 2053 break;
2054 2054 case DLIOCHDRINFO:
2055 2055 ioc_fast(dsp, mp);
2056 2056 break;
2057 2057 case DLIOCLOWLINK:
2058 2058 ioc_lowlink(dsp, mp);
2059 2059 break;
2060 2060 default:
2061 2061 ioc(dsp, mp);
2062 2062 }
2063 2063 }
2064 2064
2065 2065 /*
2066 2066 * DLIOCNATIVE
2067 2067 */
2068 2068 static void
2069 2069 ioc_native(dld_str_t *dsp, mblk_t *mp)
2070 2070 {
2071 2071 queue_t *q = dsp->ds_wq;
2072 2072 const mac_info_t *mip = dsp->ds_mip;
2073 2073
2074 2074 /*
2075 2075 * Native mode can be enabled if it's disabled and if the
2076 2076 * native media type is different.
2077 2077 */
2078 2078 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia)
2079 2079 dsp->ds_native = B_TRUE;
2080 2080
2081 2081 if (dsp->ds_native)
2082 2082 miocack(q, mp, 0, mip->mi_nativemedia);
2083 2083 else
2084 2084 miocnak(q, mp, 0, ENOTSUP);
2085 2085 }
2086 2086
2087 2087 /*
2088 2088 * DLIOCMARGININFO
2089 2089 */
2090 2090 static void
2091 2091 ioc_margin(dld_str_t *dsp, mblk_t *mp)
2092 2092 {
2093 2093 queue_t *q = dsp->ds_wq;
2094 2094 uint32_t margin;
2095 2095 int err;
2096 2096
2097 2097 if (dsp->ds_dlstate == DL_UNATTACHED) {
2098 2098 err = EINVAL;
2099 2099 goto failed;
2100 2100 }
2101 2101 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0)
2102 2102 goto failed;
2103 2103
2104 2104 mac_margin_get(dsp->ds_mh, &margin);
2105 2105 *((uint32_t *)mp->b_cont->b_rptr) = margin;
2106 2106 miocack(q, mp, sizeof (uint32_t), 0);
2107 2107 return;
2108 2108
2109 2109 failed:
2110 2110 miocnak(q, mp, 0, err);
2111 2111 }
2112 2112
/*
 * DLIOCRAW: switch the stream into raw mode, in which the client
 * exchanges frames that include the MAC header.
 */
static void
ioc_raw(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	mac_perim_handle_t mph;

	if (dsp->ds_mh == NULL) {
		/* Not bound to a MAC yet; just record the mode and ack. */
		dsp->ds_mode = DLD_RAW;
		miocack(q, mp, 0, 0);
		return;
	}

	/*
	 * Enter the MAC perimeter to serialize against concurrent
	 * enabling of polling or direct capabilities.
	 */
	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	if (dsp->ds_polling || dsp->ds_direct) {
		/* Raw mode is incompatible with polling/direct; refuse. */
		mac_perim_exit(mph);
		miocnak(q, mp, 0, EPROTO);
		return;
	}

	if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
		/*
		 * Set the receive callback.
		 */
		dls_rx_set(dsp, dld_str_rx_raw, dsp);
	}

	/*
	 * Note that raw mode is enabled.
	 */
	dsp->ds_mode = DLD_RAW;
	mac_perim_exit(mph);

	miocack(q, mp, 0, 0);
}
2150 2150
/*
 * DLIOCHDRINFO: fast-path header request.  Validates the enclosed
 * DL_UNITDATA_REQ, builds the corresponding MAC header via dls_header(),
 * and returns it to the caller in place of the request's payload.
 */
static void
ioc_fast(dld_str_t *dsp, mblk_t *mp)
{
	dl_unitdata_req_t *dlp;
	off_t off;
	size_t len;
	const uint8_t *addr;
	uint16_t sap;
	mblk_t *nmp;
	mblk_t *hmp;
	uint_t addr_length;
	queue_t *q = dsp->ds_wq;
	int err;
	mac_perim_handle_t mph;

	/* Fast-path can be administratively disabled. */
	if (dld_opt & DLD_OPT_NO_FASTPATH) {
		err = ENOTSUP;
		goto failed;
	}

	/*
	 * DLIOCHDRINFO should only come from IP. The one initiated from
	 * user-land should not be allowed.
	 */
	if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) {
		err = EINVAL;
		goto failed;
	}

	/* The payload must be a well-formed DL_UNITDATA_REQ. */
	nmp = mp->b_cont;
	if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) ||
	    (dlp = (dl_unitdata_req_t *)nmp->b_rptr,
	    dlp->dl_primitive != DL_UNITDATA_REQ)) {
		err = EINVAL;
		goto failed;
	}

	off = dlp->dl_dest_addr_offset;
	len = dlp->dl_dest_addr_length;

	/* The destination address must lie wholly within the mblk. */
	if (!MBLKIN(nmp, off, len)) {
		err = EINVAL;
		goto failed;
	}

	if (dsp->ds_dlstate != DL_IDLE) {
		err = ENOTSUP;
		goto failed;
	}

	/* Address is MAC address followed by a 16-bit SAP. */
	addr_length = dsp->ds_mip->mi_addr_length;
	if (len != addr_length + sizeof (uint16_t)) {
		err = EINVAL;
		goto failed;
	}

	addr = nmp->b_rptr + off;
	sap = *(uint16_t *)(nmp->b_rptr + off + addr_length);

	if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) {
		err = ENOMEM;
		goto failed;
	}

	/*
	 * This ioctl might happen concurrently with a direct call to dld_capab
	 * that tries to enable direct and/or poll capabilities. Since the
	 * stack does not serialize them, we do so here to avoid mixing
	 * the callbacks.
	 */
	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	if (dsp->ds_mode != DLD_FASTPATH) {
		/*
		 * Set the receive callback (unless polling is enabled).
		 */
		if (!dsp->ds_polling && !dsp->ds_direct)
			dls_rx_set(dsp, dld_str_rx_fastpath, dsp);

		/*
		 * Note that fast-path mode is enabled.
		 */
		dsp->ds_mode = DLD_FASTPATH;
	}
	mac_perim_exit(mph);

	/* Replace the request's trailing data with the generated header. */
	freemsg(nmp->b_cont);
	nmp->b_cont = hmp;

	miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0);
	return;
failed:
	miocnak(q, mp, 0, err);
}
2247 2247
2248 2248 /*
2249 2249 * DLIOCLOWLINK: request actual link state changes. When the
2250 2250 * link is part of a bridge instance the client receives actual
2251 2251 * link state changes and not the aggregate link status. Used by
2252 2252 * the bridging daemon (bridged) for proper RSTP operation.
2253 2253 */
2254 2254 static void
2255 2255 ioc_lowlink(dld_str_t *dsp, mblk_t *mp)
2256 2256 {
2257 2257 queue_t *q = dsp->ds_wq;
2258 2258 int err;
2259 2259
2260 2260 if ((err = miocpullup(mp, sizeof (int))) != 0) {
2261 2261 miocnak(q, mp, 0, err);
2262 2262 } else {
2263 2263 /* LINTED: alignment */
2264 2264 dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr;
2265 2265 miocack(q, mp, 0, 0);
2266 2266 }
2267 2267 }
2268 2268
2269 2269 /*
2270 2270 * Catch-all handler.
2271 2271 */
2272 2272 static void
2273 2273 ioc(dld_str_t *dsp, mblk_t *mp)
2274 2274 {
2275 2275 queue_t *q = dsp->ds_wq;
2276 2276
2277 2277 if (dsp->ds_dlstate == DL_UNATTACHED) {
2278 2278 miocnak(q, mp, 0, EINVAL);
2279 2279 return;
2280 2280 }
2281 2281 mac_ioctl(dsp->ds_mh, q, mp);
2282 2282 }
↓ open down ↓ |
1559 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX