Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/ip_attr.c
+++ new/usr/src/uts/common/inet/ip/ip_attr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /* Copyright (c) 1990 Mentat Inc. */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/stream.h>
29 29 #include <sys/strsun.h>
30 30 #include <sys/zone.h>
31 31 #include <sys/ddi.h>
32 32 #include <sys/sunddi.h>
33 33 #include <sys/cmn_err.h>
34 34 #include <sys/debug.h>
35 35 #include <sys/atomic.h>
36 36
37 37 #include <sys/systm.h>
38 38 #include <sys/param.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/sdt.h>
41 41 #include <sys/socket.h>
42 42 #include <sys/mac.h>
43 43 #include <net/if.h>
44 44 #include <net/if_arp.h>
45 45 #include <net/route.h>
46 46 #include <sys/sockio.h>
47 47 #include <netinet/in.h>
48 48 #include <net/if_dl.h>
49 49
50 50 #include <inet/common.h>
51 51 #include <inet/mi.h>
52 52 #include <inet/mib2.h>
53 53 #include <inet/nd.h>
54 54 #include <inet/arp.h>
55 55 #include <inet/snmpcom.h>
56 56 #include <inet/kstatcom.h>
57 57
58 58 #include <netinet/igmp_var.h>
59 59 #include <netinet/ip6.h>
60 60 #include <netinet/icmp6.h>
61 61 #include <netinet/sctp.h>
62 62
63 63 #include <inet/ip.h>
64 64 #include <inet/ip_impl.h>
65 65 #include <inet/ip6.h>
66 66 #include <inet/ip6_asp.h>
67 67 #include <inet/tcp.h>
68 68 #include <inet/ip_multi.h>
69 69 #include <inet/ip_if.h>
70 70 #include <inet/ip_ire.h>
71 71 #include <inet/ip_ftable.h>
72 72 #include <inet/ip_rts.h>
73 73 #include <inet/optcom.h>
74 74 #include <inet/ip_ndp.h>
75 75 #include <inet/ip_listutils.h>
76 76 #include <netinet/igmp.h>
77 77 #include <netinet/ip_mroute.h>
78 78 #include <inet/ipp_common.h>
79 79
80 80 #include <net/pfkeyv2.h>
81 81 #include <inet/sadb.h>
82 82 #include <inet/ipsec_impl.h>
83 83 #include <inet/ipdrop.h>
84 84 #include <inet/ip_netinfo.h>
85 85 #include <sys/squeue_impl.h>
86 86 #include <sys/squeue.h>
87 87
88 88 #include <inet/ipclassifier.h>
89 89 #include <inet/sctp_ip.h>
90 90 #include <inet/sctp/sctp_impl.h>
91 91 #include <inet/udp_impl.h>
92 92 #include <sys/sunddi.h>
↓ open down ↓ |
92 lines elided |
↑ open up ↑ |
93 93
94 94 #include <sys/tsol/label.h>
95 95 #include <sys/tsol/tnet.h>
96 96
/*
 * Release a reference on ip_xmit_attr.
 * The reference is acquired by conn_get_ixa()
 *
 * Atomically decrements ixa_refcnt and calls ixa_inactive() when the
 * count reaches zero.  Wrapped in do { } while (0) so that the macro
 * behaves as a single statement (e.g. as the body of an un-braced
 * if/else); callers must supply the trailing semicolon.
 * Note that the argument is evaluated more than once.
 */
#define	IXA_REFRELE(ixa)					\
	do {							\
		if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0)	\
			ixa_inactive(ixa);			\
	/* CONSTCOND */						\
	} while (0)
106 106
/*
 * Take an additional reference on an ip_xmit_attr_t.
 *
 * The ASSERT checks that the count is not already zero, i.e. the caller
 * is adding to an existing hold.  Wrapped in do { } while (0) so that the
 * macro behaves as a single statement; callers must supply the trailing
 * semicolon.  Note that the argument is evaluated more than once.
 */
#define	IXA_REFHOLD(ixa)				\
	do {						\
		ASSERT((ixa)->ixa_refcnt != 0);		\
		atomic_inc_32(&(ixa)->ixa_refcnt);	\
	/* CONSTCOND */					\
	} while (0)
112 112
113 113 /*
114 114 * When we need to handle a transmit side asynchronous operation, then we need
115 115 * to save sufficient information so that we can call the fragment and postfrag
116 116 * functions. That information is captured in an mblk containing this structure.
117 117 *
118 118 * Since this is currently only used for IPsec, we include information for
119 119 * the kernel crypto framework.
120 120 */
121 121 typedef struct ixamblk_s {
122 122 boolean_t ixm_inbound; /* B_FALSE */
123 123 iaflags_t ixm_flags; /* ixa_flags */
124 124 netstackid_t ixm_stackid; /* Verify it didn't go away */
125 125 uint_t ixm_ifindex; /* Used to find the nce */
126 126 in6_addr_t ixm_nceaddr_v6; /* Used to find nce */
127 127 #define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6)
128 128 uint32_t ixm_fragsize; /* ixa_fragsize */
129 129 uint_t ixm_pktlen; /* ixa_pktlen */
130 130 uint16_t ixm_ip_hdr_length; /* Points to ULP header */
131 131 uint8_t ixm_protocol; /* Protocol number for ULP cksum */
132 132 pfirepostfrag_t ixm_postfragfn; /* ixa_postfragfn */
133 133
134 134 zoneid_t ixm_zoneid; /* Needed for ipobs */
135 135 zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */
136 136
137 137 uint_t ixm_scopeid; /* For IPv6 link-locals */
138 138
139 139 uint32_t ixm_ident; /* For IPv6 fragment header */
140 140 uint32_t ixm_xmit_hint; /* ixa_xmit_hint */
141 141
142 142 uint64_t ixm_conn_id; /* Used by DTrace */
143 143 cred_t *ixm_cred; /* For getpeerucred - refhold if set */
144 144 pid_t ixm_cpid; /* For getpeerucred */
145 145
146 146 ts_label_t *ixm_tsl; /* Refhold if set. */
147 147
148 148 /*
149 149 * When the pointers below are set they have a refhold on the struct.
150 150 */
151 151 ipsec_latch_t *ixm_ipsec_latch;
152 152 struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */
153 153 struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */
154 154 struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
155 155 struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */
156 156
157 157 ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */
158 158
159 159 /* Need these while waiting for SA */
160 160 uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */
161 161 uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */
162 162 uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */
163 163 uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */
164 164
165 165 sa_family_t ixm_ipsec_inaf; /* Inner address family */
166 166 uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */
167 167 uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */
168 168 uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */
169 169 uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */
170 170
171 171 uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */
172 172 } ixamblk_t;
173 173
174 174
175 175 /*
176 176 * When we need to handle a receive side asynchronous operation, then we need
177 177 * to save sufficient information so that we can call ip_fanout.
178 178 * That information is captured in an mblk containing this structure.
179 179 *
180 180 * Since this is currently only used for IPsec, we include information for
181 181 * the kernel crypto framework.
182 182 */
183 183 typedef struct iramblk_s {
184 184 boolean_t irm_inbound; /* B_TRUE */
185 185 iaflags_t irm_flags; /* ira_flags */
186 186 netstackid_t irm_stackid; /* Verify it didn't go away; -1 if no ill */
187 187 uint_t irm_ifindex; /* To find ira_ill */
188 188
189 189 uint_t irm_rifindex; /* ira_rifindex */
190 190 uint_t irm_ruifindex; /* ira_ruifindex */
191 191 uint_t irm_pktlen; /* ira_pktlen */
192 192 uint16_t irm_ip_hdr_length; /* Points to ULP header */
193 193 uint8_t irm_protocol; /* Protocol number for ULP cksum */
194 194 zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */
195 195
196 196 squeue_t *irm_sqp; /* ira_sqp */
197 197 ill_rx_ring_t *irm_ring; /* ira_ring */
198 198
199 199 ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */
200 200 zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */
201 201 uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */
202 202
203 203 char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */
204 204
205 205 cred_t *irm_cred; /* For getpeerucred - refhold if set */
206 206 pid_t irm_cpid; /* For getpeerucred */
207 207
208 208 ts_label_t *irm_tsl; /* Refhold if set. */
209 209
210 210 /*
211 211 * When set these correspond to a refhold on the object.
212 212 */
213 213 struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */
214 214 struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */
215 215 struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */
216 216 } iramblk_t;
217 217
218 218
219 219 /*
220 220 * Take the information in ip_xmit_attr_t and stick it in an mblk
221 221 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
222 222 * ip_xmit_attr_t.
223 223 *
224 224 * Returns NULL on memory allocation failure.
225 225 */
226 226 mblk_t *
227 227 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
228 228 {
229 229 mblk_t *ixamp;
230 230 ixamblk_t *ixm;
231 231 nce_t *nce = ixa->ixa_nce;
232 232
233 233 ASSERT(nce != NULL);
234 234 ixamp = allocb(sizeof (*ixm), BPRI_MED);
235 235 if (ixamp == NULL)
236 236 return (NULL);
237 237
238 238 ixamp->b_datap->db_type = M_BREAK;
239 239 ixamp->b_wptr += sizeof (*ixm);
240 240 ixm = (ixamblk_t *)ixamp->b_rptr;
241 241
242 242 bzero(ixm, sizeof (*ixm));
243 243 ixm->ixm_inbound = B_FALSE;
244 244 ixm->ixm_flags = ixa->ixa_flags;
245 245 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
246 246 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
247 247 ixm->ixm_nceaddr_v6 = nce->nce_addr;
248 248 ixm->ixm_fragsize = ixa->ixa_fragsize;
249 249 ixm->ixm_pktlen = ixa->ixa_pktlen;
250 250 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
251 251 ixm->ixm_protocol = ixa->ixa_protocol;
252 252 ixm->ixm_postfragfn = ixa->ixa_postfragfn;
253 253 ixm->ixm_zoneid = ixa->ixa_zoneid;
254 254 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
255 255 ixm->ixm_scopeid = ixa->ixa_scopeid;
256 256 ixm->ixm_ident = ixa->ixa_ident;
257 257 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;
258 258
259 259 if (ixa->ixa_tsl != NULL) {
260 260 ixm->ixm_tsl = ixa->ixa_tsl;
261 261 label_hold(ixm->ixm_tsl);
262 262 }
263 263 if (ixa->ixa_cred != NULL) {
264 264 ixm->ixm_cred = ixa->ixa_cred;
265 265 crhold(ixa->ixa_cred);
266 266 }
267 267 ixm->ixm_cpid = ixa->ixa_cpid;
268 268 ixm->ixm_conn_id = ixa->ixa_conn_id;
269 269
270 270 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
271 271 if (ixa->ixa_ipsec_ah_sa != NULL) {
272 272 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
273 273 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
274 274 }
275 275 if (ixa->ixa_ipsec_esp_sa != NULL) {
276 276 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
277 277 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
278 278 }
279 279 if (ixa->ixa_ipsec_policy != NULL) {
280 280 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
281 281 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
282 282 }
283 283 if (ixa->ixa_ipsec_action != NULL) {
284 284 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
285 285 IPACT_REFHOLD(ixa->ixa_ipsec_action);
286 286 }
287 287 if (ixa->ixa_ipsec_latch != NULL) {
288 288 ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
289 289 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
290 290 }
291 291 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
292 292 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
293 293 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
294 294 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
295 295 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
296 296 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
297 297 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
298 298 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
299 299 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
300 300 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
301 301 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
302 302 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
303 303 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
304 304 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
305 305 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
306 306 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
307 307 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
308 308 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
309 309 }
310 310 return (ixamp);
311 311 }
312 312
313 313 /*
314 314 * Extract the ip_xmit_attr_t from the mblk, checking that the
315 315 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
316 316 * not the case.
317 317 *
318 318 * Otherwise ixa is updated.
319 319 * Caller needs to release references on the ixa by calling ixa_refrele()
320 320 * which will immediately call ixa_inactive to release the references.
321 321 */
322 322 boolean_t
323 323 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
324 324 {
325 325 ixamblk_t *ixm;
326 326 netstack_t *ns;
327 327 ip_stack_t *ipst;
328 328 ill_t *ill;
329 329 nce_t *nce;
330 330
331 331 /* We assume the caller hasn't initialized ixa */
332 332 bzero(ixa, sizeof (*ixa));
333 333
334 334 ASSERT(DB_TYPE(ixamp) == M_BREAK);
335 335 ASSERT(ixamp->b_cont == NULL);
336 336
337 337 ixm = (ixamblk_t *)ixamp->b_rptr;
338 338 ASSERT(!ixm->ixm_inbound);
339 339
340 340 /* Verify the netstack is still around */
341 341 ns = netstack_find_by_stackid(ixm->ixm_stackid);
342 342 if (ns == NULL) {
343 343 /* Disappeared on us */
344 344 (void) ip_xmit_attr_free_mblk(ixamp);
345 345 return (B_FALSE);
346 346 }
347 347 ipst = ns->netstack_ip;
348 348
349 349 /* Verify the ill is still around */
350 350 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
351 351 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);
352 352
353 353 /* We have the ill, hence the netstack can't go away */
354 354 netstack_rele(ns);
355 355 if (ill == NULL) {
356 356 /* Disappeared on us */
357 357 (void) ip_xmit_attr_free_mblk(ixamp);
358 358 return (B_FALSE);
359 359 }
360 360 /*
361 361 * Find the nce. We don't load-spread (only lookup nce's on the ill)
362 362 * because we want to find the same nce as the one we had when
363 363 * ip_xmit_attr_to_mblk was called.
364 364 */
365 365 if (ixm->ixm_flags & IXAF_IS_IPV4) {
366 366 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
367 367 } else {
368 368 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
369 369 }
370 370
371 371 /* We have the nce, hence the ill can't go away */
372 372 ill_refrele(ill);
373 373 if (nce == NULL) {
374 374 /*
375 375 * Since this is unusual and we don't know what type of
376 376 * nce it was, we drop the packet.
377 377 */
378 378 (void) ip_xmit_attr_free_mblk(ixamp);
379 379 return (B_FALSE);
380 380 }
381 381
382 382 ixa->ixa_flags = ixm->ixm_flags;
383 383 ixa->ixa_refcnt = 1;
384 384 ixa->ixa_ipst = ipst;
385 385 ixa->ixa_fragsize = ixm->ixm_fragsize;
386 386 ixa->ixa_pktlen = ixm->ixm_pktlen;
387 387 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
388 388 ixa->ixa_protocol = ixm->ixm_protocol;
389 389 ixa->ixa_nce = nce;
390 390 ixa->ixa_postfragfn = ixm->ixm_postfragfn;
391 391 ixa->ixa_zoneid = ixm->ixm_zoneid;
392 392 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
393 393 ixa->ixa_scopeid = ixm->ixm_scopeid;
394 394 ixa->ixa_ident = ixm->ixm_ident;
395 395 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;
396 396
397 397 if (ixm->ixm_tsl != NULL) {
398 398 ixa->ixa_tsl = ixm->ixm_tsl;
399 399 ixa->ixa_free_flags |= IXA_FREE_TSL;
400 400 ixm->ixm_tsl = NULL;
401 401 }
402 402 if (ixm->ixm_cred != NULL) {
403 403 ixa->ixa_cred = ixm->ixm_cred;
404 404 ixa->ixa_free_flags |= IXA_FREE_CRED;
405 405 ixm->ixm_cred = NULL;
406 406 }
407 407 ixa->ixa_cpid = ixm->ixm_cpid;
408 408 ixa->ixa_conn_id = ixm->ixm_conn_id;
409 409
410 410 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
411 411 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
412 412 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
413 413 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
414 414 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;
415 415
416 416 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
417 417 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
418 418 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
419 419 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
420 420 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
421 421 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
422 422 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
423 423 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
424 424 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
425 425 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
426 426 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
427 427 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
428 428 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
429 429 ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
430 430 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
431 431 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
432 432 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
433 433 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;
434 434
435 435 freeb(ixamp);
436 436 return (B_TRUE);
437 437 }
438 438
439 439 /*
440 440 * Free the ixm mblk and any references it holds
441 441 * Returns b_cont.
442 442 */
443 443 mblk_t *
444 444 ip_xmit_attr_free_mblk(mblk_t *ixamp)
445 445 {
446 446 ixamblk_t *ixm;
447 447 mblk_t *mp;
448 448
449 449 /* Consume mp */
450 450 ASSERT(DB_TYPE(ixamp) == M_BREAK);
451 451 mp = ixamp->b_cont;
452 452
453 453 ixm = (ixamblk_t *)ixamp->b_rptr;
454 454 ASSERT(!ixm->ixm_inbound);
455 455
456 456 if (ixm->ixm_ipsec_ah_sa != NULL) {
457 457 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
458 458 ixm->ixm_ipsec_ah_sa = NULL;
459 459 }
460 460 if (ixm->ixm_ipsec_esp_sa != NULL) {
461 461 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
462 462 ixm->ixm_ipsec_esp_sa = NULL;
463 463 }
464 464 if (ixm->ixm_ipsec_policy != NULL) {
465 465 IPPOL_REFRELE(ixm->ixm_ipsec_policy);
466 466 ixm->ixm_ipsec_policy = NULL;
467 467 }
468 468 if (ixm->ixm_ipsec_action != NULL) {
469 469 IPACT_REFRELE(ixm->ixm_ipsec_action);
470 470 ixm->ixm_ipsec_action = NULL;
471 471 }
472 472 if (ixm->ixm_ipsec_latch) {
473 473 IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
474 474 ixm->ixm_ipsec_latch = NULL;
475 475 }
476 476
477 477 if (ixm->ixm_tsl != NULL) {
478 478 label_rele(ixm->ixm_tsl);
479 479 ixm->ixm_tsl = NULL;
480 480 }
481 481 if (ixm->ixm_cred != NULL) {
482 482 crfree(ixm->ixm_cred);
483 483 ixm->ixm_cred = NULL;
484 484 }
485 485 freeb(ixamp);
486 486 return (mp);
487 487 }
488 488
489 489 /*
490 490 * Take the information in ip_recv_attr_t and stick it in an mblk
491 491 * that can later be passed to ip_recv_attr_from_mblk to recreate the
492 492 * ip_recv_attr_t.
493 493 *
494 494 * Returns NULL on memory allocation failure.
495 495 */
496 496 mblk_t *
497 497 ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
498 498 {
499 499 mblk_t *iramp;
500 500 iramblk_t *irm;
501 501 ill_t *ill = ira->ira_ill;
502 502
503 503 ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);
504 504
505 505 iramp = allocb(sizeof (*irm), BPRI_MED);
506 506 if (iramp == NULL)
507 507 return (NULL);
508 508
509 509 iramp->b_datap->db_type = M_BREAK;
510 510 iramp->b_wptr += sizeof (*irm);
511 511 irm = (iramblk_t *)iramp->b_rptr;
512 512
513 513 bzero(irm, sizeof (*irm));
514 514 irm->irm_inbound = B_TRUE;
515 515 irm->irm_flags = ira->ira_flags;
516 516 if (ill != NULL) {
517 517 /* Internal to IP - preserve ip_stack_t, ill and rill */
518 518 irm->irm_stackid =
519 519 ill->ill_ipst->ips_netstack->netstack_stackid;
520 520 irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
521 521 ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
522 522 ira->ira_rifindex);
523 523 } else {
524 524 /* Let ip_recv_attr_from_stackid know there isn't one */
525 525 irm->irm_stackid = -1;
526 526 }
527 527 irm->irm_rifindex = ira->ira_rifindex;
528 528 irm->irm_ruifindex = ira->ira_ruifindex;
529 529 irm->irm_pktlen = ira->ira_pktlen;
530 530 irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
531 531 irm->irm_protocol = ira->ira_protocol;
532 532
533 533 irm->irm_sqp = ira->ira_sqp;
534 534 irm->irm_ring = ira->ira_ring;
535 535
536 536 irm->irm_zoneid = ira->ira_zoneid;
537 537 irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
538 538 irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
539 539 irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;
540 540
541 541 if (ira->ira_tsl != NULL) {
542 542 irm->irm_tsl = ira->ira_tsl;
543 543 label_hold(irm->irm_tsl);
544 544 }
545 545 if (ira->ira_cred != NULL) {
546 546 irm->irm_cred = ira->ira_cred;
547 547 crhold(ira->ira_cred);
548 548 }
549 549 irm->irm_cpid = ira->ira_cpid;
550 550
551 551 if (ira->ira_flags & IRAF_L2SRC_SET)
552 552 bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);
553 553
554 554 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
555 555 if (ira->ira_ipsec_ah_sa != NULL) {
556 556 irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
557 557 IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
558 558 }
559 559 if (ira->ira_ipsec_esp_sa != NULL) {
560 560 irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
561 561 IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
562 562 }
563 563 if (ira->ira_ipsec_action != NULL) {
564 564 irm->irm_ipsec_action = ira->ira_ipsec_action;
565 565 IPACT_REFHOLD(ira->ira_ipsec_action);
566 566 }
567 567 }
568 568 return (iramp);
569 569 }
570 570
571 571 /*
572 572 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
573 573 * then irm_stackid is not -1, in which case we check that the
574 574 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
575 575 * not the case.
576 576 * If irm_stackid is -1 then we are used by an ULP (e.g., squeue_enter)
577 577 * and we just proceed with ira_ill and ira_rill as NULL.
578 578 *
579 579 * The caller needs to release any references on the pointers inside the ira
580 580 * by calling ira_cleanup.
581 581 */
582 582 boolean_t
583 583 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
584 584 {
585 585 iramblk_t *irm;
586 586 netstack_t *ns;
587 587 ip_stack_t *ipst = NULL;
588 588 ill_t *ill = NULL, *rill = NULL;
589 589
590 590 /* We assume the caller hasn't initialized ira */
591 591 bzero(ira, sizeof (*ira));
592 592
593 593 ASSERT(DB_TYPE(iramp) == M_BREAK);
594 594 ASSERT(iramp->b_cont == NULL);
595 595
596 596 irm = (iramblk_t *)iramp->b_rptr;
597 597 ASSERT(irm->irm_inbound);
598 598
599 599 if (irm->irm_stackid != -1) {
600 600 /* Verify the netstack is still around */
601 601 ns = netstack_find_by_stackid(irm->irm_stackid);
602 602 if (ns == NULL) {
603 603 /* Disappeared on us */
604 604 (void) ip_recv_attr_free_mblk(iramp);
605 605 return (B_FALSE);
606 606 }
607 607 ipst = ns->netstack_ip;
608 608
609 609 /* Verify the ill is still around */
610 610 ill = ill_lookup_on_ifindex(irm->irm_ifindex,
611 611 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
612 612
613 613 if (irm->irm_ifindex == irm->irm_rifindex) {
614 614 rill = ill;
615 615 } else {
616 616 rill = ill_lookup_on_ifindex(irm->irm_rifindex,
617 617 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
618 618 }
619 619
620 620 /* We have the ill, hence the netstack can't go away */
621 621 netstack_rele(ns);
622 622 if (ill == NULL || rill == NULL) {
623 623 /* Disappeared on us */
624 624 if (ill != NULL)
625 625 ill_refrele(ill);
626 626 if (rill != NULL && rill != ill)
627 627 ill_refrele(rill);
628 628 (void) ip_recv_attr_free_mblk(iramp);
629 629 return (B_FALSE);
630 630 }
631 631 }
632 632
633 633 ira->ira_flags = irm->irm_flags;
634 634 /* Caller must ill_refele(ira_ill) by using ira_cleanup() */
635 635 ira->ira_ill = ill;
636 636 ira->ira_rill = rill;
637 637
638 638 ira->ira_rifindex = irm->irm_rifindex;
639 639 ira->ira_ruifindex = irm->irm_ruifindex;
640 640 ira->ira_pktlen = irm->irm_pktlen;
641 641 ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
642 642 ira->ira_protocol = irm->irm_protocol;
643 643
644 644 ira->ira_sqp = irm->irm_sqp;
645 645 /* The rest of IP assumes that the rings never go away. */
646 646 ira->ira_ring = irm->irm_ring;
647 647
648 648 ira->ira_zoneid = irm->irm_zoneid;
649 649 ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
650 650 ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
651 651 ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;
652 652
653 653 if (irm->irm_tsl != NULL) {
654 654 ira->ira_tsl = irm->irm_tsl;
655 655 ira->ira_free_flags |= IRA_FREE_TSL;
656 656 irm->irm_tsl = NULL;
657 657 }
658 658 if (irm->irm_cred != NULL) {
659 659 ira->ira_cred = irm->irm_cred;
660 660 ira->ira_free_flags |= IRA_FREE_CRED;
661 661 irm->irm_cred = NULL;
662 662 }
663 663 ira->ira_cpid = irm->irm_cpid;
664 664
665 665 if (ira->ira_flags & IRAF_L2SRC_SET)
666 666 bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);
667 667
668 668 ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
669 669 ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
670 670 ira->ira_ipsec_action = irm->irm_ipsec_action;
671 671
672 672 freeb(iramp);
673 673 return (B_TRUE);
674 674 }
675 675
676 676 /*
677 677 * Free the irm mblk and any references it holds
678 678 * Returns b_cont.
679 679 */
680 680 mblk_t *
681 681 ip_recv_attr_free_mblk(mblk_t *iramp)
682 682 {
683 683 iramblk_t *irm;
684 684 mblk_t *mp;
685 685
686 686 /* Consume mp */
687 687 ASSERT(DB_TYPE(iramp) == M_BREAK);
688 688 mp = iramp->b_cont;
689 689
690 690 irm = (iramblk_t *)iramp->b_rptr;
691 691 ASSERT(irm->irm_inbound);
692 692
693 693 if (irm->irm_ipsec_ah_sa != NULL) {
694 694 IPSA_REFRELE(irm->irm_ipsec_ah_sa);
695 695 irm->irm_ipsec_ah_sa = NULL;
696 696 }
697 697 if (irm->irm_ipsec_esp_sa != NULL) {
698 698 IPSA_REFRELE(irm->irm_ipsec_esp_sa);
699 699 irm->irm_ipsec_esp_sa = NULL;
700 700 }
701 701 if (irm->irm_ipsec_action != NULL) {
702 702 IPACT_REFRELE(irm->irm_ipsec_action);
703 703 irm->irm_ipsec_action = NULL;
704 704 }
705 705 if (irm->irm_tsl != NULL) {
706 706 label_rele(irm->irm_tsl);
707 707 irm->irm_tsl = NULL;
708 708 }
709 709 if (irm->irm_cred != NULL) {
710 710 crfree(irm->irm_cred);
711 711 irm->irm_cred = NULL;
712 712 }
713 713
714 714 freeb(iramp);
715 715 return (mp);
716 716 }
717 717
718 718 /*
719 719 * Returns true if the mblk contains an ip_recv_attr_t
720 720 * For now we just check db_type.
721 721 */
722 722 boolean_t
723 723 ip_recv_attr_is_mblk(mblk_t *mp)
724 724 {
725 725 /*
726 726 * Need to handle the various forms of tcp_timermp which are tagged
727 727 * with b_wptr and might have a NULL b_datap.
728 728 */
729 729 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
730 730 return (B_FALSE);
731 731
732 732 #ifdef DEBUG
733 733 iramblk_t *irm;
734 734
735 735 if (DB_TYPE(mp) != M_BREAK)
736 736 return (B_FALSE);
737 737
738 738 irm = (iramblk_t *)mp->b_rptr;
739 739 ASSERT(irm->irm_inbound);
740 740 return (B_TRUE);
741 741 #else
742 742 return (DB_TYPE(mp) == M_BREAK);
743 743 #endif
744 744 }
745 745
746 746 static ip_xmit_attr_t *
↓ open down ↓ |
626 lines elided |
↑ open up ↑ |
747 747 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
748 748 {
749 749 ip_xmit_attr_t *ixa;
750 750 ip_xmit_attr_t *oldixa;
751 751
752 752 mutex_enter(&connp->conn_lock);
753 753 ixa = connp->conn_ixa;
754 754
755 755 /* At least one references for the conn_t */
756 756 ASSERT(ixa->ixa_refcnt >= 1);
757 - if (atomic_add_32_nv(&ixa->ixa_refcnt, 1) == 2) {
757 + if (atomic_inc_32_nv(&ixa->ixa_refcnt) == 2) {
758 758 /* No other thread using conn_ixa */
759 759 mutex_exit(&connp->conn_lock);
760 760 return (ixa);
761 761 }
762 762 ixa = kmem_alloc(sizeof (*ixa), kmflag);
763 763 if (ixa == NULL) {
764 764 mutex_exit(&connp->conn_lock);
765 765 ixa_refrele(connp->conn_ixa);
766 766 return (NULL);
767 767 }
768 768 ixa_safe_copy(connp->conn_ixa, ixa);
769 769
770 770 /* Make sure we drop conn_lock before any refrele */
771 771 if (replace) {
772 772 ixa->ixa_refcnt++; /* No atomic needed - not visible */
773 773 oldixa = connp->conn_ixa;
774 774 connp->conn_ixa = ixa;
775 775 mutex_exit(&connp->conn_lock);
776 776 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */
777 777 } else {
778 778 oldixa = connp->conn_ixa;
779 779 mutex_exit(&connp->conn_lock);
780 780 }
781 781 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */
782 782
783 783 return (ixa);
784 784 }
785 785
786 786 /*
787 787 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
788 788 * the caller can access the ip_xmit_attr_t.
789 789 *
790 790 * If nobody else is using conn_ixa we return it.
791 791 * Otherwise we make a "safe" copy of conn_ixa
792 792 * and return it. The "safe" copy has the pointers set to NULL
793 793 * (since the pointers might be changed by another thread using
794 794 * conn_ixa). The caller needs to check for NULL pointers to see
795 795 * if ip_set_destination needs to be called to re-establish the pointers.
796 796 *
797 797 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
798 798 * That is used when we connect() the ULP.
799 799 */
800 800 ip_xmit_attr_t *
801 801 conn_get_ixa(conn_t *connp, boolean_t replace)
802 802 {
803 803 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
804 804 }
805 805
806 806 /*
807 807 * Used only when the option is to have the kernel hang due to not
808 808 * cleaning up ixa references on ills etc.
809 809 */
810 810 ip_xmit_attr_t *
811 811 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
812 812 {
813 813 return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
814 814 }
815 815
816 816 /*
817 817 * Replace conn_ixa with the ixa argument.
818 818 *
819 819 * The caller must hold conn_lock.
820 820 *
821 821 * We return the old ixa; the caller must ixa_refrele that after conn_lock
822 822 * has been dropped.
823 823 */
824 824 ip_xmit_attr_t *
825 825 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
826 826 {
827 827 ip_xmit_attr_t *oldixa;
828 828
829 829 ASSERT(MUTEX_HELD(&connp->conn_lock));
830 830
831 831 oldixa = connp->conn_ixa;
832 832 IXA_REFHOLD(ixa);
833 833 ixa->ixa_conn_id = oldixa->ixa_conn_id;
834 834 connp->conn_ixa = ixa;
835 835 return (oldixa);
836 836 }
837 837
838 838 /*
839 839 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
840 840 * separate from conn_ixa.
841 841 *
842 842 * This "safe" copy has the pointers set to NULL
843 843 * (since the pointers might be changed by another thread using
844 844 * conn_ixa). The caller needs to check for NULL pointers to see
845 845 * if ip_set_destination needs to be called to re-establish the pointers.
846 846 */
847 847 ip_xmit_attr_t *
848 848 conn_get_ixa_exclusive(conn_t *connp)
↓ open down ↓ |
81 lines elided |
↑ open up ↑ |
849 849 {
850 850 ip_xmit_attr_t *ixa;
851 851
852 852 mutex_enter(&connp->conn_lock);
853 853 ixa = connp->conn_ixa;
854 854
855 855 /* At least one references for the conn_t */
856 856 ASSERT(ixa->ixa_refcnt >= 1);
857 857
858 858 /* Make sure conn_ixa doesn't disappear while we copy it */
859 - atomic_add_32(&ixa->ixa_refcnt, 1);
859 + atomic_inc_32(&ixa->ixa_refcnt);
860 860
861 861 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
862 862 if (ixa == NULL) {
863 863 mutex_exit(&connp->conn_lock);
864 864 ixa_refrele(connp->conn_ixa);
865 865 return (NULL);
866 866 }
867 867 ixa_safe_copy(connp->conn_ixa, ixa);
868 868 mutex_exit(&connp->conn_lock);
869 869 IXA_REFRELE(connp->conn_ixa);
870 870 return (ixa);
871 871 }
872 872
873 873 void
874 874 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
875 875 {
876 876 bcopy(src, ixa, sizeof (*ixa));
877 877 ixa->ixa_refcnt = 1;
878 878 /*
879 879 * Clear any pointers that have references and might be changed
880 880 * by ip_set_destination or the ULP
881 881 */
882 882 ixa->ixa_ire = NULL;
883 883 ixa->ixa_nce = NULL;
884 884 ixa->ixa_dce = NULL;
885 885 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
886 886 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
887 887 #ifdef DEBUG
888 888 ixa->ixa_curthread = NULL;
889 889 #endif
890 890 /* Clear all the IPsec pointers and the flag as well. */
891 891 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
892 892
893 893 ixa->ixa_ipsec_latch = NULL;
894 894 ixa->ixa_ipsec_ah_sa = NULL;
895 895 ixa->ixa_ipsec_esp_sa = NULL;
896 896 ixa->ixa_ipsec_policy = NULL;
897 897 ixa->ixa_ipsec_action = NULL;
898 898
899 899 /*
900 900 * We leave ixa_tsl unchanged, but if it has a refhold we need
901 901 * to get an extra refhold.
902 902 */
903 903 if (ixa->ixa_free_flags & IXA_FREE_TSL)
904 904 label_hold(ixa->ixa_tsl);
905 905
906 906 /*
907 907 * We leave ixa_cred unchanged, but if it has a refhold we need
908 908 * to get an extra refhold.
909 909 */
910 910 if (ixa->ixa_free_flags & IXA_FREE_CRED)
911 911 crhold(ixa->ixa_cred);
912 912 }
913 913
914 914 /*
915 915 * Duplicate an ip_xmit_attr_t.
916 916 * Assumes that the caller controls the ixa, hence we do not need to use
917 917 * a safe copy. We just have to increase the refcnt on any pointers.
918 918 */
919 919 ip_xmit_attr_t *
920 920 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
921 921 {
922 922 ip_xmit_attr_t *ixa;
923 923
924 924 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
925 925 if (ixa == NULL)
926 926 return (NULL);
927 927 bcopy(src_ixa, ixa, sizeof (*ixa));
928 928 ixa->ixa_refcnt = 1;
929 929
930 930 if (ixa->ixa_ire != NULL)
931 931 ire_refhold_notr(ixa->ixa_ire);
932 932 if (ixa->ixa_nce != NULL)
933 933 nce_refhold(ixa->ixa_nce);
934 934 if (ixa->ixa_dce != NULL)
935 935 dce_refhold_notr(ixa->ixa_dce);
936 936
937 937 #ifdef DEBUG
938 938 ixa->ixa_curthread = NULL;
939 939 #endif
940 940
941 941 if (ixa->ixa_ipsec_latch != NULL)
942 942 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
943 943 if (ixa->ixa_ipsec_ah_sa != NULL)
944 944 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
945 945 if (ixa->ixa_ipsec_esp_sa != NULL)
946 946 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
947 947 if (ixa->ixa_ipsec_policy != NULL)
948 948 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
949 949 if (ixa->ixa_ipsec_action != NULL)
950 950 IPACT_REFHOLD(ixa->ixa_ipsec_action);
951 951
952 952 if (ixa->ixa_tsl != NULL) {
953 953 label_hold(ixa->ixa_tsl);
954 954 ixa->ixa_free_flags |= IXA_FREE_TSL;
955 955 }
956 956 if (ixa->ixa_cred != NULL) {
957 957 crhold(ixa->ixa_cred);
958 958 ixa->ixa_free_flags |= IXA_FREE_CRED;
959 959 }
960 960 return (ixa);
961 961 }
962 962
963 963 /*
964 964 * Used to replace the ixa_label field.
965 965 * The caller should have a reference on the label, which we transfer to
966 966 * the attributes so that when the attribute is freed/cleaned up
967 967 * we will release that reference.
968 968 */
969 969 void
970 970 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
971 971 {
972 972 ASSERT(tsl != NULL);
973 973
974 974 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
975 975 ASSERT(ixa->ixa_tsl != NULL);
976 976 label_rele(ixa->ixa_tsl);
977 977 } else {
978 978 ixa->ixa_free_flags |= IXA_FREE_TSL;
979 979 }
980 980 ixa->ixa_tsl = tsl;
981 981 }
982 982
983 983 /*
984 984 * Replace the ip_recv_attr_t's label.
985 985 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
986 986 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
987 987 * This can fail (and return B_FALSE) due to lack of memory.
988 988 */
989 989 boolean_t
990 990 ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
991 991 {
992 992 cred_t *newcr;
993 993
994 994 if (ira->ira_free_flags & IRA_FREE_TSL) {
995 995 ASSERT(ira->ira_tsl != NULL);
996 996 label_rele(ira->ira_tsl);
997 997 }
998 998 label_hold(tsl);
999 999 ira->ira_tsl = tsl;
1000 1000 ira->ira_free_flags |= IRA_FREE_TSL;
1001 1001
1002 1002 /*
1003 1003 * Reset zoneid if we have a shared address. That allows
1004 1004 * ip_fanout_tx_v4/v6 to determine the zoneid again.
1005 1005 */
1006 1006 if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
1007 1007 ira->ira_zoneid = ALL_ZONES;
1008 1008
1009 1009 /* We update ira_cred for RPC */
1010 1010 newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
1011 1011 if (newcr == NULL)
1012 1012 return (B_FALSE);
1013 1013 if (ira->ira_free_flags & IRA_FREE_CRED)
1014 1014 crfree(ira->ira_cred);
1015 1015 ira->ira_cred = newcr;
1016 1016 ira->ira_free_flags |= IRA_FREE_CRED;
1017 1017 return (B_TRUE);
1018 1018 }
1019 1019
1020 1020 /*
1021 1021 * This needs to be called after ip_set_destination/tsol_check_dest might
1022 1022 * have changed ixa_tsl to be specific for a destination, and we now want to
1023 1023 * send to a different destination.
1024 1024 * We have to restart with crgetlabel() since ip_set_destination/
1025 1025 * tsol_check_dest will start with ixa_tsl.
1026 1026 */
1027 1027 void
1028 1028 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
1029 1029 {
1030 1030 if (!is_system_labeled())
1031 1031 return;
1032 1032
1033 1033 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1034 1034 ASSERT(ixa->ixa_tsl != NULL);
1035 1035 label_rele(ixa->ixa_tsl);
1036 1036 ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1037 1037 }
1038 1038 ixa->ixa_tsl = crgetlabel(cr);
1039 1039 }
1040 1040
1041 1041 void
1042 1042 ixa_refrele(ip_xmit_attr_t *ixa)
1043 1043 {
1044 1044 IXA_REFRELE(ixa);
1045 1045 }
1046 1046
1047 1047 void
1048 1048 ixa_inactive(ip_xmit_attr_t *ixa)
1049 1049 {
1050 1050 ASSERT(ixa->ixa_refcnt == 0);
1051 1051
1052 1052 ixa_cleanup(ixa);
1053 1053 kmem_free(ixa, sizeof (*ixa));
1054 1054 }
1055 1055
1056 1056 /*
1057 1057 * Release any references contained in the ixa.
1058 1058 * Also clear any fields that are not controlled by ixa_flags.
1059 1059 */
1060 1060 void
1061 1061 ixa_cleanup(ip_xmit_attr_t *ixa)
1062 1062 {
1063 1063 if (ixa->ixa_ire != NULL) {
1064 1064 ire_refrele_notr(ixa->ixa_ire);
1065 1065 ixa->ixa_ire = NULL;
1066 1066 }
1067 1067 if (ixa->ixa_dce != NULL) {
1068 1068 dce_refrele_notr(ixa->ixa_dce);
1069 1069 ixa->ixa_dce = NULL;
1070 1070 }
1071 1071 if (ixa->ixa_nce != NULL) {
1072 1072 nce_refrele(ixa->ixa_nce);
1073 1073 ixa->ixa_nce = NULL;
1074 1074 }
1075 1075 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1076 1076 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1077 1077 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
1078 1078 ipsec_out_release_refs(ixa);
1079 1079 }
1080 1080 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1081 1081 ASSERT(ixa->ixa_tsl != NULL);
1082 1082 label_rele(ixa->ixa_tsl);
1083 1083 ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1084 1084 }
1085 1085 ixa->ixa_tsl = NULL;
1086 1086 if (ixa->ixa_free_flags & IXA_FREE_CRED) {
1087 1087 ASSERT(ixa->ixa_cred != NULL);
1088 1088 crfree(ixa->ixa_cred);
1089 1089 ixa->ixa_free_flags &= ~IXA_FREE_CRED;
1090 1090 }
1091 1091 ixa->ixa_cred = NULL;
1092 1092 ixa->ixa_src_preferences = 0;
1093 1093 ixa->ixa_ifindex = 0;
1094 1094 ixa->ixa_multicast_ifindex = 0;
1095 1095 ixa->ixa_multicast_ifaddr = INADDR_ANY;
1096 1096 }
1097 1097
1098 1098 /*
1099 1099 * Release any references contained in the ira.
1100 1100 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
1101 1101 * argument.
1102 1102 */
1103 1103 void
1104 1104 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
1105 1105 {
1106 1106 if (ira->ira_ill != NULL) {
1107 1107 if (ira->ira_rill != ira->ira_ill) {
1108 1108 /* Caused by async processing */
1109 1109 ill_refrele(ira->ira_rill);
1110 1110 }
1111 1111 if (refrele_ill)
1112 1112 ill_refrele(ira->ira_ill);
1113 1113 }
1114 1114 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1115 1115 ipsec_in_release_refs(ira);
1116 1116 }
1117 1117 if (ira->ira_free_flags & IRA_FREE_TSL) {
1118 1118 ASSERT(ira->ira_tsl != NULL);
1119 1119 label_rele(ira->ira_tsl);
1120 1120 ira->ira_free_flags &= ~IRA_FREE_TSL;
1121 1121 }
1122 1122 ira->ira_tsl = NULL;
1123 1123 if (ira->ira_free_flags & IRA_FREE_CRED) {
1124 1124 ASSERT(ira->ira_cred != NULL);
1125 1125 crfree(ira->ira_cred);
1126 1126 ira->ira_free_flags &= ~IRA_FREE_CRED;
1127 1127 }
1128 1128 ira->ira_cred = NULL;
1129 1129 }
1130 1130
1131 1131 /*
1132 1132 * Function to help release any IRE, NCE, or DCEs that
1133 1133 * have been deleted and are marked as condemned.
1134 1134 * The caller is responsible for any serialization which is different
1135 1135 * for TCP, SCTP, and others.
1136 1136 */
1137 1137 static void
1138 1138 ixa_cleanup_stale(ip_xmit_attr_t *ixa)
1139 1139 {
1140 1140 ire_t *ire;
1141 1141 nce_t *nce;
1142 1142 dce_t *dce;
1143 1143
1144 1144 ire = ixa->ixa_ire;
1145 1145 nce = ixa->ixa_nce;
1146 1146 dce = ixa->ixa_dce;
1147 1147
1148 1148 if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
1149 1149 ire_refrele_notr(ire);
1150 1150 ire = ire_blackhole(ixa->ixa_ipst,
1151 1151 !(ixa->ixa_flags & IXAF_IS_IPV4));
1152 1152 ASSERT(ire != NULL);
1153 1153 #ifdef DEBUG
1154 1154 ire_refhold_notr(ire);
1155 1155 ire_refrele(ire);
1156 1156 #endif
1157 1157 ixa->ixa_ire = ire;
1158 1158 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1159 1159 }
1160 1160 if (nce != NULL && nce->nce_is_condemned) {
1161 1161 /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
1162 1162 nce_refrele(nce);
1163 1163 ixa->ixa_nce = NULL;
1164 1164 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1165 1165 }
1166 1166 if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
1167 1167 dce_refrele_notr(dce);
1168 1168 dce = dce_get_default(ixa->ixa_ipst);
1169 1169 ASSERT(dce != NULL);
1170 1170 #ifdef DEBUG
1171 1171 dce_refhold_notr(dce);
1172 1172 dce_refrele(dce);
1173 1173 #endif
1174 1174 ixa->ixa_dce = dce;
1175 1175 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1176 1176 }
1177 1177 }
1178 1178
1179 1179 static mblk_t *
1180 1180 tcp_ixa_cleanup_getmblk(conn_t *connp)
1181 1181 {
1182 1182 tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
1183 1183 int need_retry;
1184 1184 mblk_t *mp;
1185 1185
1186 1186 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1187 1187
1188 1188 /*
1189 1189 * It's possible that someone else came in and started cleaning up
1190 1190 * another connection between the time we verified this one is not being
1191 1191 * cleaned up and the time we actually get the shared mblk. If that's
1192 1192 * the case, we've dropped the lock, and some other thread may have
1193 1193 * cleaned up this connection again, and is still waiting for
1194 1194 * notification of that cleanup's completion. Therefore we need to
1195 1195 * recheck.
1196 1196 */
1197 1197 do {
1198 1198 need_retry = 0;
1199 1199 while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
1200 1200 cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
1201 1201 &tcps->tcps_ixa_cleanup_lock);
1202 1202 }
1203 1203
1204 1204 while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
1205 1205 /*
1206 1206 * Multiple concurrent cleanups; need to have the last
1207 1207 * one run since it could be an unplumb.
1208 1208 */
1209 1209 need_retry = 1;
1210 1210 cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
1211 1211 &tcps->tcps_ixa_cleanup_lock);
1212 1212 }
1213 1213 } while (need_retry);
1214 1214
1215 1215 /*
1216 1216 * We now have the lock and the mblk; now make sure that no one else can
1217 1217 * try to clean up this connection or enqueue it for cleanup, clear the
1218 1218 * mblk pointer for this stack, drop the lock, and return the mblk.
1219 1219 */
1220 1220 ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
1221 1221 ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
1222 1222 ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
1223 1223 ASSERT(mp != NULL);
1224 1224
1225 1225 connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
1226 1226 tcps->tcps_ixa_cleanup_mp = NULL;
1227 1227 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1228 1228
1229 1229 return (mp);
1230 1230 }
1231 1231
1232 1232 /*
1233 1233 * Used to run ixa_cleanup_stale inside the tcp squeue.
1234 1234 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
1235 1235 * and waking up the caller.
1236 1236 */
1237 1237 /* ARGSUSED2 */
1238 1238 static void
1239 1239 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
1240 1240 ip_recv_attr_t *dummy)
1241 1241 {
1242 1242 conn_t *connp = (conn_t *)arg;
1243 1243 tcp_stack_t *tcps;
1244 1244
1245 1245 tcps = connp->conn_netstack->netstack_tcp;
1246 1246
1247 1247 ixa_cleanup_stale(connp->conn_ixa);
1248 1248
1249 1249 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1250 1250 ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
1251 1251 connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
1252 1252 tcps->tcps_ixa_cleanup_mp = mp;
1253 1253 cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
1254 1254 /*
1255 1255 * It is possible for any number of threads to be waiting for cleanup of
1256 1256 * different connections. Absent a per-connection (or per-IXA) CV, we
1257 1257 * need to wake them all up even though only one can be waiting on this
1258 1258 * particular cleanup.
1259 1259 */
1260 1260 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1261 1261 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1262 1262 }
1263 1263
1264 1264 static void
1265 1265 tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
1266 1266 {
1267 1267 tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
1268 1268
1269 1269 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1270 1270
1271 1271 ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);
1272 1272
1273 1273 while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
1274 1274 cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
1275 1275 &tcps->tcps_ixa_cleanup_lock);
1276 1276 }
1277 1277
1278 1278 ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
1279 1279 connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
1280 1280 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1281 1281
1282 1282 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1283 1283 }
1284 1284
1285 1285 /*
1286 1286 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
1287 1287 * have been deleted and are marked as condemned.
1288 1288 * Note that we can't cleanup the pointers since there can be threads
1289 1289 * in conn_ip_output() sending while we are called.
1290 1290 */
1291 1291 void
1292 1292 conn_ixa_cleanup(conn_t *connp, void *arg)
1293 1293 {
1294 1294 boolean_t tryhard = (boolean_t)arg;
1295 1295
1296 1296 if (IPCL_IS_TCP(connp)) {
1297 1297 mblk_t *mp;
1298 1298
1299 1299 mp = tcp_ixa_cleanup_getmblk(connp);
1300 1300
1301 1301 if (connp->conn_sqp->sq_run == curthread) {
1302 1302 /* Already on squeue */
1303 1303 tcp_ixa_cleanup(connp, mp, NULL, NULL);
1304 1304 } else {
1305 1305 CONN_INC_REF(connp);
1306 1306 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
1307 1307 connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
1308 1308 }
1309 1309 tcp_ixa_cleanup_wait_and_finish(connp);
1310 1310 } else if (IPCL_IS_SCTP(connp)) {
1311 1311 sctp_t *sctp;
1312 1312 sctp_faddr_t *fp;
1313 1313
1314 1314 sctp = CONN2SCTP(connp);
1315 1315 RUN_SCTP(sctp);
1316 1316 ixa_cleanup_stale(connp->conn_ixa);
1317 1317 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
1318 1318 ixa_cleanup_stale(fp->sf_ixa);
1319 1319 WAKE_SCTP(sctp);
1320 1320 } else {
1321 1321 ip_xmit_attr_t *ixa;
1322 1322
1323 1323 /*
1324 1324 * If there is a different thread using conn_ixa then we get a
1325 1325 * new copy and cut the old one loose from conn_ixa. Otherwise
1326 1326 * we use conn_ixa and prevent any other thread from
1327 1327 * using/changing it. Anybody using conn_ixa (e.g., a thread in
1328 1328 * conn_ip_output) will do an ixa_refrele which will remove any
1329 1329 * references on the ire etc.
1330 1330 *
1331 1331 * Once we are done other threads can use conn_ixa since the
1332 1332 * refcnt will be back at one.
1333 1333 *
1334 1334 * We are called either because an ill is going away, or
1335 1335 * due to memory reclaim. In the former case we wait for
1336 1336 * memory since we must remove the refcnts on the ill.
1337 1337 */
1338 1338 if (tryhard) {
1339 1339 ixa = conn_get_ixa_tryhard(connp, B_TRUE);
1340 1340 ASSERT(ixa != NULL);
1341 1341 } else {
1342 1342 ixa = conn_get_ixa(connp, B_TRUE);
1343 1343 if (ixa == NULL) {
1344 1344 /*
1345 1345 * Somebody else was using it and kmem_alloc
1346 1346 * failed! Next memory reclaim will try to
1347 1347 * clean up.
1348 1348 */
1349 1349 DTRACE_PROBE1(conn__ixa__cleanup__bail,
1350 1350 conn_t *, connp);
1351 1351 return;
1352 1352 }
1353 1353 }
1354 1354 ixa_cleanup_stale(ixa);
1355 1355 ixa_refrele(ixa);
1356 1356 }
1357 1357 }
1358 1358
1359 1359 /*
1360 1360 * ixa needs to be an exclusive copy so that no one changes the cookie
1361 1361 * or the ixa_nce.
1362 1362 */
1363 1363 boolean_t
1364 1364 ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
1365 1365 {
1366 1366 uintptr_t cookie = ixa->ixa_cookie;
1367 1367 ill_dld_direct_t *idd;
1368 1368 idl_tx_list_t *idl_txl;
1369 1369 ill_t *ill = ixa->ixa_nce->nce_ill;
1370 1370 boolean_t inserted = B_FALSE;
1371 1371
1372 1372 idd = &(ill)->ill_dld_capab->idc_direct;
1373 1373 idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
1374 1374 mutex_enter(&idl_txl->txl_lock);
1375 1375
1376 1376 /*
1377 1377 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
1378 1378 * control is asserted on an ill that does not support direct calls.
1379 1379 * Jump to insert.
1380 1380 */
1381 1381 if (cookie == 0)
1382 1382 goto tryinsert;
1383 1383
1384 1384 ASSERT(ILL_DIRECT_CAPABLE(ill));
1385 1385
1386 1386 if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
1387 1387 DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
1388 1388 } else if (idl_txl->txl_cookie != NULL &&
1389 1389 idl_txl->txl_cookie != ixa->ixa_cookie) {
1390 1390 DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
1391 1391 uintptr_t, idl_txl->txl_cookie);
1392 1392 /* TODO: bump kstat for cookie collision */
1393 1393 } else {
1394 1394 /*
1395 1395 * Check/set conn_blocked under conn_lock. Note that txl_lock
1396 1396 * will not suffice since two separate UDP threads may be
1397 1397 * racing to send to different destinations that are
1398 1398 * associated with different cookies and thus may not be
1399 1399 * holding the same txl_lock. Further, since a given conn_t
1400 1400 * can only be on a single drain list, the conn_t will be
1401 1401 * enqueued on whichever thread wins this race.
1402 1402 */
1403 1403 tryinsert: mutex_enter(&connp->conn_lock);
1404 1404 if (connp->conn_blocked) {
1405 1405 DTRACE_PROBE1(ill__tx__conn__already__blocked,
1406 1406 conn_t *, connp);
1407 1407 mutex_exit(&connp->conn_lock);
1408 1408 } else {
1409 1409 connp->conn_blocked = B_TRUE;
1410 1410 mutex_exit(&connp->conn_lock);
1411 1411 idl_txl->txl_cookie = cookie;
1412 1412 conn_drain_insert(connp, idl_txl);
1413 1413 if (!IPCL_IS_NONSTR(connp))
1414 1414 noenable(connp->conn_wq);
1415 1415 inserted = B_TRUE;
1416 1416 }
1417 1417 }
1418 1418 mutex_exit(&idl_txl->txl_lock);
1419 1419 return (inserted);
1420 1420 }
↓ open down ↓ |
551 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX