Print this page
5255 uts shouldn't open-code ISP2
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ilb/ilb_conn.c
+++ new/usr/src/uts/common/inet/ilb/ilb_conn.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 +#include <sys/sysmacros.h>
27 28 #include <sys/types.h>
28 29 #include <sys/conf.h>
29 30 #include <sys/time.h>
30 31 #include <sys/taskq.h>
31 32 #include <sys/cmn_err.h>
32 33 #include <sys/sdt.h>
33 34 #include <sys/atomic.h>
34 35 #include <netinet/in.h>
35 36 #include <inet/ip.h>
36 37 #include <inet/ip6.h>
37 38 #include <inet/tcp.h>
38 39 #include <inet/udp_impl.h>
39 40 #include <inet/ilb.h>
40 41
41 42 #include "ilb_stack.h"
42 43 #include "ilb_impl.h"
43 44 #include "ilb_conn.h"
44 45 #include "ilb_nat.h"
45 46
46 47 /*
47 48 * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection
48 49 *
49 50 * start: starting index into the hash table to do gc
50 51 * end: ending index into the hash table to do gc
51 52 * ilbs: pointer to the ilb_stack_t of the IP stack
52 53 * tid_lock: mutex to protect the timer id.
53 54 * tid: timer id of the timer
54 55 */
55 56 typedef struct ilb_timer_s {
56 57 uint32_t start;
57 58 uint32_t end;
58 59 ilb_stack_t *ilbs;
59 60 kmutex_t tid_lock;
60 61 timeout_id_t tid;
61 62 } ilb_timer_t;
62 63
63 64 /* Hash macro for finding the index to the conn hash table */
64 65 #define ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size) \
65 66 (((*((saddr) + 3) ^ *((daddr) + 3)) * 50653 + \
66 67 (*((saddr) + 2) ^ *((daddr) + 2)) * 1369 + \
67 68 (*((saddr) + 1) ^ *((daddr) + 1)) * 37 + \
68 69 (*(saddr) ^ *(daddr)) + (sport) * 37 + (dport)) & \
69 70 ((hash_size) - 1))
70 71
71 72 /* Kmem cache for the conn hash entry */
72 73 static struct kmem_cache *ilb_conn_cache = NULL;
73 74
74 75 /*
75 76 * There are 60 timers running to do conn cache garbage collection. Each
76 77 * gc thread is responsible for 1/60 of the conn hash table.
77 78 */
78 79 static int ilb_conn_timer_size = 60;
79 80
80 81 /* Each of the above gc timers wake up every 15s to do the gc. */
81 82 static int ilb_conn_cache_timeout = 15;
82 83
83 84 #define ILB_STICKY_HASH(saddr, rule, hash_size) \
84 85 (((*((saddr) + 3) ^ ((rule) >> 24)) * 29791 + \
85 86 (*((saddr) + 2) ^ ((rule) >> 16)) * 961 + \
86 87 (*((saddr) + 1) ^ ((rule) >> 8)) * 31 + \
87 88 (*(saddr) ^ (rule))) & ((hash_size) - 1))
88 89
89 90 static struct kmem_cache *ilb_sticky_cache = NULL;
90 91
91 92 /*
92 93 * There are 60 timers running to do sticky cache garbage collection. Each
93 94 * gc thread is responsible for 1/60 of the sticky hash table.
94 95 */
95 96 static int ilb_sticky_timer_size = 60;
96 97
97 98 /* Each of the above gc timers wake up every 15s to do the gc. */
98 99 static int ilb_sticky_timeout = 15;
99 100
/*
 * Release a reference on a sticky entry and stamp its last-accessed
 * time so the sticky gc timer can age it out.
 *
 * Wrapped in do { } while (0) so it expands to a single statement, and
 * every use of the argument is parenthesized (the original mutex_exit()
 * used an unparenthesized 's', breaking for non-primary expressions).
 */
#define	ILB_STICKY_REFRELE(s)					\
do {								\
	mutex_enter(&(s)->hash->sticky_lock);			\
	(s)->refcnt--;						\
	(s)->atime = ddi_get_lbolt64();				\
	mutex_exit(&(s)->hash->sticky_lock);			\
} while (0)
107 108
108 109
/*
 * Create the kmem cache from which ilb_conn_t entries are allocated.
 * Called lazily from ilb_conn_hash_init() when the first conn hash
 * table is set up; ilb_conn_cache_fini() destroys it.
 */
static void
ilb_conn_cache_init(void)
{
	ilb_conn_cache = kmem_cache_create("ilb_conn_cache",
	    sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL,
	    ilb_kmem_flags);
}
116 117
117 118 void
118 119 ilb_conn_cache_fini(void)
119 120 {
120 121 if (ilb_conn_cache != NULL) {
121 122 kmem_cache_destroy(ilb_conn_cache);
122 123 ilb_conn_cache = NULL;
123 124 }
124 125 }
125 126
/*
 * Unlink connp from one of its two hash bucket lists: the c2s list when
 * c2s is B_TRUE, otherwise the s2c list.  Also decrements the bucket's
 * entry count.  The caller must hold that bucket's hash lock.
 *
 * next_prev/prev_next alias the link fields of connp's neighbours; each
 * is assigned only when the corresponding neighbour exists, and every
 * later dereference is guarded by the same NULL test, so the
 * apparently-uninitialized uses below are all safe.
 */
static void
ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s)
{
	ilb_conn_hash_t *hash;
	ilb_conn_t **next, **prev;
	ilb_conn_t **next_prev, **prev_next;

	/* Select the link fields of the requested direction. */
	if (c2s) {
		hash = connp->conn_c2s_hash;
		ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
		next = &connp->conn_c2s_next;
		prev = &connp->conn_c2s_prev;
		if (*next != NULL)
			next_prev = &(*next)->conn_c2s_prev;
		if (*prev != NULL)
			prev_next = &(*prev)->conn_c2s_next;
	} else {
		hash = connp->conn_s2c_hash;
		ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
		next = &connp->conn_s2c_next;
		prev = &connp->conn_s2c_prev;
		if (*next != NULL)
			next_prev = &(*next)->conn_s2c_prev;
		if (*prev != NULL)
			prev_next = &(*prev)->conn_s2c_next;
	}

	if (hash->ilb_connp == connp) {
		/* connp is the head of the bucket list. */
		hash->ilb_connp = *next;
		if (*next != NULL)
			*next_prev = NULL;
	} else {
		/* Splice connp out of the middle or tail of the list. */
		if (*prev != NULL)
			*prev_next = *next;
		if (*next != NULL)
			*next_prev = *prev;
	}
	ASSERT(hash->ilb_conn_cnt > 0);
	hash->ilb_conn_cnt--;

	/* Fully detach connp so stale links cannot be followed. */
	*next = NULL;
	*prev = NULL;
}
169 170
/*
 * Remove connp from both the c2s and s2c hash tables, return the NAT
 * source port to its arena (full NAT only), drop the references held on
 * the sticky entry and the server, and free the entry.  The caller must
 * hold both hash bucket locks.
 */
static void
ilb_conn_remove(ilb_conn_t *connp)
{
	ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
	ilb_conn_remove_common(connp, B_TRUE);
	ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
	ilb_conn_remove_common(connp, B_FALSE);

	/* For full NAT, give the reserved NAT source port back. */
	if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
		in_port_t port;

		port = ntohs(connp->conn_rule_cache.info.nat_sport);
		vmem_free(connp->conn_rule_cache.info.src_ent->nse_port_arena,
		    (void *)(uintptr_t)port, 1);
	}

	if (connp->conn_sticky != NULL)
		ILB_STICKY_REFRELE(connp->conn_sticky);
	ILB_SERVER_REFRELE(connp->conn_server);
	kmem_cache_free(ilb_conn_cache, connp);
}
191 192
/*
 * Routine to do periodic garbage collection of conn hash entries. When
 * a conn hash timer fires, it dispatches a taskq to call this function
 * to do the gc. Note that each taskq is responsible for a portion of
 * the table. The portion is stored in timer->start, timer->end.
 */
static void
ilb_conn_cleanup(void *arg)
{
	ilb_timer_t *timer = (ilb_timer_t *)arg;
	uint32_t i;
	ilb_stack_t *ilbs;
	ilb_conn_hash_t *c2s_hash, *s2c_hash;
	ilb_conn_t *connp, *nxt_connp;
	int64_t now;
	int64_t expiry;
	boolean_t die_now;

	ilbs = timer->ilbs;
	c2s_hash = ilbs->ilbs_c2s_conn_hash;
	ASSERT(c2s_hash != NULL);

	now = ddi_get_lbolt64();
	/* Walk only this timer's slice of the c2s hash table. */
	for (i = timer->start; i < timer->end; i++) {
		mutex_enter(&c2s_hash[i].ilb_conn_hash_lock);
		if ((connp = c2s_hash[i].ilb_connp) == NULL) {
			ASSERT(c2s_hash[i].ilb_conn_cnt == 0);
			mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
			continue;
		}
		do {
			ASSERT(c2s_hash[i].ilb_conn_cnt > 0);
			ASSERT(connp->conn_c2s_hash == &c2s_hash[i]);
			/* Save the next entry before we may free connp. */
			nxt_connp = connp->conn_c2s_next;
			expiry = now - SEC_TO_TICK(connp->conn_expiry);
			/* The back-end server has been disabled/removed. */
			if (connp->conn_server->iser_die_time != 0 &&
			    connp->conn_server->iser_die_time < now)
				die_now = B_TRUE;
			else
				die_now = B_FALSE;
			/*
			 * ilb_conn_remove() requires both direction locks;
			 * take the matching s2c bucket lock as well.
			 */
			s2c_hash = connp->conn_s2c_hash;
			mutex_enter(&s2c_hash->ilb_conn_hash_lock);

			/* Marked for gc, server dying, or idle both ways. */
			if (connp->conn_gc || die_now ||
			    (connp->conn_c2s_atime < expiry &&
			    connp->conn_s2c_atime < expiry)) {
				/* Need to update the nat list cur_connp */
				if (connp == ilbs->ilbs_conn_list_connp) {
					ilbs->ilbs_conn_list_connp =
					    connp->conn_c2s_next;
				}
				ilb_conn_remove(connp);
				goto nxt_connp;
			}

			if (connp->conn_l4 != IPPROTO_TCP)
				goto nxt_connp;

			/* Update and check TCP related conn info */
			if (connp->conn_c2s_tcp_fin_sent &&
			    SEQ_GT(connp->conn_s2c_tcp_ack,
			    connp->conn_c2s_tcp_fss)) {
				connp->conn_c2s_tcp_fin_acked = B_TRUE;
			}
			if (connp->conn_s2c_tcp_fin_sent &&
			    SEQ_GT(connp->conn_c2s_tcp_ack,
			    connp->conn_s2c_tcp_fss)) {
				connp->conn_s2c_tcp_fin_acked = B_TRUE;
			}
			/* Both FINs acked: the connection is fully closed. */
			if (connp->conn_c2s_tcp_fin_acked &&
			    connp->conn_s2c_tcp_fin_acked) {
				ilb_conn_remove(connp);
			}
nxt_connp:
			mutex_exit(&s2c_hash->ilb_conn_hash_lock);
			connp = nxt_connp;
		} while (connp != NULL);
		mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
	}
}
272 273
273 274 /* Conn hash timer routine. It dispatches a taskq and restart the timer */
274 275 static void
275 276 ilb_conn_timer(void *arg)
276 277 {
277 278 ilb_timer_t *timer = (ilb_timer_t *)arg;
278 279
279 280 (void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup,
280 281 arg, TQ_SLEEP);
281 282 mutex_enter(&timer->tid_lock);
282 283 if (timer->tid == 0) {
283 284 mutex_exit(&timer->tid_lock);
284 285 } else {
285 286 timer->tid = timeout(ilb_conn_timer, arg,
286 287 SEC_TO_TICK(ilb_conn_cache_timeout));
287 288 mutex_exit(&timer->tid_lock);
288 289 }
289 290 }
290 291
/*
 * Set up the conn hash tables and gc machinery for an ILB stack
 * instance: allocate the c2s and s2c hash tables (rounding the size up
 * to a power of 2 so the hash macros can mask), create the gc taskq,
 * and start the gc timers, each covering one slice of the table.
 */
void
ilb_conn_hash_init(ilb_stack_t *ilbs)
{
	extern pri_t minclsyspri;
	int i, part;
	ilb_timer_t *tm;
	char tq_name[TASKQ_NAMELEN];

	/*
	 * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to
	 * the next power of 2.
	 */
	if (!ISP2(ilbs->ilbs_conn_hash_size)) {
		for (i = 0; i < 31; i++) {
			if (ilbs->ilbs_conn_hash_size < (1 << i))
				break;
		}
		ilbs->ilbs_conn_hash_size = 1 << i;
	}

	/*
	 * Can sleep since this should be called when a rule is being added,
	 * hence we are not in interrupt context.
	 */
	ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
	    ilbs->ilbs_conn_hash_size, KM_SLEEP);
	ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
	    ilbs->ilbs_conn_hash_size, KM_SLEEP);

	/* One lock per bucket in each direction's table. */
	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
		mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock,
		    NULL, MUTEX_DEFAULT, NULL);
	}
	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
		mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock,
		    NULL, MUTEX_DEFAULT, NULL);
	}

	/* The conn kmem cache is shared by all stack instances. */
	if (ilb_conn_cache == NULL)
		ilb_conn_cache_init();

	(void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p",
	    (void *)ilbs->ilbs_netstack);
	ASSERT(ilbs->ilbs_conn_taskq == NULL);
	ilbs->ilbs_conn_taskq = taskq_create(tq_name,
	    ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size,
	    ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);

	ASSERT(ilbs->ilbs_conn_timer_list == NULL);
	ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
	    ilb_conn_timer_size, KM_SLEEP);

	/*
	 * The hash table is divided in equal partition for those timers
	 * to do garbage collection.
	 */
	part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1;
	for (i = 0; i < ilb_conn_timer_size; i++) {
		tm = ilbs->ilbs_conn_timer_list + i;
		tm->start = i * part;
		tm->end = i * part + part;
		if (tm->end > ilbs->ilbs_conn_hash_size)
			tm->end = ilbs->ilbs_conn_hash_size;
		tm->ilbs = ilbs;
		mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
		/* Spread out the starting execution time of all the timers. */
		tm->tid = timeout(ilb_conn_timer, tm,
		    SEC_TO_TICK(ilb_conn_cache_timeout + i));
	}
}
361 362
362 363 void
363 364 ilb_conn_hash_fini(ilb_stack_t *ilbs)
364 365 {
365 366 uint32_t i;
366 367 ilb_conn_t *connp;
367 368
368 369 if (ilbs->ilbs_c2s_conn_hash == NULL) {
369 370 ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
370 371 return;
371 372 }
372 373
373 374 /* Stop all the timers first. */
374 375 for (i = 0; i < ilb_conn_timer_size; i++) {
375 376 timeout_id_t tid;
376 377
377 378 /* Setting tid to 0 tells the timer handler not to restart. */
378 379 mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock);
379 380 tid = ilbs->ilbs_conn_timer_list[i].tid;
380 381 ilbs->ilbs_conn_timer_list[i].tid = 0;
381 382 mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock);
382 383 (void) untimeout(tid);
383 384 }
384 385 kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) *
385 386 ilb_conn_timer_size);
386 387 taskq_destroy(ilbs->ilbs_conn_taskq);
387 388 ilbs->ilbs_conn_taskq = NULL;
388 389
389 390 /* Then remove all the conns. */
390 391 for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
391 392 while ((connp = ilbs->ilbs_s2c_conn_hash->ilb_connp) != NULL) {
392 393 ilbs->ilbs_s2c_conn_hash->ilb_connp =
393 394 connp->conn_s2c_next;
394 395 ILB_SERVER_REFRELE(connp->conn_server);
395 396 if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
396 397 ilb_nat_src_entry_t *ent;
397 398 in_port_t port;
398 399
399 400 /*
400 401 * src_ent will be freed in ilb_nat_src_fini().
401 402 */
402 403 port = ntohs(
403 404 connp->conn_rule_cache.info.nat_sport);
404 405 ent = connp->conn_rule_cache.info.src_ent;
405 406 vmem_free(ent->nse_port_arena,
406 407 (void *)(uintptr_t)port, 1);
407 408 }
408 409 kmem_cache_free(ilb_conn_cache, connp);
409 410 }
410 411 }
411 412 kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) *
412 413 ilbs->ilbs_conn_hash_size);
413 414 kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) *
414 415 ilbs->ilbs_conn_hash_size);
415 416 }
416 417
417 418 /*
418 419 * Internet checksum adjustment calculation routines. We pre-calculate
419 420 * checksum adjustment so that we don't need to compute the checksum on
420 421 * the whole packet when we change address/port in the packet.
421 422 */
422 423
/*
 * Compute the one's-complement checksum adjustment for a half-NAT
 * rewrite of one IPv4 address (two 16-bit words) and one port.  The
 * adjustment is later added to a packet's existing checksum so the sum
 * over the whole packet need not be recomputed.
 */
static void
hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t old_sum;

	/* Sum the pieces being removed and fold the carries back in. */
	old_sum = (uint32_t)oaddr[0] + oaddr[1] + old_port;
	while ((old_sum >> 16) != 0)
		old_sum = (old_sum & 0xffff) + (old_sum >> 16);

	/* One's-complement the old sum, then add in the new pieces. */
	*adj_sum = (uint16_t)~old_sum + naddr[0] + naddr[1] + new_port;
}
434 435
/*
 * Same as hnat_cksum_v4(), but for one IPv6 address (eight 16-bit
 * words) and one port.
 */
static void
hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t sum;
	int i;

	/* Sum the 8 old address words plus the old port, folding carries. */
	sum = old_port;
	for (i = 0; i < 8; i++)
		sum += oaddr[i];
	while ((sum >> 16) != 0)
		sum = (sum & 0xffff) + (sum >> 16);

	/* One's-complement the old sum, then add in the new pieces. */
	sum = (uint16_t)~sum;
	for (i = 0; i < 8; i++)
		sum += naddr[i];
	*adj_sum = sum + new_port;
}
450 451
/*
 * Checksum adjustment for a full-NAT rewrite of both IPv4 addresses
 * (two 16-bit words each) and both ports.
 */
static void
fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t sum;

	/* Both old addresses and ports, with carries folded in. */
	sum = (uint32_t)oaddr1[0] + oaddr1[1] + old_port1;
	sum += (uint32_t)oaddr2[0] + oaddr2[1] + old_port2;
	while ((sum >> 16) != 0)
		sum = (sum & 0xffff) + (sum >> 16);

	/* One's-complement the old sum, then add in all new pieces. */
	*adj_sum = (uint16_t)~sum + naddr1[0] + naddr1[1] + new_port1 +
	    naddr2[0] + naddr2[1] + new_port2;
}
465 466
/*
 * Checksum adjustment for a full-NAT rewrite of both IPv6 addresses
 * (eight 16-bit words each) and both ports.
 */
static void
fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t sum;
	int i;

	/* Both old addresses and ports, with carries folded in. */
	sum = (uint32_t)old_port1 + old_port2;
	for (i = 0; i < 8; i++)
		sum += (uint32_t)oaddr1[i] + oaddr2[i];
	while ((sum >> 16) != 0)
		sum = (sum & 0xffff) + (sum >> 16);

	/* One's-complement the old sum, then add in all new pieces. */
	sum = (uint16_t)~sum;
	for (i = 0; i < 8; i++)
		sum += (uint32_t)naddr1[i] + naddr2[i];
	*adj_sum = sum + new_port1 + new_port2;
}
488 489
/*
 * Add a conn hash entry to the tables. Note that a conn hash entry
 * (ilb_conn_t) contains info on both directions. And there are two hash
 * tables, one for client to server and the other for server to client.
 * So the same entry is added to both tables and can be accessed by two
 * threads simultaneously. But each thread will only access data on one
 * direction, so there is no conflict.
 */
int
ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server,
    in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport,
    ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s)
{
	ilb_conn_t *connp;
	ilb_conn_hash_t *hash;
	int i;

	/*
	 * On allocation failure, undo the NAT source port reservation
	 * and the sticky reference the caller has already taken, then
	 * return ENOMEM.
	 */
	connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP);
	if (connp == NULL) {
		if (s != NULL) {
			if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
				ilb_nat_src_entry_t **entry;

				entry = s->server->iser_nat_src->src_list;
				vmem_free(entry[s->nat_src_idx]->nse_port_arena,
				    (void *)(uintptr_t)ntohs(info->nat_sport),
				    1);
			}
			ILB_STICKY_REFRELE(s);
		}
		return (ENOMEM);
	}

	connp->conn_l4 = rule->ir_proto;

	/* The conn holds a reference on the server until it is removed. */
	connp->conn_server = server;
	ILB_SERVER_REFHOLD(server);
	connp->conn_sticky = s;

	/* Cache rule info so packets need not look up the rule again. */
	connp->conn_rule_cache.topo = rule->ir_topo;
	connp->conn_rule_cache.info = *info;

	connp->conn_gc = B_FALSE;

	connp->conn_expiry = rule->ir_nat_expiry;
	connp->conn_cr_time = ddi_get_lbolt64();

	/* Client to server info. */
	connp->conn_c2s_saddr = *src;
	connp->conn_c2s_sport = sport;
	connp->conn_c2s_daddr = *dst;
	connp->conn_c2s_dport = dport;

	connp->conn_c2s_atime = ddi_get_lbolt64();
	/* The packet that triggers this creation should be counted. */
	connp->conn_c2s_pkt_cnt = 1;
	connp->conn_c2s_tcp_fin_sent = B_FALSE;
	connp->conn_c2s_tcp_fin_acked = B_FALSE;

	/* Server to client info, before NAT */
	switch (rule->ir_topo) {
	case ILB_TOPO_IMPL_HALF_NAT:
		connp->conn_s2c_saddr = info->nat_dst;
		connp->conn_s2c_sport = info->nat_dport;
		connp->conn_s2c_daddr = *src;
		connp->conn_s2c_dport = sport;

		/* Pre-calculate checksum changes for both directions */
		if (rule->ir_ipver == IPPROTO_IP) {
			/*
			 * For v4, only the last 32 bits of the mapped
			 * address (s6_addr32[3]) hold the IPv4 address.
			 */
			hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 0,
			    &connp->conn_c2s_ip_sum);
			hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3], dport,
			    info->nat_dport, &connp->conn_c2s_tp_sum);
			*ip_sum = connp->conn_c2s_ip_sum;
			*tp_sum = connp->conn_c2s_tp_sum;

			hnat_cksum_v4(
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3], 0, 0,
			    &connp->conn_s2c_ip_sum);
			hnat_cksum_v4(
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3],
			    info->nat_dport, dport,
			    &connp->conn_s2c_tp_sum);
		} else {
			/* IPv6 has no IP header checksum. */
			connp->conn_c2s_ip_sum = 0;
			hnat_cksum_v6((uint16_t *)dst,
			    (uint16_t *)&info->nat_dst, dport,
			    info->nat_dport, &connp->conn_c2s_tp_sum);
			*ip_sum = 0;
			*tp_sum = connp->conn_c2s_tp_sum;

			connp->conn_s2c_ip_sum = 0;
			hnat_cksum_v6((uint16_t *)&info->nat_dst,
			    (uint16_t *)dst, info->nat_dport, dport,
			    &connp->conn_s2c_tp_sum);
		}
		break;
	case ILB_TOPO_IMPL_NAT:
		connp->conn_s2c_saddr = info->nat_dst;
		connp->conn_s2c_sport = info->nat_dport;
		connp->conn_s2c_daddr = info->nat_src;
		connp->conn_s2c_dport = info->nat_sport;

		/* Full NAT rewrites both addresses and both ports. */
		if (rule->ir_ipver == IPPROTO_IP) {
			fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3],
			    (uint16_t *)&info->nat_src.s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    0, 0, 0, 0, &connp->conn_c2s_ip_sum);
			fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3],
			    (uint16_t *)&info->nat_src.s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    sport, dport, info->nat_sport,
			    info->nat_dport, &connp->conn_c2s_tp_sum);
			*ip_sum = connp->conn_c2s_ip_sum;
			*tp_sum = connp->conn_c2s_tp_sum;

			fnat_cksum_v4(
			    (uint16_t *)&info->nat_src.s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    (uint16_t *)&src->s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3],
			    0, 0, 0, 0, &connp->conn_s2c_ip_sum);
			fnat_cksum_v4(
			    (uint16_t *)&info->nat_src.s6_addr32[3],
			    (uint16_t *)&info->nat_dst.s6_addr32[3],
			    (uint16_t *)&src->s6_addr32[3],
			    (uint16_t *)&dst->s6_addr32[3],
			    info->nat_sport, info->nat_dport,
			    sport, dport, &connp->conn_s2c_tp_sum);
		} else {
			fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst,
			    (uint16_t *)&info->nat_src,
			    (uint16_t *)&info->nat_dst,
			    sport, dport, info->nat_sport,
			    info->nat_dport, &connp->conn_c2s_tp_sum);
			connp->conn_c2s_ip_sum = 0;
			*ip_sum = 0;
			*tp_sum = connp->conn_c2s_tp_sum;

			fnat_cksum_v6((uint16_t *)&info->nat_src,
			    (uint16_t *)&info->nat_dst, (uint16_t *)src,
			    (uint16_t *)dst, info->nat_sport,
			    info->nat_dport, sport, dport,
			    &connp->conn_s2c_tp_sum);
			connp->conn_s2c_ip_sum = 0;
		}
		break;
	}

	connp->conn_s2c_atime = ddi_get_lbolt64();
	connp->conn_s2c_pkt_cnt = 1;
	connp->conn_s2c_tcp_fin_sent = B_FALSE;
	connp->conn_s2c_tcp_fin_acked = B_FALSE;

	/* Add it to the s2c hash table. */
	hash = ilbs->ilbs_s2c_conn_hash;
	i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3],
	    ntohs(connp->conn_s2c_sport),
	    (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3],
	    ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size);
	connp->conn_s2c_hash = &hash[i];
	DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i);

	/* Insert at the head of the bucket list. */
	mutex_enter(&hash[i].ilb_conn_hash_lock);
	hash[i].ilb_conn_cnt++;
	connp->conn_s2c_next = hash[i].ilb_connp;
	if (hash[i].ilb_connp != NULL)
		hash[i].ilb_connp->conn_s2c_prev = connp;
	connp->conn_s2c_prev = NULL;
	hash[i].ilb_connp = connp;
	mutex_exit(&hash[i].ilb_conn_hash_lock);

	/* Add it to the c2s hash table. */
	hash = ilbs->ilbs_c2s_conn_hash;
	i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
	    (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
	    ilbs->ilbs_conn_hash_size);
	connp->conn_c2s_hash = &hash[i];
	DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i);

	/* Insert at the head of the bucket list. */
	mutex_enter(&hash[i].ilb_conn_hash_lock);
	hash[i].ilb_conn_cnt++;
	connp->conn_c2s_next = hash[i].ilb_connp;
	if (hash[i].ilb_connp != NULL)
		hash[i].ilb_connp->conn_c2s_prev = connp;
	connp->conn_c2s_prev = NULL;
	hash[i].ilb_connp = connp;
	mutex_exit(&hash[i].ilb_conn_hash_lock);

	return (0);
}
686 687
687 688 /*
688 689 * If a connection is using TCP, we keep track of simple TCP state transition
689 690 * so that we know when to clean up an entry.
690 691 */
691 692 static boolean_t
692 693 update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len,
693 694 boolean_t c2s)
694 695 {
695 696 uint32_t ack, seq;
696 697 int32_t seg_len;
697 698
698 699 if (tcpha->tha_flags & TH_RST)
699 700 return (B_FALSE);
700 701
701 702 seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) -
702 703 TCP_HDR_LENGTH((tcph_t *)tcpha);
703 704
704 705 if (tcpha->tha_flags & TH_ACK)
705 706 ack = ntohl(tcpha->tha_ack);
706 707 seq = ntohl(tcpha->tha_seq);
707 708 if (c2s) {
708 709 ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
709 710 if (tcpha->tha_flags & TH_FIN) {
710 711 connp->conn_c2s_tcp_fss = seq + seg_len;
711 712 connp->conn_c2s_tcp_fin_sent = B_TRUE;
712 713 }
713 714 connp->conn_c2s_tcp_ack = ack;
714 715
715 716 /* Port reuse by the client, restart the conn. */
716 717 if (connp->conn_c2s_tcp_fin_sent &&
717 718 SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) {
718 719 connp->conn_c2s_tcp_fin_sent = B_FALSE;
719 720 connp->conn_c2s_tcp_fin_acked = B_FALSE;
720 721 }
721 722 } else {
722 723 ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
723 724 if (tcpha->tha_flags & TH_FIN) {
724 725 connp->conn_s2c_tcp_fss = seq + seg_len;
725 726 connp->conn_s2c_tcp_fin_sent = B_TRUE;
726 727 }
727 728 connp->conn_s2c_tcp_ack = ack;
728 729
729 730 /* Port reuse by the client, restart the conn. */
730 731 if (connp->conn_s2c_tcp_fin_sent &&
731 732 SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) {
732 733 connp->conn_s2c_tcp_fin_sent = B_FALSE;
733 734 connp->conn_s2c_tcp_fin_acked = B_FALSE;
734 735 }
735 736 }
736 737
737 738 return (B_TRUE);
738 739 }
739 740
/*
 * Helper routine to find a conn hash entry given some packet information
 * and the traffic direction (c2s, client to server?)
 */
static boolean_t
ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src,
    in_port_t sport, in6_addr_t *dst, in_port_t dport,
    ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum,
    int32_t pkt_len, boolean_t c2s)
{
	ilb_conn_hash_t *hash;
	uint_t i;
	ilb_conn_t *connp;
	boolean_t tcp_alive;
	boolean_t ret = B_FALSE;

	/* Both tables are sized the same, so the index works for either. */
	i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
	    (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
	    ilbs->ilbs_conn_hash_size);
	if (c2s) {
		hash = ilbs->ilbs_c2s_conn_hash;
		mutex_enter(&hash[i].ilb_conn_hash_lock);
		/* Match on protocol and the full 4-tuple. */
		for (connp = hash[i].ilb_connp; connp != NULL;
		    connp = connp->conn_c2s_next) {
			if (connp->conn_l4 == l4 &&
			    connp->conn_c2s_dport == dport &&
			    connp->conn_c2s_sport == sport &&
			    IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) &&
			    IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) {
				/* Refresh idle time and hand back NAT info. */
				connp->conn_c2s_atime = ddi_get_lbolt64();
				connp->conn_c2s_pkt_cnt++;
				*rule_cache = connp->conn_rule_cache;
				*ip_sum = connp->conn_c2s_ip_sum;
				*tp_sum = connp->conn_c2s_tp_sum;
				ret = B_TRUE;
				break;
			}
		}
	} else {
		hash = ilbs->ilbs_s2c_conn_hash;
		mutex_enter(&hash[i].ilb_conn_hash_lock);
		/* Match on protocol and the full 4-tuple. */
		for (connp = hash[i].ilb_connp; connp != NULL;
		    connp = connp->conn_s2c_next) {
			if (connp->conn_l4 == l4 &&
			    connp->conn_s2c_dport == dport &&
			    connp->conn_s2c_sport == sport &&
			    IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) &&
			    IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) {
				/* Refresh idle time and hand back NAT info. */
				connp->conn_s2c_atime = ddi_get_lbolt64();
				connp->conn_s2c_pkt_cnt++;
				*rule_cache = connp->conn_rule_cache;
				*ip_sum = connp->conn_s2c_ip_sum;
				*tp_sum = connp->conn_s2c_tp_sum;
				ret = B_TRUE;
				break;
			}
		}
	}
	/* Found a match (connp is non-NULL): update stats and TCP state. */
	if (ret) {
		ILB_S_KSTAT(connp->conn_server, pkt_processed);
		ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed,
		    pkt_len);

		switch (l4) {
		case (IPPROTO_TCP):
			tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len,
			    c2s);
			if (!tcp_alive) {
				/* RST seen: let the gc timer reap it. */
				connp->conn_gc = B_TRUE;
			}
			break;
		default:
			break;
		}
	}
	mutex_exit(&hash[i].ilb_conn_hash_lock);

	return (ret);
}
819 820
/*
 * To check if a given packet matches an existing conn hash entry. If it
 * does, return the information about this entry so that the caller can
 * do the proper NAT.
 */
825 826 boolean_t
826 827 ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph,
827 828 in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport,
828 829 uint32_t pkt_len, in6_addr_t *lb_dst)
829 830 {
830 831 ilb_rule_info_t rule_cache;
831 832 uint32_t adj_ip_sum, adj_tp_sum;
832 833 boolean_t ret;
833 834
834 835 /* Check the incoming hash table. */
835 836 if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
836 837 &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_TRUE)) {
837 838 switch (rule_cache.topo) {
838 839 case ILB_TOPO_IMPL_NAT:
839 840 *lb_dst = rule_cache.info.nat_dst;
840 841 ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
841 842 adj_ip_sum, adj_tp_sum, B_TRUE);
842 843 ret = B_TRUE;
843 844 break;
844 845 case ILB_TOPO_IMPL_HALF_NAT:
845 846 *lb_dst = rule_cache.info.nat_dst;
846 847 ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
847 848 adj_ip_sum, adj_tp_sum, B_TRUE);
848 849 ret = B_TRUE;
849 850 break;
850 851 default:
851 852 ret = B_FALSE;
852 853 break;
853 854 }
854 855 return (ret);
855 856 }
856 857 if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
857 858 &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_FALSE)) {
858 859 switch (rule_cache.topo) {
859 860 case ILB_TOPO_IMPL_NAT:
860 861 *lb_dst = rule_cache.info.src;
861 862 ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
862 863 adj_ip_sum, adj_tp_sum, B_FALSE);
863 864 ret = B_TRUE;
864 865 break;
865 866 case ILB_TOPO_IMPL_HALF_NAT:
866 867 *lb_dst = *dst;
867 868 ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
868 869 adj_ip_sum, adj_tp_sum, B_FALSE);
869 870 ret = B_TRUE;
870 871 break;
871 872 default:
872 873 ret = B_FALSE;
873 874 break;
874 875 }
875 876 return (ret);
876 877 }
877 878
878 879 return (B_FALSE);
879 880 }
880 881
881 882 /*
882 883 * To check if an ICMP packet belongs to a connection in one of the conn
883 884 * hash entries.
884 885 */
885 886 boolean_t
886 887 ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph,
887 888 void *icmph, in6_addr_t *lb_dst)
888 889 {
889 890 ilb_conn_hash_t *hash;
890 891 ipha_t *in_iph4;
891 892 ip6_t *in_iph6;
892 893 icmph_t *icmph4;
893 894 icmp6_t *icmph6;
894 895 in6_addr_t *in_src_p, *in_dst_p;
895 896 in_port_t *sport, *dport;
896 897 int l4;
897 898 uint_t i;
898 899 ilb_conn_t *connp;
899 900 ilb_rule_info_t rule_cache;
900 901 uint32_t adj_ip_sum;
901 902 boolean_t full_nat;
902 903
903 904 if (l3 == IPPROTO_IP) {
904 905 in6_addr_t in_src, in_dst;
905 906
906 907 icmph4 = (icmph_t *)icmph;
907 908 in_iph4 = (ipha_t *)&icmph4[1];
908 909
909 910 if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) +
910 911 ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
911 912 return (B_FALSE);
912 913 }
913 914
914 915 IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src);
915 916 in_src_p = &in_src;
916 917 IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst);
917 918 in_dst_p = &in_dst;
918 919
919 920 l4 = in_iph4->ipha_protocol;
920 921 if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
921 922 return (B_FALSE);
922 923
923 924 sport = (in_port_t *)((char *)in_iph4 +
924 925 IPH_HDR_LENGTH(in_iph4));
925 926 dport = sport + 1;
926 927
927 928 DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t,
928 929 in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t,
929 930 ntohs(*sport), uint16_t, ntohs(*dport));
930 931 } else {
931 932 ASSERT(l3 == IPPROTO_IPV6);
932 933
933 934 icmph6 = (icmp6_t *)icmph;
934 935 in_iph6 = (ip6_t *)&icmph6[1];
935 936 in_src_p = &in_iph6->ip6_src;
936 937 in_dst_p = &in_iph6->ip6_dst;
937 938
938 939 if ((uint8_t *)in_iph6 + sizeof (ip6_t) +
939 940 ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
940 941 return (B_FALSE);
941 942 }
942 943
943 944 l4 = in_iph6->ip6_nxt;
944 945 /* We don't go deep inside an IPv6 packet yet. */
945 946 if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
946 947 return (B_FALSE);
947 948
948 949 sport = (in_port_t *)&in_iph6[1];
949 950 dport = sport + 1;
950 951
951 952 DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *,
952 953 &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst,
953 954 uint16_t, ntohs(*sport), uint16_t, ntohs(*dport));
954 955 }
955 956
956 957 i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport),
957 958 (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport),
958 959 ilbs->ilbs_conn_hash_size);
959 960 hash = ilbs->ilbs_c2s_conn_hash;
960 961
961 962 mutex_enter(&hash[i].ilb_conn_hash_lock);
962 963 for (connp = hash[i].ilb_connp; connp != NULL;
963 964 connp = connp->conn_c2s_next) {
964 965 if (connp->conn_l4 == l4 &&
965 966 connp->conn_c2s_dport == *sport &&
966 967 connp->conn_c2s_sport == *dport &&
967 968 IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) &&
968 969 IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) {
969 970 connp->conn_c2s_atime = ddi_get_lbolt64();
970 971 connp->conn_c2s_pkt_cnt++;
971 972 rule_cache = connp->conn_rule_cache;
972 973 adj_ip_sum = connp->conn_c2s_ip_sum;
973 974 break;
974 975 }
975 976 }
976 977 mutex_exit(&hash[i].ilb_conn_hash_lock);
977 978
978 979 if (connp == NULL) {
979 980 DTRACE_PROBE(ilb__chk__icmp__conn__failed);
980 981 return (B_FALSE);
981 982 }
982 983
983 984 switch (rule_cache.topo) {
984 985 case ILB_TOPO_IMPL_NAT:
985 986 full_nat = B_TRUE;
986 987 break;
987 988 case ILB_TOPO_IMPL_HALF_NAT:
988 989 full_nat = B_FALSE;
989 990 break;
990 991 default:
991 992 return (B_FALSE);
992 993 }
993 994
994 995 *lb_dst = rule_cache.info.nat_dst;
995 996 if (l3 == IPPROTO_IP) {
996 997 ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport,
997 998 &rule_cache.info, adj_ip_sum, full_nat);
998 999 } else {
999 1000 ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport,
1000 1001 &rule_cache.info, full_nat);
1001 1002 }
1002 1003 return (B_TRUE);
1003 1004 }
1004 1005
/*
 * This routine sends up the conn hash table to user land.  Note that the
 * request is an ioctl, hence we cannot really differentiate requests
 * from different clients.  There is no context shared between different
 * ioctls.  Here we make the assumption that the user land ilbd will
 * only allow one client to show the conn hash table at any time.
 * Otherwise, the results will be "very" inconsistent.
 *
 * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants
 * to read from the beginning of the table.  After a certain number of
 * entries are reported, the kernel remembers the position of the last
 * returned entry.  When the next ioctl comes in with the ILB_LIST_CONT
 * flag, it will return entries starting from where it was left off.  When
 * the end of table is reached, a flag (ILB_LIST_END) is set to tell
 * the client that there is no more entry.
 *
 * It is assumed that the caller has checked the size of nat so that it
 * can hold num entries.
 */
/* ARGSUSED */
int
ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat,
    uint32_t *num, uint32_t *flags)
{
	ilb_conn_hash_t *hash;
	ilb_conn_t *cur_connp;
	uint32_t i, j;
	int ret = 0;

	/*
	 * Serialize table walkers: the busy flag protects the saved walk
	 * position (ilbs_conn_list_cur/ilbs_conn_list_connp) across ioctls.
	 * Bail out with EINTR if the wait is interrupted by a signal.
	 */
	mutex_enter(&ilbs->ilbs_conn_list_lock);
	while (ilbs->ilbs_conn_list_busy) {
		if (cv_wait_sig(&ilbs->ilbs_conn_list_cv,
		    &ilbs->ilbs_conn_list_lock) == 0) {
			mutex_exit(&ilbs->ilbs_conn_list_lock);
			return (EINTR);
		}
	}
	/* No conn hash table yet, so nothing to report. */
	if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) {
		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
		mutex_exit(&ilbs->ilbs_conn_list_lock);
		*num = 0;
		*flags |= ILB_LIST_END;
		return (0);
	}
	ilbs->ilbs_conn_list_busy = B_TRUE;
	mutex_exit(&ilbs->ilbs_conn_list_lock);

	if (*flags & ILB_LIST_BEGIN) {
		/* Fresh walk: start at bucket 0. */
		i = 0;
		mutex_enter(&hash[0].ilb_conn_hash_lock);
		cur_connp = hash[0].ilb_connp;
	} else if (*flags & ILB_LIST_CONT) {
		/* Resume from where the previous ioctl stopped. */
		if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) {
			*num = 0;
			*flags |= ILB_LIST_END;
			goto done;
		}
		i = ilbs->ilbs_conn_list_cur;
		mutex_enter(&hash[i].ilb_conn_hash_lock);
		cur_connp = ilbs->ilbs_conn_list_connp;
	} else {
		ret = EINVAL;
		goto done;
	}

	/*
	 * Copy up to *num entries, walking bucket chains and moving to
	 * the next bucket (dropping/taking the per-bucket lock) when a
	 * chain is exhausted.
	 */
	j = 0;
	while (j < *num) {
		if (cur_connp == NULL) {
			mutex_exit(&hash[i].ilb_conn_hash_lock);
			if (++i == ilbs->ilbs_conn_hash_size) {
				*flags |= ILB_LIST_END;
				break;
			}
			mutex_enter(&hash[i].ilb_conn_hash_lock);
			cur_connp = hash[i].ilb_connp;
			continue;
		}
		nat[j].proto = cur_connp->conn_l4;

		nat[j].in_global = cur_connp->conn_c2s_daddr;
		nat[j].in_global_port = cur_connp->conn_c2s_dport;
		nat[j].out_global = cur_connp->conn_c2s_saddr;
		nat[j].out_global_port = cur_connp->conn_c2s_sport;

		nat[j].in_local = cur_connp->conn_s2c_saddr;
		nat[j].in_local_port = cur_connp->conn_s2c_sport;
		nat[j].out_local = cur_connp->conn_s2c_daddr;
		nat[j].out_local_port = cur_connp->conn_s2c_dport;

		nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time);
		nat[j].last_access_time =
		    TICK_TO_MSEC(cur_connp->conn_c2s_atime);

		/*
		 * The conn_s2c_pkt_cnt may not be accurate since we are not
		 * holding the s2c hash lock.
		 */
		nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt +
		    cur_connp->conn_s2c_pkt_cnt;
		j++;

		cur_connp = cur_connp->conn_c2s_next;
	}
	/* Remember the walk position for a subsequent ILB_LIST_CONT. */
	ilbs->ilbs_conn_list_connp = cur_connp;
	/*
	 * If we filled the caller's buffer, the loop exited without
	 * releasing the current bucket lock; release it here.  (On the
	 * ILB_LIST_END break path the lock was already dropped.)
	 */
	if (j == *num)
		mutex_exit(&hash[i].ilb_conn_hash_lock);

	ilbs->ilbs_conn_list_cur = i;

	*num = j;
done:
	mutex_enter(&ilbs->ilbs_conn_list_lock);
	ilbs->ilbs_conn_list_busy = B_FALSE;
	cv_signal(&ilbs->ilbs_conn_list_cv);
	mutex_exit(&ilbs->ilbs_conn_list_lock);

	return (ret);
}
1123 1124
1124 1125
1125 1126 /*
1126 1127 * Stickiness (persistence) handling routines.
1127 1128 */
1128 1129
1129 1130
/*
 * Create the kmem cache used to allocate ilb_sticky_t entries.  Called
 * lazily from ilb_sticky_hash_init() the first time a stack sets up its
 * sticky hash table.  No constructor/destructor is needed; entries are
 * fully initialized by ilb_sticky_add().
 */
static void
ilb_sticky_cache_init(void)
{
	ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache",
	    sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL,
	    ilb_kmem_flags);
}
1137 1138
1138 1139 void
1139 1140 ilb_sticky_cache_fini(void)
1140 1141 {
1141 1142 if (ilb_sticky_cache != NULL) {
1142 1143 kmem_cache_destroy(ilb_sticky_cache);
1143 1144 ilb_sticky_cache = NULL;
1144 1145 }
1145 1146 }
1146 1147
/*
 * Function wrapper around the ILB_STICKY_REFRELE() macro, presumably so
 * that code outside this file can release a sticky entry reference
 * without needing the macro's supporting definitions — TODO confirm
 * against callers.
 */
void
ilb_sticky_refrele(ilb_sticky_t *s)
{
	ILB_STICKY_REFRELE(s);
}
1152 1153
1153 1154 static ilb_sticky_t *
1154 1155 ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src)
1155 1156 {
1156 1157 ilb_sticky_t *s;
1157 1158
1158 1159 ASSERT(mutex_owned(&hash->sticky_lock));
1159 1160
1160 1161 for (s = list_head(&hash->sticky_head); s != NULL;
1161 1162 s = list_next(&hash->sticky_head, s)) {
1162 1163 if (s->rule_instance == rule->ir_ks_instance) {
1163 1164 if (IN6_ARE_ADDR_EQUAL(src, &s->src))
1164 1165 return (s);
1165 1166 }
1166 1167 }
1167 1168 return (NULL);
1168 1169 }
1169 1170
/*
 * Allocate a new sticky entry for (rule, src) pointing at server and
 * insert it at the head of the given hash bucket.  The caller must hold
 * the bucket's sticky_lock.  Returns the new entry with refcnt 1, or
 * NULL if the KM_NOSLEEP allocation fails.
 */
static ilb_sticky_t *
ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server,
    in6_addr_t *src)
{
	ilb_sticky_t *s;

	ASSERT(mutex_owned(&hash->sticky_lock));

	/* KM_NOSLEEP: this is called on the packet-processing path. */
	if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL)
		return (NULL);

	/*
	 * The rule instance is for handling the scenario when the same
	 * client talks to different rules at the same time.  Stickiness
	 * is per rule so we can use the rule instance to differentiate
	 * the client's request.
	 */
	s->rule_instance = rule->ir_ks_instance;
	/*
	 * Copy the rule name for listing all sticky cache entry.  ir_name
	 * is guaranteed to be NULL terminated.
	 */
	(void) strcpy(s->rule_name, rule->ir_name);
	s->server = server;

	/*
	 * Grab a ref cnt on the server so that it won't go away while
	 * it is still in the sticky table.
	 */
	ILB_SERVER_REFHOLD(server);
	s->src = *src;
	s->expiry = rule->ir_sticky_expiry;
	s->refcnt = 1;
	s->hash = hash;

	/*
	 * There is no need to set atime here since the refcnt is not
	 * zero.  A sticky entry is removed only when the refcnt is
	 * zero.  But just set it here for debugging purpose.  The
	 * atime is set when a refrele is done on a sticky entry.
	 */
	s->atime = ddi_get_lbolt64();

	/* Head insertion: newest entry is found first by lookups. */
	list_insert_head(&hash->sticky_head, s);
	hash->sticky_cnt++;
	return (s);
}
1217 1218
/*
 * This routine checks if there is an existing sticky entry which matches
 * a given packet.  If there is one, return it.  If there is not, create
 * a sticky entry using the packet's info.
 *
 * On success, returns the server to use and stores the (ref-held)
 * sticky entry in *res; for NAT rules, *src_ent_idx is set to the index
 * of the NAT source address to use.  Returns NULL if the entry cannot
 * be allocated or the chosen server has raced to a disabled state.
 */
ilb_server_t *
ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src,
    ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx)
{
	int i;
	ilb_sticky_hash_t *hash;
	ilb_sticky_t *s;

	ASSERT(server != NULL);

	*res = NULL;

	/* Bucket is chosen by client address and rule identity. */
	i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3],
	    (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size);
	hash = &ilbs->ilbs_sticky_hash[i];

	/* First check if there is already an entry. */
	mutex_enter(&hash->sticky_lock);
	s = ilb_sticky_lookup(hash, rule, src);

	/* No sticky entry, add one. */
	if (s == NULL) {
add_new_entry:
		s = ilb_sticky_add(hash, rule, server, src);
		if (s == NULL) {
			mutex_exit(&hash->sticky_lock);
			return (NULL);
		}
		/*
		 * Find a source for this server.  All subsequent requests
		 * from the same client matching this sticky entry will use
		 * this source address in doing NAT.  The current algorithm
		 * is simple, rotate the source address.  Note that the
		 * source address array does not change after it's created, so
		 * it is OK to just increment the cur index.
		 */
		if (server->iser_nat_src != NULL) {
			/* It is a hint, does not need to be atomic. */
			*src_ent_idx = (server->iser_nat_src->cur++ %
			    server->iser_nat_src->num_src);
			s->nat_src_idx = *src_ent_idx;
		}
		/* New entry already has refcnt 1 from ilb_sticky_add(). */
		mutex_exit(&hash->sticky_lock);
		*res = s;
		return (server);
	}

	/*
	 * We don't hold any lock accessing iser_enabled.  Refer to the
	 * comment in ilb_server_add() about iser_lock.
	 */
	if (!s->server->iser_enabled) {
		/*
		 * s->server == server can only happen if there is a race in
		 * toggling the iser_enabled flag (we don't hold a lock doing
		 * that) so that the load balance algorithm still returns a
		 * disabled server.  In this case, just drop the packet...
		 */
		if (s->server == server) {
			mutex_exit(&hash->sticky_lock);
			return (NULL);
		}

		/*
		 * The old server is disabled and there is a new server, use
		 * the new one to create a sticky entry.  Since we will
		 * add the entry at the beginning, subsequent lookup will
		 * find this new entry instead of the old one.
		 */
		goto add_new_entry;
	}

	/* Existing, still-enabled entry: take a ref under the lock. */
	s->refcnt++;
	*res = s;
	mutex_exit(&hash->sticky_lock);
	/* Reuse the NAT source chosen when the entry was created. */
	if (server->iser_nat_src != NULL)
		*src_ent_idx = s->nat_src_idx;
	return (s->server);
}
1302 1303
/*
 * Taskq callback that garbage collects expired sticky entries in the
 * hash buckets [timer->start, timer->end).  An entry is freed only when
 * its refcnt has dropped to zero and it has not been accessed (atime)
 * within its expiry interval; the server ref taken in ilb_sticky_add()
 * is released at the same time.
 */
static void
ilb_sticky_cleanup(void *arg)
{
	ilb_timer_t *timer = (ilb_timer_t *)arg;
	uint32_t i;
	ilb_stack_t *ilbs;
	ilb_sticky_hash_t *hash;
	ilb_sticky_t *s, *nxt_s;
	int64_t now, expiry;

	ilbs = timer->ilbs;
	hash = ilbs->ilbs_sticky_hash;
	ASSERT(hash != NULL);

	now = ddi_get_lbolt64();
	for (i = timer->start; i < timer->end; i++) {
		mutex_enter(&hash[i].sticky_lock);
		for (s = list_head(&hash[i].sticky_head); s != NULL;
		    s = nxt_s) {
			/* Grab the next entry before a possible removal. */
			nxt_s = list_next(&hash[i].sticky_head, s);
			if (s->refcnt != 0)
				continue;
			/* s->expiry is in seconds; compare in ticks. */
			expiry = now - SEC_TO_TICK(s->expiry);
			if (s->atime < expiry) {
				ILB_SERVER_REFRELE(s->server);
				list_remove(&hash[i].sticky_head, s);
				kmem_cache_free(ilb_sticky_cache, s);
				hash[i].sticky_cnt--;
			}
		}
		mutex_exit(&hash[i].sticky_lock);
	}
}
1336 1337
/*
 * Timeout handler for sticky gc.  Dispatch the actual cleanup work to
 * the sticky taskq, then re-arm the timer — unless tid has been cleared
 * to 0 by ilb_sticky_hash_fini(), which tells us not to restart.
 */
static void
ilb_sticky_timer(void *arg)
{
	ilb_timer_t *timer = (ilb_timer_t *)arg;

	(void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq,
	    ilb_sticky_cleanup, arg, TQ_SLEEP);
	mutex_enter(&timer->tid_lock);
	if (timer->tid == 0) {
		/* Shutting down; do not reschedule. */
		mutex_exit(&timer->tid_lock);
	} else {
		timer->tid = timeout(ilb_sticky_timer, arg,
		    SEC_TO_TICK(ilb_sticky_timeout));
		mutex_exit(&timer->tid_lock);
	}
}
1353 1354
1354 1355 void
1355 1356 ilb_sticky_hash_init(ilb_stack_t *ilbs)
1356 1357 {
1357 1358 extern pri_t minclsyspri;
1358 1359 int i, part;
1359 1360 char tq_name[TASKQ_NAMELEN];
1360 1361 ilb_timer_t *tm;
1361 1362
1362 - if (ilbs->ilbs_sticky_hash_size & (ilbs->ilbs_sticky_hash_size - 1)) {
1363 + if (!ISP2(ilbs->ilbs_sticky_hash_size)) {
1363 1364 for (i = 0; i < 31; i++) {
1364 1365 if (ilbs->ilbs_sticky_hash_size < (1 << i))
1365 1366 break;
1366 1367 }
1367 1368 ilbs->ilbs_sticky_hash_size = 1 << i;
1368 1369 }
1369 1370
1370 1371 ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) *
1371 1372 ilbs->ilbs_sticky_hash_size, KM_SLEEP);
1372 1373 for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1373 1374 mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL,
1374 1375 MUTEX_DEFAULT, NULL);
1375 1376 list_create(&ilbs->ilbs_sticky_hash[i].sticky_head,
1376 1377 sizeof (ilb_sticky_t),
1377 1378 offsetof(ilb_sticky_t, list));
1378 1379 }
1379 1380
1380 1381 if (ilb_sticky_cache == NULL)
1381 1382 ilb_sticky_cache_init();
1382 1383
1383 1384 (void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p",
1384 1385 (void *)ilbs->ilbs_netstack);
1385 1386 ASSERT(ilbs->ilbs_sticky_taskq == NULL);
1386 1387 ilbs->ilbs_sticky_taskq = taskq_create(tq_name,
1387 1388 ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size,
1388 1389 ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
1389 1390
1390 1391 ASSERT(ilbs->ilbs_sticky_timer_list == NULL);
1391 1392 ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
1392 1393 ilb_sticky_timer_size, KM_SLEEP);
1393 1394 part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1;
1394 1395 for (i = 0; i < ilb_sticky_timer_size; i++) {
1395 1396 tm = ilbs->ilbs_sticky_timer_list + i;
1396 1397 tm->start = i * part;
1397 1398 tm->end = i * part + part;
1398 1399 if (tm->end > ilbs->ilbs_sticky_hash_size)
1399 1400 tm->end = ilbs->ilbs_sticky_hash_size;
1400 1401 tm->ilbs = ilbs;
1401 1402 mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
1402 1403 /* Spread out the starting execution time of all the timers. */
1403 1404 tm->tid = timeout(ilb_sticky_timer, tm,
1404 1405 SEC_TO_TICK(ilb_sticky_timeout + i));
1405 1406 }
1406 1407 }
1407 1408
1408 1409 void
1409 1410 ilb_sticky_hash_fini(ilb_stack_t *ilbs)
1410 1411 {
1411 1412 int i;
1412 1413 ilb_sticky_t *s;
1413 1414
1414 1415 if (ilbs->ilbs_sticky_hash == NULL)
1415 1416 return;
1416 1417
1417 1418 /* Stop all the timers first. */
1418 1419 for (i = 0; i < ilb_sticky_timer_size; i++) {
1419 1420 timeout_id_t tid;
1420 1421
1421 1422 /* Setting tid to 0 tells the timer handler not to restart. */
1422 1423 mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1423 1424 tid = ilbs->ilbs_sticky_timer_list[i].tid;
1424 1425 ilbs->ilbs_sticky_timer_list[i].tid = 0;
1425 1426 mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1426 1427 (void) untimeout(tid);
1427 1428 }
1428 1429 kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) *
1429 1430 ilb_sticky_timer_size);
1430 1431 taskq_destroy(ilbs->ilbs_sticky_taskq);
1431 1432 ilbs->ilbs_sticky_taskq = NULL;
1432 1433
1433 1434 for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1434 1435 while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head))
1435 1436 != NULL) {
1436 1437 list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s);
1437 1438 ILB_SERVER_REFRELE(s->server);
1438 1439 kmem_free(s, sizeof (ilb_sticky_t));
1439 1440 }
1440 1441 }
1441 1442 kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size *
1442 1443 sizeof (ilb_sticky_hash_t));
1443 1444 }
1444 1445
/*
 * This routine sends up the sticky hash table to user land.  Refer to
 * the comments before ilb_list_nat().  Both routines assume similar
 * conditions: a single walker at a time, serialized by the busy flag,
 * resuming via ILB_LIST_BEGIN/ILB_LIST_CONT and terminating with
 * ILB_LIST_END.
 *
 * It is assumed that the caller has checked the size of st so that it
 * can hold num entries.
 */
/* ARGSUSED */
int
ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st,
    uint32_t *num, uint32_t *flags)
{
	ilb_sticky_hash_t *hash;
	ilb_sticky_t *curp;
	uint32_t i, j;
	int ret = 0;

	/* Wait for any other walker; EINTR if interrupted by a signal. */
	mutex_enter(&ilbs->ilbs_sticky_list_lock);
	while (ilbs->ilbs_sticky_list_busy) {
		if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv,
		    &ilbs->ilbs_sticky_list_lock) == 0) {
			mutex_exit(&ilbs->ilbs_sticky_list_lock);
			return (EINTR);
		}
	}
	/* No sticky hash table yet, nothing to report. */
	if ((hash = ilbs->ilbs_sticky_hash) == NULL) {
		mutex_exit(&ilbs->ilbs_sticky_list_lock);
		*num = 0;
		*flags |= ILB_LIST_END;
		return (0);
	}
	ilbs->ilbs_sticky_list_busy = B_TRUE;
	mutex_exit(&ilbs->ilbs_sticky_list_lock);

	if (*flags & ILB_LIST_BEGIN) {
		/* Fresh walk: start at bucket 0. */
		i = 0;
		mutex_enter(&hash[0].sticky_lock);
		curp = list_head(&hash[0].sticky_head);
	} else if (*flags & ILB_LIST_CONT) {
		/* Resume from the position saved by the previous ioctl. */
		if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) {
			*num = 0;
			*flags |= ILB_LIST_END;
			goto done;
		}
		i = ilbs->ilbs_sticky_list_cur;
		mutex_enter(&hash[i].sticky_lock);
		curp = ilbs->ilbs_sticky_list_curp;
	} else {
		ret = EINVAL;
		goto done;
	}

	/* Copy up to *num entries, moving across buckets as needed. */
	j = 0;
	while (j < *num) {
		if (curp == NULL) {
			mutex_exit(&hash[i].sticky_lock);
			if (++i == ilbs->ilbs_sticky_hash_size) {
				*flags |= ILB_LIST_END;
				break;
			}
			mutex_enter(&hash[i].sticky_lock);
			curp = list_head(&hash[i].sticky_head);
			continue;
		}
		(void) strcpy(st[j].rule_name, curp->rule_name);
		st[j].req_addr = curp->src;
		st[j].srv_addr = curp->server->iser_addr_v6;
		/*
		 * NOTE(review): expiry appears to be stored in seconds
		 * (ilb_sticky_cleanup() applies SEC_TO_TICK to it), yet it
		 * is converted here with TICK_TO_MSEC as if it were ticks —
		 * confirm the units expected by user land before changing.
		 */
		st[j].expiry_time = TICK_TO_MSEC(curp->expiry);
		j++;
		curp = list_next(&hash[i].sticky_head, curp);
	}
	/* Save the walk position for a subsequent ILB_LIST_CONT. */
	ilbs->ilbs_sticky_list_curp = curp;
	/* Buffer filled: the loop exited with the bucket lock still held. */
	if (j == *num)
		mutex_exit(&hash[i].sticky_lock);

	ilbs->ilbs_sticky_list_cur = i;

	*num = j;
done:
	mutex_enter(&ilbs->ilbs_sticky_list_lock);
	ilbs->ilbs_sticky_list_busy = B_FALSE;
	cv_signal(&ilbs->ilbs_sticky_list_cv);
	mutex_exit(&ilbs->ilbs_sticky_list_lock);

	return (ret);
}
↓ open down ↓ |
159 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX