5045 use atomic_{inc,dec}_* instead of atomic_add_*
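The only change to this file is the hunk at line 67: the reference-count bump in
rdsv3_inc_addref() now uses the dedicated increment primitive from <sys/atomic.h>
instead of an add of 1. A minimal sketch of the before/after pattern this bug
covers (the "refcount" variable and the take_ref()/drop_ref() helpers are
hypothetical stand-ins, not code from the tree):

#include <sys/types.h>
#include <sys/atomic.h>

/* Hypothetical counter standing in for fields such as inc->i_refcount. */
static uint32_t refcount = 1;

static void
take_ref(void)
{
        /* Before: atomic_add_32(&refcount, 1); */
        atomic_inc_32(&refcount);
}

static void
drop_ref(void)
{
        /* Before: atomic_add_32(&refcount, -1); */
        atomic_dec_32(&refcount);
}

The dedicated forms state the intent directly and drop the literal +1/-1
arguments; behavior is unchanged.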
--- old/usr/src/uts/common/io/ib/clients/rdsv3/rds_recv.c
+++ new/usr/src/uts/common/io/ib/clients/rdsv3/rds_recv.c
1 1 /*
2 2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 3 */
4 4
5 5 /*
6 6 * This file contains code imported from the OFED rds source file recv.c
7 7 * Oracle elects to have and use the contents of rds_recv.c under and governed
8 8 * by the OpenIB.org BSD license (see below for full license text). However,
9 9 * the following notice accompanied the original version of this file:
10 10 */
11 11
12 12 /*
13 13 * Copyright (c) 2006 Oracle. All rights reserved.
14 14 *
15 15 * This software is available to you under a choice of one of two
16 16 * licenses. You may choose to be licensed under the terms of the GNU
17 17 * General Public License (GPL) Version 2, available from the file
18 18 * COPYING in the main directory of this source tree, or the
19 19 * OpenIB.org BSD license below:
20 20 *
21 21 * Redistribution and use in source and binary forms, with or
22 22 * without modification, are permitted provided that the following
23 23 * conditions are met:
24 24 *
25 25 * - Redistributions of source code must retain the above
26 26 * copyright notice, this list of conditions and the following
27 27 * disclaimer.
28 28 *
29 29 * - Redistributions in binary form must reproduce the above
30 30 * copyright notice, this list of conditions and the following
31 31 * disclaimer in the documentation and/or other materials
32 32 * provided with the distribution.
33 33 *
34 34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 41 * SOFTWARE.
42 42 *
43 43 */
44 44 #include <sys/rds.h>
45 45
46 46 #include <sys/ib/clients/rdsv3/rdsv3.h>
47 47 #include <sys/ib/clients/rdsv3/rdma.h>
48 48 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
49 49
50 50 void
51 51 rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn,
52 52 uint32_be_t saddr)
53 53 {
54 54 RDSV3_DPRINTF5("rdsv3_inc_init", "Enter(inc: %p, conn: %p)", inc, conn);
55 55 inc->i_refcount = 1;
56 56 list_link_init(&inc->i_item);
57 57 inc->i_conn = conn;
58 58 inc->i_saddr = saddr;
59 59 inc->i_rdma_cookie = 0;
60 60 }
61 61
62 62 void
63 63 rdsv3_inc_addref(struct rdsv3_incoming *inc)
64 64 {
65 65 RDSV3_DPRINTF4("rdsv3_inc_addref",
66 66 "addref inc %p ref %d", inc, atomic_get(&inc->i_refcount));
67 - atomic_add_32(&inc->i_refcount, 1);
67 + atomic_inc_32(&inc->i_refcount);
68 68 }
69 69
70 70 void
71 71 rdsv3_inc_put(struct rdsv3_incoming *inc)
72 72 {
73 73 RDSV3_DPRINTF4("rdsv3_inc_put", "put inc %p ref %d",
74 74 inc, atomic_get(&inc->i_refcount));
75 75 if (atomic_dec_and_test(&inc->i_refcount)) {
76 76 ASSERT(!list_link_active(&inc->i_item));
77 77
78 78 inc->i_conn->c_trans->inc_free(inc);
79 79 }
80 80 }
81 81
82 82 /*ARGSUSED*/
83 83 static void
84 84 rdsv3_recv_rcvbuf_delta(struct rdsv3_sock *rs, struct rsock *sk,
85 85 struct rdsv3_cong_map *map,
86 86 int delta, uint16_be_t port)
87 87 {
88 88 int now_congested;
89 89
90 90 RDSV3_DPRINTF4("rdsv3_recv_rcvbuf_delta",
91 91 "Enter(rs: %p, map: %p, delta: %d, port: %d)",
92 92 rs, map, delta, port);
93 93
94 94 if (delta == 0)
95 95 return;
96 96
97 97 rs->rs_rcv_bytes += delta;
98 98 now_congested = rs->rs_rcv_bytes > rdsv3_sk_rcvbuf(rs);
99 99
100 100 RDSV3_DPRINTF5("rdsv3_recv_rcvbuf_delta",
101 101 "rs %p (%u.%u.%u.%u:%u) recv bytes %d buf %d "
102 102 "now_cong %d delta %d",
103 103 rs, NIPQUAD(rs->rs_bound_addr),
104 104 (int)ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
105 105 rdsv3_sk_rcvbuf(rs), now_congested, delta);
106 106
107 107 /* wasn't -> am congested */
108 108 if (!rs->rs_congested && now_congested) {
109 109 rs->rs_congested = 1;
110 110 rdsv3_cong_set_bit(map, port);
111 111 rdsv3_cong_queue_updates(map);
112 112 }
113 113 /* was -> aren't congested */
114 114 /*
115 115 * Require more free space before reporting uncongested to prevent
116 116 * bouncing cong/uncong state too often
117 117 */
118 118 else if (rs->rs_congested &&
119 119 (rs->rs_rcv_bytes < (rdsv3_sk_rcvbuf(rs)/2))) {
120 120 rs->rs_congested = 0;
121 121 rdsv3_cong_clear_bit(map, port);
122 122 rdsv3_cong_queue_updates(map);
123 123 }
124 124
125 125 /* do nothing if no change in cong state */
126 126
127 127 RDSV3_DPRINTF4("rdsv3_recv_rcvbuf_delta", "Return(rs: %p)", rs);
128 128 }
129 129
130 130 /*
131 131 * Process all extension headers that come with this message.
132 132 */
133 133 static void
134 134 rdsv3_recv_incoming_exthdrs(struct rdsv3_incoming *inc, struct rdsv3_sock *rs)
135 135 {
136 136 struct rdsv3_header *hdr = &inc->i_hdr;
137 137 unsigned int pos = 0, type, len;
138 138 union {
139 139 struct rdsv3_ext_header_version version;
140 140 struct rdsv3_ext_header_rdma rdma;
141 141 struct rdsv3_ext_header_rdma_dest rdma_dest;
142 142 } buffer;
143 143
144 144 RDSV3_DPRINTF4("rdsv3_recv_incoming_exthdrs", "Enter");
145 145 while (1) {
146 146 len = sizeof (buffer);
147 147 type = rdsv3_message_next_extension(hdr, &pos, &buffer, &len);
148 148 if (type == RDSV3_EXTHDR_NONE)
149 149 break;
150 150 RDSV3_DPRINTF4("recv_incoming_exthdrs", "type %d", type);
151 151 /* Process extension header here */
152 152 switch (type) {
153 153 case RDSV3_EXTHDR_RDMA:
154 154 rdsv3_rdma_unuse(rs, ntohl(buffer.rdma.h_rdma_rkey),
155 155 0);
156 156 break;
157 157
158 158 case RDSV3_EXTHDR_RDMA_DEST:
159 159 /*
160 160 * We ignore the size for now. We could stash it
161 161 * somewhere and use it for error checking.
162 162 */
163 163 inc->i_rdma_cookie = rdsv3_rdma_make_cookie(
164 164 ntohl(buffer.rdma_dest.h_rdma_rkey),
165 165 ntohl(buffer.rdma_dest.h_rdma_offset));
166 166
167 167 break;
168 168 }
169 169 }
170 170 RDSV3_DPRINTF4("rdsv3_recv_incoming_exthdrs", "Return");
171 171 }
172 172
173 173 /*
174 174 * The transport must make sure that this is serialized against other
175 175 * rx and conn reset on this specific conn.
176 176 *
177 177 * We currently assert that only one fragmented message will be sent
178 178 * down a connection at a time. This lets us reassemble in the conn
179 179 * instead of per-flow which means that we don't have to go digging through
180 180 * flows to tear down partial reassembly progress on conn failure and
181 181 * we save flow lookup and locking for each frag arrival. It does mean
182 182 * that small messages will wait behind large ones. Fragmenting at all
183 183 * is only to reduce the memory consumption of pre-posted buffers.
184 184 *
185 185 * The caller passes in saddr and daddr instead of us getting it from the
186 186 * conn. This lets loopback, who only has one conn for both directions,
187 187 * tell us which roles the addrs in the conn are playing for this message.
188 188 */
189 189 /* ARGSUSED */
190 190 void
191 191 rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr,
192 192 uint32_be_t daddr, struct rdsv3_incoming *inc, int gfp)
193 193 {
194 194 struct rdsv3_sock *rs = NULL;
195 195 struct rsock *sk;
196 196
197 197 inc->i_conn = conn;
198 198 inc->i_rx_jiffies = jiffies;
199 199
200 200 RDSV3_DPRINTF5("rdsv3_recv_incoming",
201 201 "conn %p next %llu inc %p seq %llu len %u sport %u dport %u "
202 202 "flags 0x%x rx_jiffies %lu", conn,
203 203 (unsigned long long)conn->c_next_rx_seq,
204 204 inc,
205 205 (unsigned long long)ntohll(inc->i_hdr.h_sequence),
206 206 ntohl(inc->i_hdr.h_len),
207 207 ntohs(inc->i_hdr.h_sport),
208 208 ntohs(inc->i_hdr.h_dport),
209 209 inc->i_hdr.h_flags,
210 210 inc->i_rx_jiffies);
211 211
212 212 /*
213 213 * Sequence numbers should only increase. Messages get their
214 214 * sequence number as they're queued in a sending conn. They
215 215 * can be dropped, though, if the sending socket is closed before
216 216 * they hit the wire. So sequence numbers can skip forward
217 217 * under normal operation. They can also drop back in the conn
218 218 * failover case as previously sent messages are resent down the
219 219 * new instance of a conn. We drop those, otherwise we have
220 220 * to assume that the next valid seq does not come after a
221 221 * hole in the fragment stream.
222 222 *
223 223 * The headers don't give us a way to realize if fragments of
224 224 * a message have been dropped. We assume that frags that arrive
225 225 * to a flow are part of the current message on the flow that is
226 226 * being reassembled. This means that senders can't drop messages
227 227 * from the sending conn until all their frags are sent.
228 228 *
229 229 * XXX we could spend more on the wire to get more robust failure
230 230 * detection, arguably worth it to avoid data corruption.
231 231 */
232 232 if (ntohll(inc->i_hdr.h_sequence) < conn->c_next_rx_seq &&
233 233 (inc->i_hdr.h_flags & RDSV3_FLAG_RETRANSMITTED)) {
234 234 rdsv3_stats_inc(s_recv_drop_old_seq);
235 235 goto out;
236 236 }
237 237 conn->c_next_rx_seq = ntohll(inc->i_hdr.h_sequence) + 1;
238 238
239 239 if (rdsv3_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
240 240 rdsv3_stats_inc(s_recv_ping);
241 241 (void) rdsv3_send_pong(conn, inc->i_hdr.h_sport);
242 242 goto out;
243 243 }
244 244
245 245 rs = rdsv3_find_bound(conn, inc->i_hdr.h_dport);
246 246 if (!rs) {
247 247 rdsv3_stats_inc(s_recv_drop_no_sock);
248 248 goto out;
249 249 }
250 250
251 251 /* Process extension headers */
252 252 rdsv3_recv_incoming_exthdrs(inc, rs);
253 253
254 254 /* We can be racing with rdsv3_release() which marks the socket dead. */
255 255 sk = rdsv3_rs_to_sk(rs);
256 256
257 257 /* serialize with rdsv3_release -> sock_orphan */
258 258 rw_enter(&rs->rs_recv_lock, RW_WRITER);
259 259 if (!rdsv3_sk_sock_flag(sk, SOCK_DEAD)) {
260 260 int error, bytes;
261 261 RDSV3_DPRINTF5("rdsv3_recv_incoming",
262 262 "adding inc %p to rs %p's recv queue", inc, rs);
263 263 rdsv3_stats_inc(s_recv_queued);
264 264 rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
265 265 ntohl(inc->i_hdr.h_len),
266 266 inc->i_hdr.h_dport);
267 267 rdsv3_inc_addref(inc);
268 268 list_insert_tail(&rs->rs_recv_queue, inc);
269 269 bytes = rs->rs_rcv_bytes;
270 270 rw_exit(&rs->rs_recv_lock);
271 271
272 272 __rdsv3_wake_sk_sleep(sk);
273 273
274 274 /* wake up anyone waiting in poll */
275 275 sk->sk_upcalls->su_recv(sk->sk_upper_handle, NULL,
276 276 bytes, 0, &error, NULL);
277 277 if (error != 0) {
278 278 RDSV3_DPRINTF2("rdsv3_recv_incoming",
279 279 "su_recv returned: %d", error);
280 280 }
281 281 } else {
282 282 rdsv3_stats_inc(s_recv_drop_dead_sock);
283 283 rw_exit(&rs->rs_recv_lock);
284 284 }
285 285
286 286 out:
287 287 if (rs)
288 288 rdsv3_sock_put(rs);
289 289 }
290 290
291 291 /*
292 292 * be very careful here. This is being called as the condition in
293 293 * wait_event_*() needs to cope with being called many times.
294 294 */
295 295 static int
296 296 rdsv3_next_incoming(struct rdsv3_sock *rs, struct rdsv3_incoming **inc)
297 297 {
298 298 if (!*inc) {
299 299 rw_enter(&rs->rs_recv_lock, RW_READER);
300 300 if (!list_is_empty(&rs->rs_recv_queue)) {
301 301 *inc = list_head(&rs->rs_recv_queue);
302 302 rdsv3_inc_addref(*inc);
303 303 }
304 304 rw_exit(&rs->rs_recv_lock);
305 305 }
306 306
307 307 return (*inc != NULL);
308 308 }
309 309
310 310 static int
311 311 rdsv3_still_queued(struct rdsv3_sock *rs, struct rdsv3_incoming *inc,
312 312 int drop)
313 313 {
314 314 struct rsock *sk = rdsv3_rs_to_sk(rs);
315 315 int ret = 0;
316 316
317 317 RDSV3_DPRINTF4("rdsv3_still_queued", "Enter rs: %p inc: %p drop: %d",
318 318 rs, inc, drop);
319 319
320 320 rw_enter(&rs->rs_recv_lock, RW_WRITER);
321 321 if (list_link_active(&inc->i_item)) {
322 322 ret = 1;
323 323 if (drop) {
324 324 /* XXX make sure this i_conn is reliable */
325 325 rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
326 326 -ntohl(inc->i_hdr.h_len),
327 327 inc->i_hdr.h_dport);
328 328 list_remove_node(&inc->i_item);
329 329 rdsv3_inc_put(inc);
330 330 }
331 331 }
332 332 rw_exit(&rs->rs_recv_lock);
333 333
334 334 RDSV3_DPRINTF5("rdsv3_still_queued",
335 335 "inc %p rs %p still %d dropped %d", inc, rs, ret, drop);
336 336 return (ret);
337 337 }
338 338
339 339 /*
340 340 * Pull errors off the error queue.
341 341 * If msghdr is NULL, we will just purge the error queue.
342 342 */
343 343 int
344 344 rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msghdr)
345 345 {
346 346 struct rdsv3_notifier *notifier;
347 347 struct rds_rdma_notify cmsg;
348 348 unsigned int count = 0, max_messages = ~0U;
349 349 list_t copy;
350 350 int err = 0;
351 351
352 352 RDSV3_DPRINTF4("rdsv3_notify_queue_get", "Enter(rs: %p)", rs);
353 353
354 354 list_create(&copy, sizeof (struct rdsv3_notifier),
355 355 offsetof(struct rdsv3_notifier, n_list));
356 356
357 357
358 358 /*
359 359 * put_cmsg copies to user space and thus may sleep. We can't do this
360 360 * with rs_lock held, so first grab as many notifications as we can
361 361 * stuff
362 362 * in the user provided cmsg buffer. We don't try to copy more, to avoid
363 363 * losing notifications - except when the buffer is so small that
364 364 * it wouldn't
365 365 * even hold a single notification. Then we give him as much of this
366 366 * single
367 367 * msg as we can squeeze in, and set MSG_CTRUNC.
368 368 */
369 369 if (msghdr) {
370 370 max_messages =
371 371 msghdr->msg_controllen / CMSG_SPACE(sizeof (cmsg));
372 372 if (!max_messages)
373 373 max_messages = 1;
374 374 }
375 375
376 376 mutex_enter(&rs->rs_lock);
377 377 while (!list_is_empty(&rs->rs_notify_queue) && count < max_messages) {
378 378 notifier = list_remove_head(&rs->rs_notify_queue);
379 379 list_insert_tail(&copy, notifier);
380 380 count++;
381 381 }
382 382 mutex_exit(&rs->rs_lock);
383 383
384 384 if (!count)
385 385 return (0);
386 386
387 387 while (!list_is_empty(&copy)) {
388 388 notifier = list_remove_head(&copy);
389 389
390 390 if (msghdr) {
391 391 cmsg.user_token = notifier->n_user_token;
392 392 cmsg.status = notifier->n_status;
393 393
394 394 err = rdsv3_put_cmsg(msghdr, SOL_RDS,
395 395 RDS_CMSG_RDMA_STATUS, sizeof (cmsg), &cmsg);
396 396 if (err)
397 397 break;
398 398 }
399 399
400 400 kmem_free(notifier, sizeof (struct rdsv3_notifier));
401 401 }
402 402
403 403 /*
404 404 * If we bailed out because of an error in put_cmsg,
405 405 * we may be left with one or more notifications that we
406 406 * didn't process. Return them to the head of the list.
407 407 */
408 408 if (!list_is_empty(&copy)) {
409 409 mutex_enter(&rs->rs_lock);
410 410 list_splice(&copy, &rs->rs_notify_queue);
411 411 mutex_exit(&rs->rs_lock);
412 412 }
413 413
414 414 RDSV3_DPRINTF4("rdsv3_notify_queue_get", "Return(rs: %p)", rs);
415 415
416 416 return (err);
417 417 }
418 418
419 419 /*
420 420 * Queue a congestion notification
421 421 */
422 422 static int
423 423 rdsv3_notify_cong(struct rdsv3_sock *rs, struct msghdr *msghdr)
424 424 {
425 425 uint64_t notify = rs->rs_cong_notify;
426 426 int err;
427 427
428 428 err = rdsv3_put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE,
429 429 sizeof (notify), &notify);
430 430 if (err)
431 431 return (err);
432 432
433 433 mutex_enter(&rs->rs_lock);
434 434 rs->rs_cong_notify &= ~notify;
435 435 mutex_exit(&rs->rs_lock);
436 436
437 437 return (0);
438 438 }
439 439
440 440 /*
441 441 * Receive any control messages.
442 442 */
443 443 static int
444 444 rdsv3_cmsg_recv(struct rdsv3_incoming *inc, struct msghdr *msg)
445 445 {
446 446 int ret = 0;
447 447 if (inc->i_rdma_cookie) {
448 448 ret = rdsv3_put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
449 449 sizeof (inc->i_rdma_cookie), &inc->i_rdma_cookie);
450 450 }
451 451 return (ret);
452 452 }
453 453
454 454 int
455 455 rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio,
456 456 struct nmsghdr *msg, size_t size, int msg_flags)
457 457 {
458 458 struct rsock *sk = rdsv3_rs_to_sk(rs);
459 459 long timeo;
460 460 int ret = 0;
461 461 struct sockaddr_in *sin = NULL;
462 462 struct rdsv3_incoming *inc = NULL;
463 463 boolean_t nonblock = B_FALSE;
464 464
465 465 RDSV3_DPRINTF4("rdsv3_recvmsg",
466 466 "Enter(rs: %p size: %d msg_flags: 0x%x)", rs, size, msg_flags);
467 467
468 468 if ((uio->uio_fmode & (FNDELAY | FNONBLOCK)) ||
469 469 (msg_flags & MSG_DONTWAIT))
470 470 nonblock = B_TRUE;
471 471
472 472 /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */
473 473 timeo = rdsv3_rcvtimeo(sk, nonblock);
474 474
475 475 if (msg_flags & MSG_OOB)
476 476 goto out;
477 477
478 478 /* mark the first cmsg position */
479 479 if (msg) {
480 480 msg->msg_control = NULL;
481 481 }
482 482
483 483 while (1) {
484 484 /*
485 485 * If there are pending notifications, do those -
486 486 * and nothing else
487 487 */
488 488 if (!list_is_empty(&rs->rs_notify_queue)) {
489 489 ret = rdsv3_notify_queue_get(rs, msg);
490 490
491 491 if (msg && msg->msg_namelen) {
492 492 sin = kmem_zalloc(sizeof (struct sockaddr_in),
493 493 KM_SLEEP);
494 494 sin->sin_family = AF_INET_OFFLOAD;
495 495 if (inc) {
496 496 sin->sin_port = inc->i_hdr.h_sport;
497 497 sin->sin_addr.s_addr = inc->i_saddr;
498 498 }
499 499 msg->msg_namelen = sizeof (struct sockaddr_in);
500 500 msg->msg_name = sin;
501 501 }
502 502 break;
503 503 }
504 504
505 505 if (rs->rs_cong_notify) {
506 506 ret = rdsv3_notify_cong(rs, msg);
507 507 goto out;
508 508 }
509 509
510 510 if (!rdsv3_next_incoming(rs, &inc)) {
511 511 if (nonblock) {
512 512 ret = -EAGAIN;
513 513 break;
514 514 }
515 515
516 516 RDSV3_DPRINTF3("rdsv3_recvmsg",
517 517 "Before wait (rs: %p)", rs);
518 518
519 519 #if 0
520 520 ret = rdsv3_wait_sig(sk->sk_sleep,
521 521 !(list_is_empty(&rs->rs_notify_queue) &&
522 522 !rs->rs_cong_notify &&
523 523 !rdsv3_next_incoming(rs, &inc)));
524 524 if (ret == 0) {
525 525 /* signal/timeout pending */
526 526 RDSV3_DPRINTF2("rdsv3_recvmsg",
527 527 "woke due to signal");
528 528 ret = -ERESTART;
529 529 }
530 530 #else
531 531 mutex_enter(&sk->sk_sleep->waitq_mutex);
532 532 sk->sk_sleep->waitq_waiters++;
533 533 while ((list_is_empty(&rs->rs_notify_queue) &&
534 534 !rs->rs_cong_notify &&
535 535 !rdsv3_next_incoming(rs, &inc))) {
536 536 ret = cv_wait_sig(&sk->sk_sleep->waitq_cv,
537 537 &sk->sk_sleep->waitq_mutex);
538 538 if (ret == 0) {
539 539 /* signal/timeout pending */
540 540 RDSV3_DPRINTF2("rdsv3_recvmsg",
541 541 "woke due to signal");
542 542 ret = -EINTR;
543 543 break;
544 544 }
545 545 }
546 546 sk->sk_sleep->waitq_waiters--;
547 547 mutex_exit(&sk->sk_sleep->waitq_mutex);
548 548 #endif
549 549
550 550 RDSV3_DPRINTF5("rdsv3_recvmsg",
551 551 "recvmsg woke rs: %p inc %p ret %d",
552 552 rs, inc, -ret);
553 553
554 554 if (ret < 0)
555 555 break;
556 556
557 557 /*
558 558 * if the wakeup was due to rs_notify_queue or
559 559 * rs_cong_notify then we need to handle those first.
560 560 */
561 561 continue;
562 562 }
563 563
564 564 RDSV3_DPRINTF5("rdsv3_recvmsg",
565 565 "copying inc %p from %u.%u.%u.%u:%u to user", inc,
566 566 NIPQUAD(inc->i_conn->c_faddr),
567 567 ntohs(inc->i_hdr.h_sport));
568 568
569 569 ret = inc->i_conn->c_trans->inc_copy_to_user(inc, uio, size);
570 570 if (ret < 0)
571 571 break;
572 572
573 573 /*
574 574 * if the message we just copied isn't at the head of the
575 575 * recv queue then someone else raced us to return it, try
576 576 * to get the next message.
577 577 */
578 578 if (!rdsv3_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
579 579 rdsv3_inc_put(inc);
580 580 inc = NULL;
581 581 rdsv3_stats_inc(s_recv_deliver_raced);
582 582 continue;
583 583 }
584 584
585 585 if (ret < ntohl(inc->i_hdr.h_len)) {
586 586 if (msg_flags & MSG_TRUNC)
587 587 ret = ntohl(inc->i_hdr.h_len);
588 588 msg->msg_flags |= MSG_TRUNC;
589 589 }
590 590
591 591 if (rdsv3_cmsg_recv(inc, msg)) {
592 592 ret = -EFAULT;
593 593 goto out;
594 594 }
595 595
596 596 rdsv3_stats_inc(s_recv_delivered);
597 597
598 598 if (msg->msg_namelen) {
599 599 sin = kmem_alloc(sizeof (struct sockaddr_in), KM_SLEEP);
600 600 sin->sin_family = AF_INET_OFFLOAD;
601 601 sin->sin_port = inc->i_hdr.h_sport;
602 602 sin->sin_addr.s_addr = inc->i_saddr;
603 603 (void) memset(sin->sin_zero, 0,
604 604 sizeof (sin->sin_zero));
605 605 msg->msg_namelen = sizeof (struct sockaddr_in);
606 606 msg->msg_name = sin;
607 607 }
608 608 break;
609 609 }
610 610
611 611 if (inc)
612 612 rdsv3_inc_put(inc);
613 613
614 614 out:
615 615 if (msg && msg->msg_control == NULL)
616 616 msg->msg_controllen = 0;
617 617
618 618 RDSV3_DPRINTF4("rdsv3_recvmsg", "Return(rs: %p, ret: %d)", rs, ret);
619 619
620 620 return (ret);
621 621 }
622 622
623 623 /*
624 624 * The socket is being shut down and we're asked to drop messages that were
625 625 * queued for recvmsg. The caller has unbound the socket so the receive path
626 626 * won't queue any more incoming fragments or messages on the socket.
627 627 */
628 628 void
629 629 rdsv3_clear_recv_queue(struct rdsv3_sock *rs)
630 630 {
631 631 struct rsock *sk = rdsv3_rs_to_sk(rs);
632 632 struct rdsv3_incoming *inc, *tmp;
633 633
634 634 RDSV3_DPRINTF4("rdsv3_clear_recv_queue", "Enter(rs: %p)", rs);
635 635
636 636 rw_enter(&rs->rs_recv_lock, RW_WRITER);
637 637 RDSV3_FOR_EACH_LIST_NODE_SAFE(inc, tmp, &rs->rs_recv_queue, i_item) {
638 638 rdsv3_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
639 639 -ntohl(inc->i_hdr.h_len),
640 640 inc->i_hdr.h_dport);
641 641 list_remove_node(&inc->i_item);
642 642 rdsv3_inc_put(inc);
643 643 }
644 644 rw_exit(&rs->rs_recv_lock);
645 645
646 646 RDSV3_DPRINTF4("rdsv3_clear_recv_queue", "Return(rs: %p)", rs);
647 647 }
648 648
649 649 /*
650 650 * inc->i_saddr isn't used here because it is only set in the receive
651 651 * path.
652 652 */
653 653 void
654 654 rdsv3_inc_info_copy(struct rdsv3_incoming *inc,
655 655 struct rdsv3_info_iterator *iter,
656 656 uint32_be_t saddr, uint32_be_t daddr, int flip)
657 657 {
658 658 struct rds_info_message minfo;
659 659
660 660 minfo.seq = ntohll(inc->i_hdr.h_sequence);
661 661 minfo.len = ntohl(inc->i_hdr.h_len);
662 662
663 663 if (flip) {
664 664 minfo.laddr = daddr;
665 665 minfo.faddr = saddr;
666 666 minfo.lport = inc->i_hdr.h_dport;
667 667 minfo.fport = inc->i_hdr.h_sport;
668 668 } else {
669 669 minfo.laddr = saddr;
670 670 minfo.faddr = daddr;
671 671 minfo.lport = inc->i_hdr.h_sport;
672 672 minfo.fport = inc->i_hdr.h_dport;
673 673 }
674 674
675 675 rdsv3_info_copy(iter, &minfo, sizeof (minfo));
676 676 }
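
A side note on the release path shown above: rdsv3_inc_put() was already written
in terms of atomic_dec_and_test(), so it is untouched by this change. Assuming
that wrapper simply decrements and reports whether the count reached zero, the
pattern maps onto the native atomic_dec_32_nv() primitive roughly as sketched
below (obj_t and obj_put() are hypothetical names, not code from the tree):

#include <sys/types.h>
#include <sys/atomic.h>

typedef struct obj {
        uint32_t o_refcount;
} obj_t;

/*
 * Drop a reference; returns nonzero when the last reference is gone and the
 * caller may free the object (rdsv3_inc_put() invokes the transport's
 * inc_free callback at that point).
 */
static int
obj_put(obj_t *op)
{
        /* atomic_dec_32_nv() returns the value after the decrement. */
        return (atomic_dec_32_nv(&op->o_refcount) == 0);
}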
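
A second side note, on rdsv3_recv_rcvbuf_delta() above: the socket is marked
congested as soon as queued bytes exceed the receive buffer, but is only
reported uncongested again once queued bytes fall below half the buffer, so the
cong/uncong state does not bounce on every small delta. A minimal sketch of
that hysteresis, with hypothetical names (rcv_bytes, rcvbuf, congested)
standing in for the rs_* fields and with the congestion-map updates omitted:

#include <sys/types.h>

/* Hypothetical per-socket state standing in for rs_rcv_bytes and friends. */
typedef struct sock_state {
        int     rcv_bytes;      /* bytes currently queued for recvmsg */
        int     rcvbuf;         /* configured receive buffer size */
        int     congested;      /* last congestion state we advertised */
} sock_state_t;

static void
rcvbuf_delta(sock_state_t *ss, int delta)
{
        if (delta == 0)
                return;

        ss->rcv_bytes += delta;

        if (!ss->congested && ss->rcv_bytes > ss->rcvbuf) {
                /* wasn't -> am congested: advertise the port as congested */
                ss->congested = 1;
        } else if (ss->congested && ss->rcv_bytes < ss->rcvbuf / 2) {
                /*
                 * Only clear once usage drops below half the buffer, so
                 * small deltas near the threshold do not flip the state
                 * back and forth.
                 */
                ss->congested = 0;
        }
}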