Skip to content

Commit e20cf8d

Browse files
Paolo Abeni, davem330
Paolo Abeni
authored and davem330 committed
udp: implement GRO for plain UDP sockets.
This is the RX counterpart of commit bec1f6f ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled, such a socket is also eligible for GRO in the RX path: UDP segments directed to such a socket are assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled with setsockopt(UDP_GRO).

Initial benchmark numbers:

Before:
udp rx: 1079 MB/s 769065 calls/s

After:
udp rx: 1466 MB/s 24877 calls/s

This change introduces a side effect with respect to UDP tunnels: after a UDP tunnel is created, the kernel now performs a lookup for every ingress UDP packet, whereas before such a lookup happened only if the ingress packet carried a valid internal header csum.

rfc v2 -> rfc v3:
- fixed typos in macro name and comments
- really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1
- acquire socket lock in UDP_GRO setsockopt

rfc v1 -> rfc v2:
- use a new option to enable UDP GRO
- use static keys to protect the UDP GRO socket lookup

Signed-off-by: Paolo Abeni <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 60fb956 commit e20cf8d

File tree

5 files changed

+99
-28
lines changed

5 files changed

+99
-28
lines changed

include/linux/udp.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,12 @@ struct udp_sock {
5050
__u8 encap_type; /* Is this an Encapsulation socket? */
5151
unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
5252
no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
53-
encap_enabled:1; /* This socket enabled encap
53+
encap_enabled:1, /* This socket enabled encap
5454
* processing; UDP tunnels and
5555
* different encapsulation layer set
5656
* this
5757
*/
58+
gro_enabled:1; /* Can accept GRO packets */
5859
/*
5960
* Following member retains the information to create a UDP header
6061
* when the socket is uncorked.

include/uapi/linux/udp.h

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct udphdr {
3333
#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */
3434
#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */
3535
#define UDP_SEGMENT 103 /* Set GSO segmentation size */
36+
#define UDP_GRO 104 /* This socket can receive UDP GRO packets */
3637

3738
/* UDP encapsulation types */
3839
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */

net/ipv4/udp.c

+8
Original file line numberDiff line numberDiff line change
@@ -2473,6 +2473,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
24732473
up->gso_size = val;
24742474
break;
24752475

2476+
case UDP_GRO:
2477+
lock_sock(sk);
2478+
if (valbool)
2479+
udp_tunnel_encap_enable(sk->sk_socket);
2480+
up->gro_enabled = valbool;
2481+
release_sock(sk);
2482+
break;
2483+
24762484
/*
24772485
* UDP-Lite's partial checksum coverage (RFC 3828).
24782486
*/

net/ipv4/udp_offload.c

+87-22
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
343343
return segs;
344344
}
345345

346+
#define UDP_GRO_CNT_MAX 64
347+
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
348+
struct sk_buff *skb)
349+
{
350+
struct udphdr *uh = udp_hdr(skb);
351+
struct sk_buff *pp = NULL;
352+
struct udphdr *uh2;
353+
struct sk_buff *p;
354+
355+
/* requires non zero csum, for symmetry with GSO */
356+
if (!uh->check) {
357+
NAPI_GRO_CB(skb)->flush = 1;
358+
return NULL;
359+
}
360+
361+
/* pull encapsulating udp header */
362+
skb_gro_pull(skb, sizeof(struct udphdr));
363+
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
364+
365+
list_for_each_entry(p, head, list) {
366+
if (!NAPI_GRO_CB(p)->same_flow)
367+
continue;
368+
369+
uh2 = udp_hdr(p);
370+
371+
/* Match ports only, as csum is always non zero */
372+
if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
373+
NAPI_GRO_CB(p)->same_flow = 0;
374+
continue;
375+
}
376+
377+
/* Terminate the flow on len mismatch or if it grow "too much".
378+
* Under small packet flood GRO count could elsewhere grow a lot
379+
* leading to execessive truesize values
380+
*/
381+
if (!skb_gro_receive(p, skb) &&
382+
NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
383+
pp = p;
384+
else if (uh->len != uh2->len)
385+
pp = p;
386+
387+
return pp;
388+
}
389+
390+
/* mismatch, but we never need to flush */
391+
return NULL;
392+
}
393+
346394
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
347395
struct udphdr *uh, udp_lookup_t lookup)
348396
{
@@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
353401
int flush = 1;
354402
struct sock *sk;
355403

404+
rcu_read_lock();
405+
sk = (*lookup)(skb, uh->source, uh->dest);
406+
if (!sk)
407+
goto out_unlock;
408+
409+
if (udp_sk(sk)->gro_enabled) {
410+
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
411+
rcu_read_unlock();
412+
return pp;
413+
}
414+
356415
if (NAPI_GRO_CB(skb)->encap_mark ||
357416
(skb->ip_summed != CHECKSUM_PARTIAL &&
358417
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
359-
!NAPI_GRO_CB(skb)->csum_valid))
360-
goto out;
418+
!NAPI_GRO_CB(skb)->csum_valid) ||
419+
!udp_sk(sk)->gro_receive)
420+
goto out_unlock;
361421

362422
/* mark that this skb passed once through the tunnel gro layer */
363423
NAPI_GRO_CB(skb)->encap_mark = 1;
364424

365-
rcu_read_lock();
366-
sk = (*lookup)(skb, uh->source, uh->dest);
367-
368-
if (sk && udp_sk(sk)->gro_receive)
369-
goto unflush;
370-
goto out_unlock;
371-
372-
unflush:
373425
flush = 0;
374426

375427
list_for_each_entry(p, head, list) {
@@ -394,7 +446,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
394446

395447
out_unlock:
396448
rcu_read_unlock();
397-
out:
398449
skb_gro_flush_final(skb, pp, flush);
399450
return pp;
400451
}
@@ -427,6 +478,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
427478
return NULL;
428479
}
429480

481+
static int udp_gro_complete_segment(struct sk_buff *skb)
482+
{
483+
struct udphdr *uh = udp_hdr(skb);
484+
485+
skb->csum_start = (unsigned char *)uh - skb->head;
486+
skb->csum_offset = offsetof(struct udphdr, check);
487+
skb->ip_summed = CHECKSUM_PARTIAL;
488+
489+
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
490+
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
491+
return 0;
492+
}
493+
430494
int udp_gro_complete(struct sk_buff *skb, int nhoff,
431495
udp_lookup_t lookup)
432496
{
@@ -437,16 +501,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
437501

438502
uh->len = newlen;
439503

440-
/* Set encapsulation before calling into inner gro_complete() functions
441-
* to make them set up the inner offsets.
442-
*/
443-
skb->encapsulation = 1;
444-
445504
rcu_read_lock();
446505
sk = (*lookup)(skb, uh->source, uh->dest);
447-
if (sk && udp_sk(sk)->gro_complete)
506+
if (sk && udp_sk(sk)->gro_enabled) {
507+
err = udp_gro_complete_segment(skb);
508+
} else if (sk && udp_sk(sk)->gro_complete) {
509+
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
510+
: SKB_GSO_UDP_TUNNEL;
511+
512+
/* Set encapsulation before calling into inner gro_complete()
513+
* functions to make them set up the inner offsets.
514+
*/
515+
skb->encapsulation = 1;
448516
err = udp_sk(sk)->gro_complete(sk, skb,
449517
nhoff + sizeof(struct udphdr));
518+
}
450519
rcu_read_unlock();
451520

452521
if (skb->remcsum_offload)
@@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
461530
const struct iphdr *iph = ip_hdr(skb);
462531
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
463532

464-
if (uh->check) {
465-
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
533+
if (uh->check)
466534
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
467535
iph->daddr, 0);
468-
} else {
469-
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
470-
}
471536

472537
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
473538
}

net/ipv6/udp_offload.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
147147
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
148148
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
149149

150-
if (uh->check) {
151-
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
150+
if (uh->check)
152151
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
153152
&ipv6h->daddr, 0);
154-
} else {
155-
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
156-
}
157153

158154
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
159155
}

0 commit comments

Comments
 (0)