Skip to content

Commit 6acc9b4

Browse files
joestringer authored and borkmann committed
bpf: Add helper to retrieve socket in BPF
This patch adds new BPF helper functions, bpf_sk_lookup_tcp() and bpf_sk_lookup_udp(), which allow BPF programs to find out if there is a socket listening on this host, and return a socket pointer which the BPF program can then access to determine, for instance, whether to forward or drop traffic. bpf_sk_lookup_xxx() may take a reference on the socket, so when a BPF program makes use of this function, it must subsequently pass the returned pointer into the newly added bpf_sk_release() to return the reference.

By way of example, the following pseudocode would filter inbound connections at the TC hook if there is no corresponding service listening for the traffic:

    struct bpf_sock_tuple tuple;
    struct bpf_sock *sk;

    populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet
    sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof tuple, netns, 0);
    if (!sk) {
        // Couldn't find a socket listening for this traffic. Drop.
        return TC_ACT_SHOT;
    }
    bpf_sk_release(sk);
    return TC_ACT_OK;

Signed-off-by: Joe Stringer <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent fd978bf commit 6acc9b4

File tree

5 files changed

+354
-3
lines changed

5 files changed

+354
-3
lines changed

include/uapi/linux/bpf.h

+92-1
Original file line numberDiff line numberDiff line change
@@ -2144,6 +2144,77 @@ union bpf_attr {
21442144
* request in the skb.
21452145
* Return
21462146
* 0 on success, or a negative error in case of failure.
2147+
*
2148+
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
2149+
* Description
2150+
* Look for TCP socket matching *tuple*, optionally in a child
2151+
* network namespace *netns*. The return value must be checked,
2152+
* and if non-NULL, released via **bpf_sk_release**\ ().
2153+
*
2154+
* The *ctx* should point to the context of the program, such as
2155+
* the skb or socket (depending on the hook in use). This is used
2156+
* to determine the base network namespace for the lookup.
2157+
*
2158+
* *tuple_size* must be one of:
2159+
*
2160+
* **sizeof**\ (*tuple*\ **->ipv4**)
2161+
* Look for an IPv4 socket.
2162+
* **sizeof**\ (*tuple*\ **->ipv6**)
2163+
* Look for an IPv6 socket.
2164+
*
2165+
* If the *netns* is zero, then the socket lookup table in the
2166+
* netns associated with the *ctx* will be used. For the TC hooks,
2167+
* this is the netns of the device in the skb. For socket hooks,
2168+
* this is the netns of the socket. If *netns* is non-zero, then
2169+
* it specifies the ID of the netns relative to the netns
2170+
* associated with the *ctx*.
2171+
*
2172+
* All values for *flags* are reserved for future usage, and must
2173+
* be left at zero.
2174+
*
2175+
* This helper is available only if the kernel was compiled with
2176+
* **CONFIG_NET** configuration option.
2177+
* Return
2178+
* Pointer to *struct bpf_sock*, or NULL in case of failure.
2179+
*
2180+
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
2181+
* Description
2182+
* Look for UDP socket matching *tuple*, optionally in a child
2183+
* network namespace *netns*. The return value must be checked,
2184+
* and if non-NULL, released via **bpf_sk_release**\ ().
2185+
*
2186+
* The *ctx* should point to the context of the program, such as
2187+
* the skb or socket (depending on the hook in use). This is used
2188+
* to determine the base network namespace for the lookup.
2189+
*
2190+
* *tuple_size* must be one of:
2191+
*
2192+
* **sizeof**\ (*tuple*\ **->ipv4**)
2193+
* Look for an IPv4 socket.
2194+
* **sizeof**\ (*tuple*\ **->ipv6**)
2195+
* Look for an IPv6 socket.
2196+
*
2197+
* If the *netns* is zero, then the socket lookup table in the
2198+
* netns associated with the *ctx* will be used. For the TC hooks,
2199+
* this is the netns of the device in the skb. For socket hooks,
2200+
* this is the netns of the socket. If *netns* is non-zero, then
2201+
* it specifies the ID of the netns relative to the netns
2202+
* associated with the *ctx*.
2203+
*
2204+
* All values for *flags* are reserved for future usage, and must
2205+
* be left at zero.
2206+
*
2207+
* This helper is available only if the kernel was compiled with
2208+
* **CONFIG_NET** configuration option.
2209+
* Return
2210+
* Pointer to *struct bpf_sock*, or NULL in case of failure.
2211+
*
2212+
* int bpf_sk_release(struct bpf_sock *sk)
2213+
* Description
2214+
* Release the reference held by *sk*. *sk* must be a non-NULL
2215+
* pointer that was returned from bpf_sk_lookup_xxx\ ().
2216+
* Return
2217+
* 0 on success, or a negative error in case of failure.
21472218
*/
21482219
#define __BPF_FUNC_MAPPER(FN) \
21492220
FN(unspec), \
@@ -2229,7 +2300,10 @@ union bpf_attr {
22292300
FN(get_current_cgroup_id), \
22302301
FN(get_local_storage), \
22312302
FN(sk_select_reuseport), \
2232-
FN(skb_ancestor_cgroup_id),
2303+
FN(skb_ancestor_cgroup_id), \
2304+
FN(sk_lookup_tcp), \
2305+
FN(sk_lookup_udp), \
2306+
FN(sk_release),
22332307

22342308
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
22352309
* function eBPF program intends to call
@@ -2399,6 +2473,23 @@ struct bpf_sock {
23992473
*/
24002474
};
24012475

2476+
struct bpf_sock_tuple {
2477+
union {
2478+
struct {
2479+
__be32 saddr;
2480+
__be32 daddr;
2481+
__be16 sport;
2482+
__be16 dport;
2483+
} ipv4;
2484+
struct {
2485+
__be32 saddr[4];
2486+
__be32 daddr[4];
2487+
__be16 sport;
2488+
__be16 dport;
2489+
} ipv6;
2490+
};
2491+
};
2492+
24022493
#define XDP_PACKET_HEADROOM 256
24032494

24042495
/* User return codes for XDP prog type.

kernel/bpf/verifier.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,12 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
153153
* PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
154154
* passes through a NULL-check conditional. For the branch wherein the state is
155155
* changed to CONST_IMM, the verifier releases the reference.
156+
*
157+
* For each helper function that allocates a reference, such as
158+
* bpf_sk_lookup_tcp(), there is a corresponding release function, such as
159+
* bpf_sk_release(). When a reference type passes into the release function,
160+
* the verifier also releases the reference. If any unchecked or unreleased
161+
* reference remains at the end of the program, the verifier rejects it.
156162
*/
157163

158164
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
@@ -300,7 +306,7 @@ static bool arg_type_is_refcounted(enum bpf_arg_type type)
300306
*/
301307
static bool is_release_function(enum bpf_func_id func_id)
302308
{
303-
return false;
309+
return func_id == BPF_FUNC_sk_release;
304310
}
305311

306312
/* string representation of 'enum bpf_reg_type' */

net/core/filter.c

+151
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,17 @@
5858
#include <net/busy_poll.h>
5959
#include <net/tcp.h>
6060
#include <net/xfrm.h>
61+
#include <net/udp.h>
6162
#include <linux/bpf_trace.h>
6263
#include <net/xdp_sock.h>
6364
#include <linux/inetdevice.h>
65+
#include <net/inet_hashtables.h>
66+
#include <net/inet6_hashtables.h>
6467
#include <net/ip_fib.h>
6568
#include <net/flow.h>
6669
#include <net/arp.h>
6770
#include <net/ipv6.h>
71+
#include <net/net_namespace.h>
6872
#include <linux/seg6_local.h>
6973
#include <net/seg6.h>
7074
#include <net/seg6_local.h>
@@ -4813,6 +4817,141 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
48134817
};
48144818
#endif /* CONFIG_IPV6_SEG6_BPF */
48154819

4820+
/* sk_lookup - find the TCP or UDP socket matching @tuple in @net.
 * @net:    network namespace whose socket tables are searched
 * @tuple:  addresses and ports of the flow, in network byte order
 * @skb:    packet the lookup is performed for; supplies the interface
 *          indexes used to scope the lookup
 * @family: AF_INET selects tuple->ipv4, any other value selects tuple->ipv6
 * @proto:  IPPROTO_TCP selects the TCP tables, any other value selects UDP
 *
 * Returns the matching socket, or NULL if none was found (or if IPv6 was
 * requested on a kernel built without CONFIG_IPV6).
 *
 * Only the TCP lookups report through 'refcounted' whether a reference was
 * taken. A socket that is neither refcounted nor flagged SOCK_RCU_FREE is
 * discarded with a one-time warning and NULL is returned instead.
 */
struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
		       struct sk_buff *skb, u8 family, u8 proto)
{
	/* skb->dev may be NULL on some hooks (bpf_sk_lookup() already falls
	 * back to skb->sk for the namespace in that case), so use ifindex 0
	 * instead of dereferencing a NULL device.
	 */
	int dif = skb->dev ? skb->dev->ifindex : 0;
	bool refcounted = false;
	struct sock *sk = NULL;

	if (family == AF_INET) {
		__be32 src4 = tuple->ipv4.saddr;
		__be32 dst4 = tuple->ipv4.daddr;
		int sdif = inet_sdif(skb);

		if (proto == IPPROTO_TCP)
			sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
					   src4, tuple->ipv4.sport,
					   dst4, tuple->ipv4.dport,
					   dif, sdif, &refcounted);
		else
			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
					       dst4, tuple->ipv4.dport,
					       dif, sdif, &udp_table, skb);
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
		int sdif = inet6_sdif(skb);

		/* Unlike the other three lookups, __inet6_lookup() takes the
		 * destination port in host byte order, so convert it here;
		 * __udp6_lib_lookup() keeps the network-order value.
		 */
		if (proto == IPPROTO_TCP)
			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
					    src6, tuple->ipv6.sport,
					    dst6, ntohs(tuple->ipv6.dport),
					    dif, sdif, &refcounted);
		else
			sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
					       dst6, tuple->ipv6.dport,
					       dif, sdif, &udp_table, skb);
#endif
	}

	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
		WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
		sk = NULL;
	}
	return sk;
}
4865+
4866+
/* bpf_sk_lookup performs the core lookup for different types of sockets,
4867+
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
4868+
* Returns the socket as an 'unsigned long' to simplify the casting in the
4869+
* callers to satisfy BPF_CALL declarations.
4870+
*/
4871+
static unsigned long
4872+
bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
4873+
u8 proto, u64 netns_id, u64 flags)
4874+
{
4875+
struct net *caller_net;
4876+
struct sock *sk = NULL;
4877+
u8 family = AF_UNSPEC;
4878+
struct net *net;
4879+
4880+
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
4881+
if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
4882+
goto out;
4883+
4884+
if (skb->dev)
4885+
caller_net = dev_net(skb->dev);
4886+
else
4887+
caller_net = sock_net(skb->sk);
4888+
if (netns_id) {
4889+
net = get_net_ns_by_id(caller_net, netns_id);
4890+
if (unlikely(!net))
4891+
goto out;
4892+
sk = sk_lookup(net, tuple, skb, family, proto);
4893+
put_net(net);
4894+
} else {
4895+
net = caller_net;
4896+
sk = sk_lookup(net, tuple, skb, family, proto);
4897+
}
4898+
4899+
if (sk)
4900+
sk = sk_to_full_sk(sk);
4901+
out:
4902+
return (unsigned long) sk;
4903+
}
4904+
4905+
/* BPF helper entry point for bpf_sk_lookup_tcp(): forwards all arguments to
 * bpf_sk_lookup() with the protocol fixed to IPPROTO_TCP.
 */
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
	const u8 proto = IPPROTO_TCP;

	return bpf_sk_lookup(skb, tuple, len, proto, netns_id, flags);
}
4910+
4911+
static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
4912+
.func = bpf_sk_lookup_tcp,
4913+
.gpl_only = false,
4914+
.pkt_access = true,
4915+
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
4916+
.arg1_type = ARG_PTR_TO_CTX,
4917+
.arg2_type = ARG_PTR_TO_MEM,
4918+
.arg3_type = ARG_CONST_SIZE,
4919+
.arg4_type = ARG_ANYTHING,
4920+
.arg5_type = ARG_ANYTHING,
4921+
};
4922+
4923+
/* BPF helper entry point for bpf_sk_lookup_udp(): forwards all arguments to
 * bpf_sk_lookup() with the protocol fixed to IPPROTO_UDP.
 */
BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
	const u8 proto = IPPROTO_UDP;

	return bpf_sk_lookup(skb, tuple, len, proto, netns_id, flags);
}
4928+
4929+
static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
4930+
.func = bpf_sk_lookup_udp,
4931+
.gpl_only = false,
4932+
.pkt_access = true,
4933+
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
4934+
.arg1_type = ARG_PTR_TO_CTX,
4935+
.arg2_type = ARG_PTR_TO_MEM,
4936+
.arg3_type = ARG_CONST_SIZE,
4937+
.arg4_type = ARG_ANYTHING,
4938+
.arg5_type = ARG_ANYTHING,
4939+
};
4940+
4941+
/* BPF helper entry point for bpf_sk_release(): gives back the socket
 * reference unless the socket is flagged SOCK_RCU_FREE, in which case
 * nothing is put. Always returns 0.
 */
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
	if (sock_flag(sk, SOCK_RCU_FREE))
		return 0;

	sock_gen_put(sk);
	return 0;
}
4947+
4948+
static const struct bpf_func_proto bpf_sk_release_proto = {
4949+
.func = bpf_sk_release,
4950+
.gpl_only = false,
4951+
.ret_type = RET_INTEGER,
4952+
.arg1_type = ARG_PTR_TO_SOCKET,
4953+
};
4954+
48164955
bool bpf_helper_changes_pkt_data(void *func)
48174956
{
48184957
if (func == bpf_skb_vlan_push ||
@@ -5019,6 +5158,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
50195158
case BPF_FUNC_skb_ancestor_cgroup_id:
50205159
return &bpf_skb_ancestor_cgroup_id_proto;
50215160
#endif
5161+
case BPF_FUNC_sk_lookup_tcp:
5162+
return &bpf_sk_lookup_tcp_proto;
5163+
case BPF_FUNC_sk_lookup_udp:
5164+
return &bpf_sk_lookup_udp_proto;
5165+
case BPF_FUNC_sk_release:
5166+
return &bpf_sk_release_proto;
50225167
default:
50235168
return bpf_base_func_proto(func_id);
50245169
}
@@ -5119,6 +5264,12 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
51195264
return &bpf_sk_redirect_hash_proto;
51205265
case BPF_FUNC_get_local_storage:
51215266
return &bpf_get_local_storage_proto;
5267+
case BPF_FUNC_sk_lookup_tcp:
5268+
return &bpf_sk_lookup_tcp_proto;
5269+
case BPF_FUNC_sk_lookup_udp:
5270+
return &bpf_sk_lookup_udp_proto;
5271+
case BPF_FUNC_sk_release:
5272+
return &bpf_sk_release_proto;
51225273
default:
51235274
return bpf_base_func_proto(func_id);
51245275
}

0 commit comments

Comments
 (0)