Skip to content

Commit c319b4d

Browse files
segoon and davem330
authored and committed
net: ipv4: add IPPROTO_ICMP socket kind
This patch adds IPPROTO_ICMP socket kind. It makes it possible to send ICMP_ECHO messages and receive the corresponding ICMP_ECHOREPLY messages without any special privileges. In other words, the patch makes it possible to implement setuid-less and CAP_NET_RAW-less /bin/ping. In order not to increase the kernel's attack surface, the new functionality is disabled by default, but is enabled at bootup by supporting Linux distributions, optionally with restriction to a group or a group range (see below). Similar functionality is implemented in Mac OS X: http://www.manpagez.com/man/4/icmp/ A new ping socket is created with socket(PF_INET, SOCK_DGRAM, IPPROTO_ICMP). Message identifiers (octets 4-5 of ICMP header) are interpreted as local ports. Addresses are stored in struct sockaddr_in. No port numbers are reserved for privileged processes, port 0 is reserved for API ("let the kernel pick a free number"). There is no notion of remote ports, remote port numbers provided by the user (e.g. in connect()) are ignored. Data sent and received include ICMP headers. This is deliberate to: 1) Avoid the need to transport header values like sequence numbers by other means. 2) Make it easier to port existing programs using raw sockets. ICMP headers given to send() are checked and sanitized. The type must be ICMP_ECHO and the code must be zero (future extensions might relax this, see below). The id is set to the number (local port) of the socket, the checksum is always recomputed. ICMP reply packets received from the network are demultiplexed according to their ids, and are returned by recv() without any modifications. IP header information and ICMP errors of those packets may be obtained via ancillary data (IP_RECVTTL, IP_RETOPTS, and IP_RECVERR). ICMP source quenches and redirects are reported as fake errors via the error queue (IP_RECVERR); the next hop address for redirects is saved to ee_info (in network order).
socket(2) is restricted to the group range specified in "/proc/sys/net/ipv4/ping_group_range". It is "1 0" by default, meaning that nobody (not even root) may create ping sockets. Setting it to "100 100" would grant permissions to a single group (to either make /sbin/ping g+s and owned by this group or to grant permissions to the "netadmins" group), "0 4294967295" would enable it for the world, "100 4294967295" would enable it for users, but not daemons. The existing code might be (in the unlikely case anyone needs it) extended rather easily to handle other similar pairs of ICMP messages (Timestamp/Reply, Information Request/Reply, Address Mask Request/Reply, etc.). Userspace ping util & patch for it: http://openwall.info/wiki/people/segoon/ping For Openwall GNU/*/Linux it was the last step on the road to the setuid-less distro. A revision of this patch (for RHEL5/OpenVZ kernels) is in use in Owl-current, such as in the 2011/03/12 LiveCD ISOs: http://mirrors.kernel.org/openwall/Owl/current/iso/ Initially this functionality was written by Pavel Kankovsky for Linux 2.4.32, but unfortunately it was never made public. All ping options (-b, -p, -Q, -R, -s, -t, -T, -M, -I) are tested with the patch. PATCH v3: - switched to flowi4. - minor changes to be consistent with raw sockets code. PATCH v2: - changed ping_debug() to pr_debug(). - removed CONFIG_IP_PING. - removed ping_seq_fops.owner field (unused for procfs). - switched to proc_net_fops_create(). - switched to %pK in seq_printf(). PATCH v1: - fixed checksumming bug. - CAP_NET_RAW may not create icmp sockets anymore. RFC v2: - minor cleanups. - introduced sysctl'able group range to restrict socket(2). Signed-off-by: Vasiliy Kulikov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent f201903 commit c319b4d

File tree

7 files changed

+1110
-2
lines changed

7 files changed

+1110
-2
lines changed

include/net/netns/ipv4.h

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ struct netns_ipv4 {
5454
int sysctl_rt_cache_rebuild_count;
5555
int current_rt_cache_rebuild_count;
5656

57+
unsigned int sysctl_ping_group_range[2];
58+
5759
atomic_t rt_genid;
5860
atomic_t dev_addr_genid;
5961

include/net/ping.h

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* INET An implementation of the TCP/IP protocol suite for the LINUX
3+
* operating system. INET is implemented using the BSD Socket
4+
* interface as the means of communication with the user level.
5+
*
6+
* Definitions for the "ping" module.
7+
*
8+
* This program is free software; you can redistribute it and/or
9+
* modify it under the terms of the GNU General Public License
10+
* as published by the Free Software Foundation; either version
11+
* 2 of the License, or (at your option) any later version.
12+
*/
13+
#ifndef _PING_H
14+
#define _PING_H
15+
16+
#include <net/netns/hash.h>
17+
18+
/* PING_HTABLE_SIZE must be power of 2 */
19+
#define PING_HTABLE_SIZE 64
20+
#define PING_HTABLE_MASK (PING_HTABLE_SIZE-1)
21+
22+
#define ping_portaddr_for_each_entry(__sk, node, list) \
23+
hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
24+
25+
/*
26+
* gid_t is either uint or ushort. We want to pass it to
27+
* proc_dointvec_minmax(), so it must not be larger than INT_MAX
28+
*/
29+
#define GID_T_MAX (((gid_t)~0U) >> 1)
30+
31+
struct ping_table {
32+
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
33+
rwlock_t lock;
34+
};
35+
36+
struct ping_iter_state {
37+
struct seq_net_private p;
38+
int bucket;
39+
};
40+
41+
extern struct proto ping_prot;
42+
43+
44+
extern void ping_rcv(struct sk_buff *);
45+
extern void ping_err(struct sk_buff *, u32 info);
46+
47+
extern void inet_get_ping_group_range_net(struct net *net, unsigned int *low, unsigned int *high);
48+
49+
#ifdef CONFIG_PROC_FS
50+
extern int __init ping_proc_init(void);
51+
extern void ping_proc_exit(void);
52+
#endif
53+
54+
void __init ping_init(void);
55+
56+
57+
#endif /* _PING_H */

net/ipv4/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
1111
datagram.o raw.o udp.o udplite.o \
1212
arp.o icmp.o devinet.o af_inet.o igmp.o \
1313
fib_frontend.o fib_semantics.o fib_trie.o \
14-
inet_fragment.o
14+
inet_fragment.o ping.o
1515

1616
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
1717
obj-$(CONFIG_PROC_FS) += proc.o

net/ipv4/af_inet.c

+22
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
#include <net/tcp.h>
106106
#include <net/udp.h>
107107
#include <net/udplite.h>
108+
#include <net/ping.h>
108109
#include <linux/skbuff.h>
109110
#include <net/sock.h>
110111
#include <net/raw.h>
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
10081009
.flags = INET_PROTOSW_PERMANENT,
10091010
},
10101011

1012+
{
1013+
.type = SOCK_DGRAM,
1014+
.protocol = IPPROTO_ICMP,
1015+
.prot = &ping_prot,
1016+
.ops = &inet_dgram_ops,
1017+
.no_check = UDP_CSUM_DEFAULT,
1018+
.flags = INET_PROTOSW_REUSE,
1019+
},
10111020

10121021
{
10131022
.type = SOCK_RAW,
@@ -1527,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
15271536

15281537
static const struct net_protocol icmp_protocol = {
15291538
.handler = icmp_rcv,
1539+
.err_handler = ping_err,
15301540
.no_policy = 1,
15311541
.netns_ok = 1,
15321542
};
@@ -1642,6 +1652,10 @@ static int __init inet_init(void)
16421652
if (rc)
16431653
goto out_unregister_udp_proto;
16441654

1655+
rc = proto_register(&ping_prot, 1);
1656+
if (rc)
1657+
goto out_unregister_raw_proto;
1658+
16451659
/*
16461660
* Tell SOCKET that we are alive...
16471661
*/
@@ -1697,6 +1711,8 @@ static int __init inet_init(void)
16971711
/* Add UDP-Lite (RFC 3828) */
16981712
udplite4_register();
16991713

1714+
ping_init();
1715+
17001716
/*
17011717
* Set the ICMP layer up
17021718
*/
@@ -1727,6 +1743,8 @@ static int __init inet_init(void)
17271743
rc = 0;
17281744
out:
17291745
return rc;
1746+
out_unregister_raw_proto:
1747+
proto_unregister(&raw_prot);
17301748
out_unregister_udp_proto:
17311749
proto_unregister(&udp_prot);
17321750
out_unregister_tcp_proto:
@@ -1751,11 +1769,15 @@ static int __init ipv4_proc_init(void)
17511769
goto out_tcp;
17521770
if (udp4_proc_init())
17531771
goto out_udp;
1772+
if (ping_proc_init())
1773+
goto out_ping;
17541774
if (ip_misc_proc_init())
17551775
goto out_misc;
17561776
out:
17571777
return rc;
17581778
out_misc:
1779+
ping_proc_exit();
1780+
out_ping:
17591781
udp4_proc_exit();
17601782
out_udp:
17611783
tcp4_proc_exit();

net/ipv4/icmp.c

+11-1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
#include <net/tcp.h>
8484
#include <net/udp.h>
8585
#include <net/raw.h>
86+
#include <net/ping.h>
8687
#include <linux/skbuff.h>
8788
#include <net/sock.h>
8889
#include <linux/errno.h>
@@ -781,6 +782,15 @@ static void icmp_redirect(struct sk_buff *skb)
781782
iph->saddr, skb->dev);
782783
break;
783784
}
785+
786+
/* Ping wants to see redirects.
787+
* Let's pretend they are errors of sorts... */
788+
if (iph->protocol == IPPROTO_ICMP &&
789+
iph->ihl >= 5 &&
790+
pskb_may_pull(skb, (iph->ihl<<2)+8)) {
791+
ping_err(skb, icmp_hdr(skb)->un.gateway);
792+
}
793+
784794
out:
785795
return;
786796
out_err:
@@ -1041,7 +1051,7 @@ int icmp_rcv(struct sk_buff *skb)
10411051
*/
10421052
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
10431053
[ICMP_ECHOREPLY] = {
1044-
.handler = icmp_discard,
1054+
.handler = ping_rcv,
10451055
},
10461056
[1] = {
10471057
.handler = icmp_discard,

0 commit comments

Comments
 (0)