Blame - net/ipv4/icmp.c - hafnium/third_party/linux

blob: f86f948a4b4c1036932616fec32cb932330da0d6 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-or-later
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2	/*
				3	* NET3: Implementation of the ICMP protocol layer.
				4	*
				5	* Alan Cox, <alan@lxorguk.ukuu.org.uk>
				6	*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	7	* Some of the function names and the icmp unreach table for this
				8	* module were derived from [icmp.c 1.0.11 06/02/93] by
				9	* Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
				10	* Other than that this module is a complete rewrite.
				11	*
				12	* Fixes:
				13	* Clemens Fruhwirth : introduce global icmp rate limiting
				14	* with icmp type masking ability instead
				15	* of broken per type icmp timeouts.
				16	* Mike Shaver : RFC1122 checks.
				17	* Alan Cox : Multicast ping reply as self.
				18	* Alan Cox : Fix atomicity lockup in ip_build_xmit
				19	* call.
				20	* Alan Cox : Added 216,128 byte paths to the MTU
				21	* code.
				22	* Martin Mares : RFC1812 checks.
				23	* Martin Mares : Can be configured to follow redirects
				24	* if acting as a router _without_ a
				25	* routing protocol (RFC 1812).
				26	* Martin Mares : Echo requests may be configured to
				27	* be ignored (RFC 1812).
				28	* Martin Mares : Limitation of ICMP error message
				29	* transmit rate (RFC 1812).
				30	* Martin Mares : TOS and Precedence set correctly
				31	* (RFC 1812).
				32	* Martin Mares : Now copying as much data from the
				33	* original packet as we can without
				34	* exceeding 576 bytes (RFC 1812).
				35	* Willy Konynenberg : Transparent proxying support.
				36	* Keith Owens : RFC1191 correction for 4.2BSD based
				37	* path MTU bug.
				38	* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
				39	* valid (RFC 1812).
				40	* Andi Kleen : Check all packet lengths properly
				41	* and moved all kfree_skb() up to
				42	* icmp_rcv.
				43	* Andi Kleen : Move the rate limit bookkeeping
				44	* into the dest entry and use a token
				45	* bucket filter (thanks to ANK). Make
				46	* the rates sysctl configurable.
				47	* Yu Tianli : Fixed two ugly bugs in icmp_send
				48	* - IP option length was accounted wrongly
				49	* - ICMP header length was not accounted
				50	* at all.
				51	* Tristan Greaves : Added sysctl option to ignore bogus
				52	* broadcast responses from broken routers.
				53	*
				54	* To Fix:
				55	*
				56	* - Should use skb_pull() instead of all the manual checking.
				57	* This would also greatly simplify some upper layer error handlers. --AK
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	58	*/
				59
				60	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				61
				62	#include <linux/module.h>
				63	#include <linux/types.h>
				64	#include <linux/jiffies.h>
				65	#include <linux/kernel.h>
				66	#include <linux/fcntl.h>
				67	#include <linux/socket.h>
				68	#include <linux/in.h>
				69	#include <linux/inet.h>
				70	#include <linux/inetdevice.h>
				71	#include <linux/netdevice.h>
				72	#include <linux/string.h>
				73	#include <linux/netfilter_ipv4.h>
				74	#include <linux/slab.h>
				75	#include <net/snmp.h>
				76	#include <net/ip.h>
				77	#include <net/route.h>
				78	#include <net/protocol.h>
				79	#include <net/icmp.h>
				80	#include <net/tcp.h>
				81	#include <net/udp.h>
				82	#include <net/raw.h>
				83	#include <net/ping.h>
				84	#include <linux/skbuff.h>
				85	#include <net/sock.h>
				86	#include <linux/errno.h>
				87	#include <linux/timer.h>
				88	#include <linux/init.h>
				89	#include <linux/uaccess.h>
				90	#include <net/checksum.h>
				91	#include <net/xfrm.h>
				92	#include <net/inet_common.h>
				93	#include <net/ip_fib.h>
				94	#include <net/l3mdev.h>
				95
				96	/*
				97	* Build xmit assembly blocks
				98	*/
				99
				100	struct icmp_bxm {
				101	struct sk_buff *skb;
				102	int offset;
				103	int data_len;
				104
				105	struct {
				106	struct icmphdr icmph;
				107	__be32 times[3];
				108	} data;
				109	int head_len;
				110	struct ip_options_data replyopts;
				111	};
				112
				113	/* An array of errno for error messages from dest unreach. */
				114	/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
				115
				116	const struct icmp_err icmp_err_convert[] = {
				117	{
				118	.errno = ENETUNREACH, /* ICMP_NET_UNREACH */
				119	.fatal = 0,
				120	},
				121	{
				122	.errno = EHOSTUNREACH, /* ICMP_HOST_UNREACH */
				123	.fatal = 0,
				124	},
				125	{
				126	.errno = ENOPROTOOPT /* ICMP_PROT_UNREACH */,
				127	.fatal = 1,
				128	},
				129	{
				130	.errno = ECONNREFUSED, /* ICMP_PORT_UNREACH */
				131	.fatal = 1,
				132	},
				133	{
				134	.errno = EMSGSIZE, /* ICMP_FRAG_NEEDED */
				135	.fatal = 0,
				136	},
				137	{
				138	.errno = EOPNOTSUPP, /* ICMP_SR_FAILED */
				139	.fatal = 0,
				140	},
				141	{
				142	.errno = ENETUNREACH, /* ICMP_NET_UNKNOWN */
				143	.fatal = 1,
				144	},
				145	{
				146	.errno = EHOSTDOWN, /* ICMP_HOST_UNKNOWN */
				147	.fatal = 1,
				148	},
				149	{
				150	.errno = ENONET, /* ICMP_HOST_ISOLATED */
				151	.fatal = 1,
				152	},
				153	{
				154	.errno = ENETUNREACH, /* ICMP_NET_ANO */
				155	.fatal = 1,
				156	},
				157	{
				158	.errno = EHOSTUNREACH, /* ICMP_HOST_ANO */
				159	.fatal = 1,
				160	},
				161	{
				162	.errno = ENETUNREACH, /* ICMP_NET_UNR_TOS */
				163	.fatal = 0,
				164	},
				165	{
				166	.errno = EHOSTUNREACH, /* ICMP_HOST_UNR_TOS */
				167	.fatal = 0,
				168	},
				169	{
				170	.errno = EHOSTUNREACH, /* ICMP_PKT_FILTERED */
				171	.fatal = 1,
				172	},
				173	{
				174	.errno = EHOSTUNREACH, /* ICMP_PREC_VIOLATION */
				175	.fatal = 1,
				176	},
				177	{
				178	.errno = EHOSTUNREACH, /* ICMP_PREC_CUTOFF */
				179	.fatal = 1,
				180	},
				181	};
				182	EXPORT_SYMBOL(icmp_err_convert);
				183
				184	/*
				185	* ICMP control array. This specifies what to do with each ICMP.
				186	*/
				187
				188	struct icmp_control {
				189	bool (handler)(struct sk_buff skb);
				190	short error; /* This ICMP is classed as an error message */
				191	};
				192
				193	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
				194
				195	/*
				196	* The ICMP socket(s). This is the most convenient way to flow control
				197	* our ICMP output as well as maintain a clean interface throughout
				198	* all layers. All Socketless IP sends will soon be gone.
				199	*
				200	* On SMP we have one ICMP socket per-cpu.
				201	*/
				202	static struct sock icmp_sk(struct net net)
				203	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	204	return this_cpu_read(*net->ipv4.icmp_sk);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	205	}
				206
				207	/* Called with BH disabled */
				208	static inline struct sock icmp_xmit_lock(struct net net)
				209	{
				210	struct sock *sk;
				211
				212	sk = icmp_sk(net);
				213
				214	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
				215	/* This can happen if the output path signals a
				216	* dst_link_failure() for an outgoing ICMP packet.
				217	*/
				218	return NULL;
				219	}
				220	return sk;
				221	}
				222
				223	static inline void icmp_xmit_unlock(struct sock *sk)
				224	{
				225	spin_unlock(&sk->sk_lock.slock);
				226	}
				227
				228	int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
				229	int sysctl_icmp_msgs_burst __read_mostly = 50;
				230
				231	static struct {
				232	spinlock_t lock;
				233	u32 credit;
				234	u32 stamp;
				235	} icmp_global = {
				236	.lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
				237	};
				238
				239	/**
				240	* icmp_global_allow - Are we allowed to send one more ICMP message ?
				241	*
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	242	* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	243	* Returns false if we reached the limit and can not send another packet.
				244	* Note: called with BH disabled
				245	*/
				246	bool icmp_global_allow(void)
				247	{
				248	u32 credit, delta, incr = 0, now = (u32)jiffies;
				249	bool rc = false;
				250
				251	/* Check if token bucket is empty and cannot be refilled
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	252	* without taking the spinlock. The READ_ONCE() are paired
				253	* with the following WRITE_ONCE() in this same function.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	254	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	255	if (!READ_ONCE(icmp_global.credit)) {
				256	delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	257	if (delta < HZ / 50)
				258	return false;
				259	}
				260
				261	spin_lock(&icmp_global.lock);
				262	delta = min_t(u32, now - icmp_global.stamp, HZ);
				263	if (delta >= HZ / 50) {
				264	incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
				265	if (incr)
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	266	WRITE_ONCE(icmp_global.stamp, now);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	267	}
				268	credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
				269	if (credit) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	270	/* We want to use a credit of one in average, but need to randomize
				271	* it for security reasons.
				272	*/
				273	credit = max_t(int, credit - prandom_u32_max(3), 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	274	rc = true;
				275	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	276	WRITE_ONCE(icmp_global.credit, credit);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	277	spin_unlock(&icmp_global.lock);
				278	return rc;
				279	}
				280	EXPORT_SYMBOL(icmp_global_allow);
				281
				282	static bool icmpv4_mask_allow(struct net *net, int type, int code)
				283	{
				284	if (type > NR_ICMP_TYPES)
				285	return true;
				286
				287	/* Don't limit PMTU discovery. */
				288	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
				289	return true;
				290
				291	/* Limit if icmp type is enabled in ratemask. */
				292	if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
				293	return true;
				294
				295	return false;
				296	}
				297
				298	static bool icmpv4_global_allow(struct net *net, int type, int code)
				299	{
				300	if (icmpv4_mask_allow(net, type, code))
				301	return true;
				302
				303	if (icmp_global_allow())
				304	return true;
				305
				306	return false;
				307	}
				308
				309	/*
				310	* Send an ICMP frame.
				311	*/
				312
				313	static bool icmpv4_xrlim_allow(struct net net, struct rtable rt,
				314	struct flowi4 *fl4, int type, int code)
				315	{
				316	struct dst_entry *dst = &rt->dst;
				317	struct inet_peer *peer;
				318	bool rc = true;
				319	int vif;
				320
				321	if (icmpv4_mask_allow(net, type, code))
				322	goto out;
				323
				324	/* No rate limit on loopback */
				325	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
				326	goto out;
				327
				328	vif = l3mdev_master_ifindex(dst->dev);
				329	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
				330	rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
				331	if (peer)
				332	inet_putpeer(peer);
				333	out:
				334	return rc;
				335	}
				336
				337	/*
				338	* Maintain the counters used in the SNMP statistics for outgoing ICMP
				339	*/
				340	void icmp_out_count(struct net *net, unsigned char type)
				341	{
				342	ICMPMSGOUT_INC_STATS(net, type);
				343	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
				344	}
				345
				346	/*
				347	* Checksum each fragment, and on the first include the headers and final
				348	* checksum.
				349	*/
				350	static int icmp_glue_bits(void from, char to, int offset, int len, int odd,
				351	struct sk_buff *skb)
				352	{
				353	struct icmp_bxm icmp_param = (struct icmp_bxm )from;
				354	__wsum csum;
				355
				356	csum = skb_copy_and_csum_bits(icmp_param->skb,
				357	icmp_param->offset + offset,
				358	to, len, 0);
				359
				360	skb->csum = csum_block_add(skb->csum, csum, odd);
				361	if (icmp_pointers[icmp_param->data.icmph.type].error)
				362	nf_ct_attach(skb, icmp_param->skb);
				363	return 0;
				364	}
				365
				366	static void icmp_push_reply(struct icmp_bxm *icmp_param,
				367	struct flowi4 *fl4,
				368	struct ipcm_cookie ipc, struct rtable *rt)
				369	{
				370	struct sock *sk;
				371	struct sk_buff *skb;
				372
				373	sk = icmp_sk(dev_net((*rt)->dst.dev));
				374	if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
				375	icmp_param->data_len+icmp_param->head_len,
				376	icmp_param->head_len,
				377	ipc, rt, MSG_DONTWAIT) < 0) {
				378	__ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
				379	ip_flush_pending_frames(sk);
				380	} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
				381	struct icmphdr *icmph = icmp_hdr(skb);
				382	__wsum csum = 0;
				383	struct sk_buff *skb1;
				384
				385	skb_queue_walk(&sk->sk_write_queue, skb1) {
				386	csum = csum_add(csum, skb1->csum);
				387	}
				388	csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
				389	(char *)icmph,
				390	icmp_param->head_len, csum);
				391	icmph->checksum = csum_fold(csum);
				392	skb->ip_summed = CHECKSUM_NONE;
				393	ip_push_pending_frames(sk, fl4);
				394	}
				395	}
				396
				397	/*
				398	* Driving logic for building and sending ICMP messages.
				399	*/
				400
				401	static void icmp_reply(struct icmp_bxm icmp_param, struct sk_buff skb)
				402	{
				403	struct ipcm_cookie ipc;
				404	struct rtable *rt = skb_rtable(skb);
				405	struct net *net = dev_net(rt->dst.dev);
				406	struct flowi4 fl4;
				407	struct sock *sk;
				408	struct inet_sock *inet;
				409	__be32 daddr, saddr;
				410	u32 mark = IP4_REPLY_MARK(net, skb->mark);
				411	int type = icmp_param->data.icmph.type;
				412	int code = icmp_param->data.icmph.code;
				413
				414	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
				415	return;
				416
				417	/* Needed by both icmp_global_allow and icmp_xmit_lock */
				418	local_bh_disable();
				419
				420	/* global icmp_msgs_per_sec */
				421	if (!icmpv4_global_allow(net, type, code))
				422	goto out_bh_enable;
				423
				424	sk = icmp_xmit_lock(net);
				425	if (!sk)
				426	goto out_bh_enable;
				427	inet = inet_sk(sk);
				428
				429	icmp_param->data.icmph.checksum = 0;
				430
				431	ipcm_init(&ipc);
				432	inet->tos = ip_hdr(skb)->tos;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	433	ipc.sockc.mark = mark;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	434	daddr = ipc.addr = ip_hdr(skb)->saddr;
				435	saddr = fib_compute_spec_dst(skb);
				436
				437	if (icmp_param->replyopts.opt.opt.optlen) {
				438	ipc.opt = &icmp_param->replyopts.opt;
				439	if (ipc.opt->opt.srr)
				440	daddr = icmp_param->replyopts.opt.opt.faddr;
				441	}
				442	memset(&fl4, 0, sizeof(fl4));
				443	fl4.daddr = daddr;
				444	fl4.saddr = saddr;
				445	fl4.flowi4_mark = mark;
				446	fl4.flowi4_uid = sock_net_uid(net, NULL);
				447	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
				448	fl4.flowi4_proto = IPPROTO_ICMP;
				449	fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
				450	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
				451	rt = ip_route_output_key(net, &fl4);
				452	if (IS_ERR(rt))
				453	goto out_unlock;
				454	if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
				455	icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
				456	ip_rt_put(rt);
				457	out_unlock:
				458	icmp_xmit_unlock(sk);
				459	out_bh_enable:
				460	local_bh_enable();
				461	}
				462
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	463	/*
				464	* The device used for looking up which routing table to use for sending an ICMP
				465	* error is preferably the source whenever it is set, which should ensure the
				466	* icmp error can be sent to the source host, else lookup using the routing
				467	* table of the destination device, else use the main routing table (index 0).
				468	*/
				469	static struct net_device icmp_get_route_lookup_dev(struct sk_buff skb)
				470	{
				471	struct net_device *route_lookup_dev = NULL;
				472
				473	if (skb->dev)
				474	route_lookup_dev = skb->dev;
				475	else if (skb_dst(skb))
				476	route_lookup_dev = skb_dst(skb)->dev;
				477	return route_lookup_dev;
				478	}
				479
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	480	static struct rtable icmp_route_lookup(struct net net,
				481	struct flowi4 *fl4,
				482	struct sk_buff *skb_in,
				483	const struct iphdr *iph,
				484	__be32 saddr, u8 tos, u32 mark,
				485	int type, int code,
				486	struct icmp_bxm *param)
				487	{
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	488	struct net_device *route_lookup_dev;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	489	struct rtable rt, rt2;
				490	struct flowi4 fl4_dec;
				491	int err;
				492
				493	memset(fl4, 0, sizeof(*fl4));
				494	fl4->daddr = (param->replyopts.opt.opt.srr ?
				495	param->replyopts.opt.opt.faddr : iph->saddr);
				496	fl4->saddr = saddr;
				497	fl4->flowi4_mark = mark;
				498	fl4->flowi4_uid = sock_net_uid(net, NULL);
				499	fl4->flowi4_tos = RT_TOS(tos);
				500	fl4->flowi4_proto = IPPROTO_ICMP;
				501	fl4->fl4_icmp_type = type;
				502	fl4->fl4_icmp_code = code;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	503	route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
				504	fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	505
				506	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
				507	rt = ip_route_output_key_hash(net, fl4, skb_in);
				508	if (IS_ERR(rt))
				509	return rt;
				510
				511	/* No need to clone since we're just using its address. */
				512	rt2 = rt;
				513
				514	rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
				515	flowi4_to_flowi(fl4), NULL, 0);
				516	if (!IS_ERR(rt)) {
				517	if (rt != rt2)
				518	return rt;
				519	} else if (PTR_ERR(rt) == -EPERM) {
				520	rt = NULL;
				521	} else
				522	return rt;
				523
				524	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
				525	if (err)
				526	goto relookup_failed;
				527
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	528	if (inet_addr_type_dev_table(net, route_lookup_dev,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	529	fl4_dec.saddr) == RTN_LOCAL) {
				530	rt2 = __ip_route_output_key(net, &fl4_dec);
				531	if (IS_ERR(rt2))
				532	err = PTR_ERR(rt2);
				533	} else {
				534	struct flowi4 fl4_2 = {};
				535	unsigned long orefdst;
				536
				537	fl4_2.daddr = fl4_dec.saddr;
				538	rt2 = ip_route_output_key(net, &fl4_2);
				539	if (IS_ERR(rt2)) {
				540	err = PTR_ERR(rt2);
				541	goto relookup_failed;
				542	}
				543	/* Ugh! */
				544	orefdst = skb_in->_skb_refdst; /* save old refdst */
				545	skb_dst_set(skb_in, NULL);
				546	err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
				547	RT_TOS(tos), rt2->dst.dev);
				548
				549	dst_release(&rt2->dst);
				550	rt2 = skb_rtable(skb_in);
				551	skb_in->_skb_refdst = orefdst; /* restore old refdst */
				552	}
				553
				554	if (err)
				555	goto relookup_failed;
				556
				557	rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
				558	flowi4_to_flowi(&fl4_dec), NULL,
				559	XFRM_LOOKUP_ICMP);
				560	if (!IS_ERR(rt2)) {
				561	dst_release(&rt->dst);
				562	memcpy(fl4, &fl4_dec, sizeof(*fl4));
				563	rt = rt2;
				564	} else if (PTR_ERR(rt2) == -EPERM) {
				565	if (rt)
				566	dst_release(&rt->dst);
				567	return rt2;
				568	} else {
				569	err = PTR_ERR(rt2);
				570	goto relookup_failed;
				571	}
				572	return rt;
				573
				574	relookup_failed:
				575	if (rt)
				576	return rt;
				577	return ERR_PTR(err);
				578	}
				579
				580	/*
				581	* Send an ICMP message in response to a situation
				582	*
				583	* RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header.
				584	* MAY send more (we do).
				585	* MUST NOT change this header information.
				586	* MUST NOT reply to a multicast/broadcast IP address.
				587	* MUST NOT reply to a multicast/broadcast MAC address.
				588	* MUST reply to only the first fragment.
				589	*/
				590
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	591	void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
				592	const struct ip_options *opt)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	593	{
				594	struct iphdr *iph;
				595	int room;
				596	struct icmp_bxm icmp_param;
				597	struct rtable *rt = skb_rtable(skb_in);
				598	struct ipcm_cookie ipc;
				599	struct flowi4 fl4;
				600	__be32 saddr;
				601	u8 tos;
				602	u32 mark;
				603	struct net *net;
				604	struct sock *sk;
				605
				606	if (!rt)
				607	goto out;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	608
				609	if (rt->dst.dev)
				610	net = dev_net(rt->dst.dev);
				611	else if (skb_in->dev)
				612	net = dev_net(skb_in->dev);
				613	else
				614	goto out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	615
				616	/*
				617	* Find the original header. It is expected to be valid, of course.
				618	* Check this, icmp_send is called from the most obscure devices
				619	* sometimes.
				620	*/
				621	iph = ip_hdr(skb_in);
				622
				623	if ((u8 *)iph < skb_in->head \|\|
				624	(skb_network_header(skb_in) + sizeof(*iph)) >
				625	skb_tail_pointer(skb_in))
				626	goto out;
				627
				628	/*
				629	* No replies to physical multicast/broadcast
				630	*/
				631	if (skb_in->pkt_type != PACKET_HOST)
				632	goto out;
				633
				634	/*
				635	* Now check at the protocol level
				636	*/
				637	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST))
				638	goto out;
				639
				640	/*
				641	* Only reply to fragment 0. We byte re-order the constant
				642	* mask for efficiency.
				643	*/
				644	if (iph->frag_off & htons(IP_OFFSET))
				645	goto out;
				646
				647	/*
				648	* If we send an ICMP error to an ICMP error a mess would result..
				649	*/
				650	if (icmp_pointers[type].error) {
				651	/*
				652	* We are an error, check if we are replying to an
				653	* ICMP error
				654	*/
				655	if (iph->protocol == IPPROTO_ICMP) {
				656	u8 _inner_type, *itp;
				657
				658	itp = skb_header_pointer(skb_in,
				659	skb_network_header(skb_in) +
				660	(iph->ihl << 2) +
				661	offsetof(struct icmphdr,
				662	type) -
				663	skb_in->data,
				664	sizeof(_inner_type),
				665	&_inner_type);
				666	if (!itp)
				667	goto out;
				668
				669	/*
				670	* Assume any unknown ICMP type is an error. This
				671	* isn't specified by the RFC, but think about it..
				672	*/
				673	if (*itp > NR_ICMP_TYPES \|\|
				674	icmp_pointers[*itp].error)
				675	goto out;
				676	}
				677	}
				678
				679	/* Needed by both icmp_global_allow and icmp_xmit_lock */
				680	local_bh_disable();
				681
				682	/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
				683	* incoming dev is loopback. If outgoing dev change to not be
				684	* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
				685	*/
				686	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
				687	!icmpv4_global_allow(net, type, code))
				688	goto out_bh_enable;
				689
				690	sk = icmp_xmit_lock(net);
				691	if (!sk)
				692	goto out_bh_enable;
				693
				694	/*
				695	* Construct source address and options.
				696	*/
				697
				698	saddr = iph->daddr;
				699	if (!(rt->rt_flags & RTCF_LOCAL)) {
				700	struct net_device *dev = NULL;
				701
				702	rcu_read_lock();
				703	if (rt_is_input_route(rt) &&
				704	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
				705	dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
				706
				707	if (dev)
				708	saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
				709	else
				710	saddr = 0;
				711	rcu_read_unlock();
				712	}
				713
				714	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) \|
				715	IPTOS_PREC_INTERNETCONTROL) :
				716	iph->tos;
				717	mark = IP4_REPLY_MARK(net, skb_in->mark);
				718
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	719	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	720	goto out_unlock;
				721
				722
				723	/*
				724	* Prepare data for ICMP header.
				725	*/
				726
				727	icmp_param.data.icmph.type = type;
				728	icmp_param.data.icmph.code = code;
				729	icmp_param.data.icmph.un.gateway = info;
				730	icmp_param.data.icmph.checksum = 0;
				731	icmp_param.skb = skb_in;
				732	icmp_param.offset = skb_network_offset(skb_in);
				733	inet_sk(sk)->tos = tos;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	734	ipcm_init(&ipc);
				735	ipc.addr = iph->saddr;
				736	ipc.opt = &icmp_param.replyopts.opt;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	737	ipc.sockc.mark = mark;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	738
				739	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
				740	type, code, &icmp_param);
				741	if (IS_ERR(rt))
				742	goto out_unlock;
				743
				744	/* peer icmp_ratelimit */
				745	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
				746	goto ende;
				747
				748	/* RFC says return as much as we can without exceeding 576 bytes. */
				749
				750	room = dst_mtu(&rt->dst);
				751	if (room > 576)
				752	room = 576;
				753	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
				754	room -= sizeof(struct icmphdr);
				755
				756	icmp_param.data_len = skb_in->len - icmp_param.offset;
				757	if (icmp_param.data_len > room)
				758	icmp_param.data_len = room;
				759	icmp_param.head_len = sizeof(struct icmphdr);
				760
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	761	/* if we don't have a source address at this point, fall back to the
				762	* dummy address instead of sending out a packet with a source address
				763	* of 0.0.0.0
				764	*/
				765	if (!fl4.saddr)
				766	fl4.saddr = htonl(INADDR_DUMMY);
				767
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	768	icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
				769	ende:
				770	ip_rt_put(rt);
				771	out_unlock:
				772	icmp_xmit_unlock(sk);
				773	out_bh_enable:
				774	local_bh_enable();
				775	out:;
				776	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	777	EXPORT_SYMBOL(__icmp_send);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	778
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	779	#if IS_ENABLED(CONFIG_NF_NAT)
				780	#include <net/netfilter/nf_conntrack.h>
				781	void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
				782	{
				783	struct sk_buff *cloned_skb = NULL;
				784	struct ip_options opts = { 0 };
				785	enum ip_conntrack_info ctinfo;
				786	struct nf_conn *ct;
				787	__be32 orig_ip;
				788
				789	ct = nf_ct_get(skb_in, &ctinfo);
				790	if (!ct \|\| !(ct->status & IPS_SRC_NAT)) {
				791	__icmp_send(skb_in, type, code, info, &opts);
				792	return;
				793	}
				794
				795	if (skb_shared(skb_in))
				796	skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
				797
				798	if (unlikely(!skb_in \|\| skb_network_header(skb_in) < skb_in->head \|\|
				799	(skb_network_header(skb_in) + sizeof(struct iphdr)) >
				800	skb_tail_pointer(skb_in) \|\| skb_ensure_writable(skb_in,
				801	skb_network_offset(skb_in) + sizeof(struct iphdr))))
				802	goto out;
				803
				804	orig_ip = ip_hdr(skb_in)->saddr;
				805	ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
				806	__icmp_send(skb_in, type, code, info, &opts);
				807	ip_hdr(skb_in)->saddr = orig_ip;
				808	out:
				809	consume_skb(cloned_skb);
				810	}
				811	EXPORT_SYMBOL(icmp_ndo_send);
				812	#endif
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	813
				814	static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
				815	{
				816	const struct iphdr iph = (const struct iphdr ) skb->data;
				817	const struct net_protocol *ipprot;
				818	int protocol = iph->protocol;
				819
				820	/* Checkin full IP header plus 8 bytes of protocol to
				821	* avoid additional coding at protocol handlers.
				822	*/
				823	if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
				824	__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
				825	return;
				826	}
				827
				828	raw_icmp_error(skb, protocol, info);
				829
				830	ipprot = rcu_dereference(inet_protos[protocol]);
				831	if (ipprot && ipprot->err_handler)
				832	ipprot->err_handler(skb, info);
				833	}
				834
				835	static bool icmp_tag_validation(int proto)
				836	{
				837	bool ok;
				838
				839	rcu_read_lock();
				840	ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation;
				841	rcu_read_unlock();
				842	return ok;
				843	}
				844
				845	/*
				846	* Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
				847	* ICMP_PARAMETERPROB.
				848	*/
				849
				850	static bool icmp_unreach(struct sk_buff *skb)
				851	{
				852	const struct iphdr *iph;
				853	struct icmphdr *icmph;
				854	struct net *net;
				855	u32 info = 0;
				856
				857	net = dev_net(skb_dst(skb)->dev);
				858
				859	/*
				860	* Incomplete header ?
				861	* Only checks for the IP header, there should be an
				862	* additional check for longer headers in upper levels.
				863	*/
				864
				865	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
				866	goto out_err;
				867
				868	icmph = icmp_hdr(skb);
				869	iph = (const struct iphdr *)skb->data;
				870
				871	if (iph->ihl < 5) /* Mangled header, drop. */
				872	goto out_err;
				873
				874	switch (icmph->type) {
				875	case ICMP_DEST_UNREACH:
				876	switch (icmph->code & 15) {
				877	case ICMP_NET_UNREACH:
				878	case ICMP_HOST_UNREACH:
				879	case ICMP_PROT_UNREACH:
				880	case ICMP_PORT_UNREACH:
				881	break;
				882	case ICMP_FRAG_NEEDED:
				883	/* for documentation of the ip_no_pmtu_disc
				884	* values please see
				885	* Documentation/networking/ip-sysctl.txt
				886	*/
				887	switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
				888	default:
				889	net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
				890	&iph->daddr);
				891	break;
				892	case 2:
				893	goto out;
				894	case 3:
				895	if (!icmp_tag_validation(iph->protocol))
				896	goto out;
				897	/* fall through */
				898	case 0:
				899	info = ntohs(icmph->un.frag.mtu);
				900	}
				901	break;
				902	case ICMP_SR_FAILED:
				903	net_dbg_ratelimited("%pI4: Source Route Failed\n",
				904	&iph->daddr);
				905	break;
				906	default:
				907	break;
				908	}
				909	if (icmph->code > NR_ICMP_UNREACH)
				910	goto out;
				911	break;
				912	case ICMP_PARAMETERPROB:
				913	info = ntohl(icmph->un.gateway) >> 24;
				914	break;
				915	case ICMP_TIME_EXCEEDED:
				916	__ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
				917	if (icmph->code == ICMP_EXC_FRAGTIME)
				918	goto out;
				919	break;
				920	}
				921
				922	/*
				923	* Throw it at our lower layers
				924	*
				925	* RFC 1122: 3.2.2 MUST extract the protocol ID from the passed
				926	* header.
				927	* RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the
				928	* transport layer.
				929	* RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to
				930	* transport layer.
				931	*/
				932
				933	/*
				934	* Check the other end isn't violating RFC 1122. Some routers send
				935	* bogus responses to broadcast frames. If you see this message
				936	* first check your netmask matches at both ends, if it does then
				937	* get the other vendor to fix their kit.
				938	*/
				939
				940	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
				941	inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
				942	net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
				943	&ip_hdr(skb)->saddr,
				944	icmph->type, icmph->code,
				945	&iph->daddr, skb->dev->name);
				946	goto out;
				947	}
				948
				949	icmp_socket_deliver(skb, info);
				950
				951	out:
				952	return true;
				953	out_err:
				954	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
				955	return false;
				956	}
				957
				958
				959	/*
				960	* Handle ICMP_REDIRECT.
				961	*/
				962
				963	static bool icmp_redirect(struct sk_buff *skb)
				964	{
				965	if (skb->len < sizeof(struct iphdr)) {
				966	__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
				967	return false;
				968	}
				969
				970	if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
				971	/* there aught to be a stat */
				972	return false;
				973	}
				974
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	975	icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	976	return true;
				977	}
				978
				979	/*
				980	* Handle ICMP_ECHO ("ping") requests.
				981	*
				982	* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
				983	* requests.
				984	* RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be
				985	* included in the reply.
				986	* RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
				987	* echo requests, MUST have default=NOT.
				988	* See also WRT handling of options once they are done and working.
				989	*/
				990
				991	static bool icmp_echo(struct sk_buff *skb)
				992	{
				993	struct net *net;
				994
				995	net = dev_net(skb_dst(skb)->dev);
				996	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
				997	struct icmp_bxm icmp_param;
				998
				999	icmp_param.data.icmph = *icmp_hdr(skb);
				1000	icmp_param.data.icmph.type = ICMP_ECHOREPLY;
				1001	icmp_param.skb = skb;
				1002	icmp_param.offset = 0;
				1003	icmp_param.data_len = skb->len;
				1004	icmp_param.head_len = sizeof(struct icmphdr);
				1005	icmp_reply(&icmp_param, skb);
				1006	}
				1007	/* should there be an ICMP stat for ignored echos? */
				1008	return true;
				1009	}
				1010
				1011	/*
				1012	* Handle ICMP Timestamp requests.
				1013	* RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests.
				1014	* SHOULD be in the kernel for minimum random latency.
				1015	* MUST be accurate to a few minutes.
				1016	* MUST be updated at least at 15Hz.
				1017	*/
				1018	static bool icmp_timestamp(struct sk_buff *skb)
				1019	{
				1020	struct icmp_bxm icmp_param;
				1021	/*
				1022	* Too short.
				1023	*/
				1024	if (skb->len < 4)
				1025	goto out_err;
				1026
				1027	/*
				1028	* Fill in the current time as ms since midnight UT:
				1029	*/
				1030	icmp_param.data.times[1] = inet_current_timestamp();
				1031	icmp_param.data.times[2] = icmp_param.data.times[1];
				1032
				1033	BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
				1034
				1035	icmp_param.data.icmph = *icmp_hdr(skb);
				1036	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
				1037	icmp_param.data.icmph.code = 0;
				1038	icmp_param.skb = skb;
				1039	icmp_param.offset = 0;
				1040	icmp_param.data_len = 0;
				1041	icmp_param.head_len = sizeof(struct icmphdr) + 12;
				1042	icmp_reply(&icmp_param, skb);
				1043	return true;
				1044
				1045	out_err:
				1046	__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
				1047	return false;
				1048	}
				1049
				/*
				 *	Handler for ICMP types that are accepted but deliberately ignored.
				 *	The packet is not inspected; true is returned so the caller treats
				 *	the message as successfully handled.
				 */
				static bool icmp_discard(struct sk_buff *skb)
				{
					return true;	/* pretend it was a success */
				}
				1055
				1056	/*
				1057	* Deal with incoming ICMP packets.
				1058	*/
				1059	int icmp_rcv(struct sk_buff *skb)
				1060	{
				1061	struct icmphdr *icmph;
				1062	struct rtable *rt = skb_rtable(skb);
				1063	struct net *net = dev_net(rt->dst.dev);
				1064	bool success;
				1065
				1066	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				1067	struct sec_path *sp = skb_sec_path(skb);
				1068	int nh;
				1069
				1070	if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				1071	XFRM_STATE_ICMP))
				1072	goto drop;
				1073
				1074	if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
				1075	goto drop;
				1076
				1077	nh = skb_network_offset(skb);
				1078	skb_set_network_header(skb, sizeof(*icmph));
				1079
				1080	if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
				1081	goto drop;
				1082
				1083	skb_set_network_header(skb, nh);
				1084	}
				1085
				1086	__ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
				1087
				1088	if (skb_checksum_simple_validate(skb))
				1089	goto csum_error;
				1090
				1091	if (!pskb_pull(skb, sizeof(*icmph)))
				1092	goto error;
				1093
				1094	icmph = icmp_hdr(skb);
				1095
				1096	ICMPMSGIN_INC_STATS(net, icmph->type);
				1097	/*
				1098	* 18 is the highest 'known' ICMP type. Anything else is a mystery
				1099	*
				1100	* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
				1101	* discarded.
				1102	*/
				1103	if (icmph->type > NR_ICMP_TYPES)
				1104	goto error;
				1105
				1106
				1107	/*
				1108	* Parse the ICMP message
				1109	*/
				1110
				1111	if (rt->rt_flags & (RTCF_BROADCAST \| RTCF_MULTICAST)) {
				1112	/*
				1113	* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
				1114	* silently ignored (we let user decide with a sysctl).
				1115	* RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
				1116	* discarded if to broadcast/multicast.
				1117	*/
				1118	if ((icmph->type == ICMP_ECHO \|\|
				1119	icmph->type == ICMP_TIMESTAMP) &&
				1120	net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
				1121	goto error;
				1122	}
				1123	if (icmph->type != ICMP_ECHO &&
				1124	icmph->type != ICMP_TIMESTAMP &&
				1125	icmph->type != ICMP_ADDRESS &&
				1126	icmph->type != ICMP_ADDRESSREPLY) {
				1127	goto error;
				1128	}
				1129	}
				1130
				1131	success = icmp_pointers[icmph->type].handler(skb);
				1132
				1133	if (success) {
				1134	consume_skb(skb);
				1135	return NET_RX_SUCCESS;
				1136	}
				1137
				1138	drop:
				1139	kfree_skb(skb);
				1140	return NET_RX_DROP;
				1141	csum_error:
				1142	__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
				1143	error:
				1144	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
				1145	goto drop;
				1146	}
				1147
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1148	int icmp_err(struct sk_buff *skb, u32 info)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1149	{
				1150	struct iphdr iph = (struct iphdr )skb->data;
				1151	int offset = iph->ihl<<2;
				1152	struct icmphdr icmph = (struct icmphdr )(skb->data + offset);
				1153	int type = icmp_hdr(skb)->type;
				1154	int code = icmp_hdr(skb)->code;
				1155	struct net *net = dev_net(skb->dev);
				1156
				1157	/*
				1158	* Use ping_err to handle all icmp errors except those
				1159	* triggered by ICMP_ECHOREPLY which sent from kernel.
				1160	*/
				1161	if (icmph->type != ICMP_ECHOREPLY) {
				1162	ping_err(skb, offset, info);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1163	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1164	}
				1165
				1166	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1167	ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1168	else if (type == ICMP_REDIRECT)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1169	ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
				1170
				1171	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1172	}
				1173
				1174	/*
				1175	* This table is the definition of how we handle ICMP.
				1176	*/
				1177	static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
				1178	[ICMP_ECHOREPLY] = {
				1179	.handler = ping_rcv,
				1180	},
				1181	[1] = {
				1182	.handler = icmp_discard,
				1183	.error = 1,
				1184	},
				1185	[2] = {
				1186	.handler = icmp_discard,
				1187	.error = 1,
				1188	},
				1189	[ICMP_DEST_UNREACH] = {
				1190	.handler = icmp_unreach,
				1191	.error = 1,
				1192	},
				1193	[ICMP_SOURCE_QUENCH] = {
				1194	.handler = icmp_unreach,
				1195	.error = 1,
				1196	},
				1197	[ICMP_REDIRECT] = {
				1198	.handler = icmp_redirect,
				1199	.error = 1,
				1200	},
				1201	[6] = {
				1202	.handler = icmp_discard,
				1203	.error = 1,
				1204	},
				1205	[7] = {
				1206	.handler = icmp_discard,
				1207	.error = 1,
				1208	},
				1209	[ICMP_ECHO] = {
				1210	.handler = icmp_echo,
				1211	},
				1212	[9] = {
				1213	.handler = icmp_discard,
				1214	.error = 1,
				1215	},
				1216	[10] = {
				1217	.handler = icmp_discard,
				1218	.error = 1,
				1219	},
				1220	[ICMP_TIME_EXCEEDED] = {
				1221	.handler = icmp_unreach,
				1222	.error = 1,
				1223	},
				1224	[ICMP_PARAMETERPROB] = {
				1225	.handler = icmp_unreach,
				1226	.error = 1,
				1227	},
				1228	[ICMP_TIMESTAMP] = {
				1229	.handler = icmp_timestamp,
				1230	},
				1231	[ICMP_TIMESTAMPREPLY] = {
				1232	.handler = icmp_discard,
				1233	},
				1234	[ICMP_INFO_REQUEST] = {
				1235	.handler = icmp_discard,
				1236	},
				1237	[ICMP_INFO_REPLY] = {
				1238	.handler = icmp_discard,
				1239	},
				1240	[ICMP_ADDRESS] = {
				1241	.handler = icmp_discard,
				1242	},
				1243	[ICMP_ADDRESSREPLY] = {
				1244	.handler = icmp_discard,
				1245	},
				1246	};
				1247
				1248	static void __net_exit icmp_sk_exit(struct net *net)
				1249	{
				1250	int i;
				1251
				1252	for_each_possible_cpu(i)
				1253	inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
				1254	free_percpu(net->ipv4.icmp_sk);
				1255	net->ipv4.icmp_sk = NULL;
				1256	}
				1257
				1258	static int __net_init icmp_sk_init(struct net *net)
				1259	{
				1260	int i, err;
				1261
				1262	net->ipv4.icmp_sk = alloc_percpu(struct sock *);
				1263	if (!net->ipv4.icmp_sk)
				1264	return -ENOMEM;
				1265
				1266	for_each_possible_cpu(i) {
				1267	struct sock *sk;
				1268
				1269	err = inet_ctl_sock_create(&sk, PF_INET,
				1270	SOCK_RAW, IPPROTO_ICMP, net);
				1271	if (err < 0)
				1272	goto fail;
				1273
				1274	*per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
				1275
				1276	/* Enough space for 2 64K ICMP packets, including
				1277	* sk_buff/skb_shared_info struct overhead.
				1278	*/
				1279	sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
				1280
				1281	/*
				1282	* Speedup sock_wfree()
				1283	*/
				1284	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
				1285	inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
				1286	}
				1287
				1288	/* Control parameters for ECHO replies. */
				1289	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
				1290	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
				1291
				1292	/* Control parameter - ignore bogus broadcast responses? */
				1293	net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
				1294
				1295	/*
				1296	* Configurable global rate limit.
				1297	*
				1298	* ratelimit defines tokens/packet consumed for dst->rate_token
				1299	* bucket ratemask defines which icmp types are ratelimited by
				1300	* setting it's bit position.
				1301	*
				1302	* default:
				1303	* dest unreachable (3), source quench (4),
				1304	* time exceeded (11), parameter problem (12)
				1305	*/
				1306
				1307	net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
				1308	net->ipv4.sysctl_icmp_ratemask = 0x1818;
				1309	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
				1310
				1311	return 0;
				1312
				1313	fail:
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1314	icmp_sk_exit(net);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1315	return err;
				1316	}
				1317
				1318	static struct pernet_operations __net_initdata icmp_sk_ops = {
				1319	.init = icmp_sk_init,
				1320	.exit = icmp_sk_exit,
				1321	};
				1322
				1323	int __init icmp_init(void)
				1324	{
				1325	return register_pernet_subsys(&icmp_sk_ops);
				1326	}