// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Support for INET connection oriented protocols.
 *
 * Authors:	See the TCP sources
 */

#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
 *                              if IPv6 only, and any IPv4 addresses
 *                              if not IPv6 only
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *                              IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *                              and 0.0.0.0 equals to 0.0.0.0 only
 */
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
                                 const struct in6_addr *sk2_rcv_saddr6,
                                 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
                                 bool sk1_ipv6only, bool sk2_ipv6only,
                                 bool match_sk1_wildcard,
                                 bool match_sk2_wildcard)
{
        int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
        int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

        /* if both are mapped, treat as IPv4 */
        if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
                if (!sk2_ipv6only) {
                        if (sk1_rcv_saddr == sk2_rcv_saddr)
                                return true;
                        return (match_sk1_wildcard && !sk1_rcv_saddr) ||
                                (match_sk2_wildcard && !sk2_rcv_saddr);
                }
                return false;
        }

        if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
                return true;

        if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
            !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
                return true;

        if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
            !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
                return true;

        if (sk2_rcv_saddr6 &&
            ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
                return true;

        return false;
}
#endif

/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *                              0.0.0.0 only equals to 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
                                 bool sk2_ipv6only, bool match_sk1_wildcard,
                                 bool match_sk2_wildcard)
{
        if (!sk2_ipv6only) {
                if (sk1_rcv_saddr == sk2_rcv_saddr)
                        return true;
                return (match_sk1_wildcard && !sk1_rcv_saddr) ||
                        (match_sk2_wildcard && !sk2_rcv_saddr);
        }
        return false;
}

bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
                          bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family == AF_INET6)
                return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr,
                                            inet6_rcv_saddr(sk2),
                                            sk->sk_rcv_saddr,
                                            sk2->sk_rcv_saddr,
                                            ipv6_only_sock(sk),
                                            ipv6_only_sock(sk2),
                                            match_wildcard,
                                            match_wildcard);
#endif
        return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
                                    ipv6_only_sock(sk2), match_wildcard,
                                    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

bool inet_rcv_saddr_any(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family == AF_INET6)
                return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
        return !sk->sk_rcv_saddr;
}

void inet_get_local_port_range(struct net *net, int *low, int *high)
{
        unsigned int seq;

        do {
                seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

                *low = net->ipv4.ip_local_ports.range[0];
                *high = net->ipv4.ip_local_ports.range[1];
        } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);

static int inet_csk_bind_conflict(const struct sock *sk,
                                  const struct inet_bind_bucket *tb,
                                  bool relax, bool reuseport_ok)
{
        struct sock *sk2;
        bool reuse = sk->sk_reuse;
        bool reuseport = !!sk->sk_reuseport && reuseport_ok;
        kuid_t uid = sock_i_uid((struct sock *)sk);

        /*
         * Unlike other sk lookup places we do not check
         * for sk_net here, since _all_ the socks listed
         * in tb->owners list belong to the same net - the
         * one this bucket belongs to.
         */

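        /* A conflict exists when another owner is bound on an overlapping
         * device and address and the SO_REUSEADDR/SO_REUSEPORT rules do not
         * let both sockets share the port; the !relax pass is stricter and
         * also treats reusable, non-listening owners as conflicts.
         */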
        sk_for_each_bound(sk2, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
                        if ((!reuse || !sk2->sk_reuse ||
                            sk2->sk_state == TCP_LISTEN) &&
                            (!reuseport || !sk2->sk_reuseport ||
                            rcu_access_pointer(sk->sk_reuseport_cb) ||
                            (sk2->sk_state != TCP_TIME_WAIT &&
                             !uid_eq(uid, sock_i_uid(sk2))))) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        }
                        if (!relax && reuse && sk2->sk_reuse &&
                            sk2->sk_state != TCP_LISTEN) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        }
                }
        }
        return sk2 != NULL;
}

/*
 * Find an open port number for the socket.  Returns with the
 * inet_bind_hashbucket lock held.
 */
static struct inet_bind_hashbucket *
inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
        struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
        int port = 0;
        struct inet_bind_hashbucket *head;
        struct net *net = sock_net(sk);
        int i, low, high, attempt_half;
        struct inet_bind_bucket *tb;
        u32 remaining, offset;
        int l3mdev;

        l3mdev = inet_sk_bound_l3mdev(sk);
        attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
        inet_get_local_port_range(net, &low, &high);
        high++; /* [32768, 60999] -> [32768, 61000[ */
        if (high - low < 4)
                attempt_half = 0;
        if (attempt_half) {
                int half = low + (((high - low) >> 2) << 1);

                if (attempt_half == 1)
                        high = half;
                else
                        low = half;
        }
        remaining = high - low;
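        /* The scan below steps the port by 2 to stay on one parity;
         * keeping @remaining even preserves that parity across the
         * wrap-around (port -= remaining).
         */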
        if (likely(remaining > 1))
                remaining &= ~1U;

        offset = prandom_u32() % remaining;
        /* __inet_hash_connect() favors ports having @low parity
         * We do the opposite to not pollute connect() users.
         */
        offset |= 1U;

other_parity_scan:
        port = low + offset;
        for (i = 0; i < remaining; i += 2, port += 2) {
                if (unlikely(port >= high))
                        port -= remaining;
                if (inet_is_local_reserved_port(net, port))
                        continue;
                head = &hinfo->bhash[inet_bhashfn(net, port,
                                                  hinfo->bhash_size)];
                spin_lock_bh(&head->lock);
                inet_bind_bucket_for_each(tb, &head->chain)
                        if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
                            tb->port == port) {
                                if (!inet_csk_bind_conflict(sk, tb, false, false))
                                        goto success;
                                goto next_port;
                        }
                tb = NULL;
                goto success;
next_port:
                spin_unlock_bh(&head->lock);
                cond_resched();
        }

        offset--;
        if (!(offset & 1))
                goto other_parity_scan;

        if (attempt_half == 1) {
                /* OK we now try the upper half of the range */
                attempt_half = 2;
                goto other_half_scan;
        }
        return NULL;
success:
        *port_ret = port;
        *tb_ret = tb;
        return head;
}

static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
                                     struct sock *sk)
{
        kuid_t uid = sock_i_uid(sk);

        if (tb->fastreuseport <= 0)
                return 0;
        if (!sk->sk_reuseport)
                return 0;
        if (rcu_access_pointer(sk->sk_reuseport_cb))
                return 0;
        if (!uid_eq(tb->fastuid, uid))
                return 0;
        /* We only need to check the rcv_saddr if this tb was once marked
         * without fastreuseport and then was reset, as we can only know that
         * the fast_*rcv_saddr doesn't have any conflicts with the socks on the
         * owners list.
         */
        if (tb->fastreuseport == FASTREUSEPORT_ANY)
                return 1;
#if IS_ENABLED(CONFIG_IPV6)
        if (tb->fast_sk_family == AF_INET6)
                return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
                                            inet6_rcv_saddr(sk),
                                            tb->fast_rcv_saddr,
                                            sk->sk_rcv_saddr,
                                            tb->fast_ipv6_only,
                                            ipv6_only_sock(sk), true, false);
#endif
        return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
                                    ipv6_only_sock(sk), true, false);
}

void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
                               struct sock *sk)
{
        kuid_t uid = sock_i_uid(sk);
        bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;

        if (hlist_empty(&tb->owners)) {
                tb->fastreuse = reuse;
                if (sk->sk_reuseport) {
                        tb->fastreuseport = FASTREUSEPORT_ANY;
                        tb->fastuid = uid;
                        tb->fast_rcv_saddr = sk->sk_rcv_saddr;
                        tb->fast_ipv6_only = ipv6_only_sock(sk);
                        tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
                        tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
                } else {
                        tb->fastreuseport = 0;
                }
        } else {
                if (!reuse)
                        tb->fastreuse = 0;
                if (sk->sk_reuseport) {
                        /* We didn't match or we don't have fastreuseport set on
                         * the tb, but we have sk_reuseport set on this socket
                         * and we know that there are no bind conflicts with
                         * this socket in this tb, so reset our tb's reuseport
                         * settings so that any subsequent sockets that match
                         * our current socket will be put on the fast path.
                         *
                         * If we reset we need to set FASTREUSEPORT_STRICT so we
                         * do extra checking for all subsequent sk_reuseport
                         * socks.
                         */
                        if (!sk_reuseport_match(tb, sk)) {
                                tb->fastreuseport = FASTREUSEPORT_STRICT;
                                tb->fastuid = uid;
                                tb->fast_rcv_saddr = sk->sk_rcv_saddr;
                                tb->fast_ipv6_only = ipv6_only_sock(sk);
                                tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
                                tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
                        }
                } else {
                        tb->fastreuseport = 0;
                }
        }
}

/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 * We try to allocate an odd port (and leave even ports for connect())
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
        bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
        struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
        int ret = 1, port = snum;
        struct inet_bind_hashbucket *head;
        struct net *net = sock_net(sk);
        struct inet_bind_bucket *tb = NULL;
        int l3mdev;

        l3mdev = inet_sk_bound_l3mdev(sk);

        if (!port) {
                head = inet_csk_find_open_port(sk, &tb, &port);
                if (!head)
                        return ret;
                if (!tb)
                        goto tb_not_found;
                goto success;
        }
        head = &hinfo->bhash[inet_bhashfn(net, port,
                                          hinfo->bhash_size)];
        spin_lock_bh(&head->lock);
        inet_bind_bucket_for_each(tb, &head->chain)
                if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
                    tb->port == port)
                        goto tb_found;
tb_not_found:
        tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
                                     net, head, port, l3mdev);
        if (!tb)
                goto fail_unlock;
tb_found:
        if (!hlist_empty(&tb->owners)) {
                if (sk->sk_reuse == SK_FORCE_REUSE)
                        goto success;

                if ((tb->fastreuse > 0 && reuse) ||
                    sk_reuseport_match(tb, sk))
                        goto success;
                if (inet_csk_bind_conflict(sk, tb, true, true))
                        goto fail_unlock;
        }
success:
        inet_csk_update_fastreuse(tb, sk);

        if (!inet_csk(sk)->icsk_bind_hash)
                inet_bind_hash(sk, tb, port);
        WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
        ret = 0;

fail_unlock:
        spin_unlock_bh(&head->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(inet_csk_get_port);

/*
 * Wait for an incoming connection, avoid race conditions. This must be called
 * with the socket locked.
 */
static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        DEFINE_WAIT(wait);
        int err;

        /*
         * True wake-one mechanism for incoming connections: only
         * one process gets woken up, not the 'whole herd'.
         * Since we do not 'race & poll' for established sockets
         * anymore, the common case will execute the loop only once.
         *
         * Subtle issue: "add_wait_queue_exclusive()" will be added
         * after any current non-exclusive waiters, and we know that
         * it will always _stay_ after any new non-exclusive waiters
         * because all non-exclusive waiters are added at the
         * beginning of the wait-queue. As such, it's ok to "drop"
         * our exclusiveness temporarily when we get woken up without
         * having to remove and re-insert us on the wait queue.
         */
        for (;;) {
                prepare_to_wait_exclusive(sk_sleep(sk), &wait,
                                          TASK_INTERRUPTIBLE);
                release_sock(sk);
                if (reqsk_queue_empty(&icsk->icsk_accept_queue))
                        timeo = schedule_timeout(timeo);
                sched_annotate_sleep();
                lock_sock(sk);
                err = 0;
                if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
                        break;
                err = -EINVAL;
                if (sk->sk_state != TCP_LISTEN)
                        break;
                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        break;
                err = -EAGAIN;
                if (!timeo)
                        break;
        }
        finish_wait(sk_sleep(sk), &wait);
        return err;
}

/*
 * This will accept the next outstanding connection.
 */
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct request_sock *req;
        struct sock *newsk;
        int error;

        lock_sock(sk);

        /* We need to make sure that this socket is listening,
         * and that it has something pending.
         */
        error = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out_err;

        /* Find already established connection */
        if (reqsk_queue_empty(queue)) {
                long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

                /* If this is a non blocking socket don't sleep */
                error = -EAGAIN;
                if (!timeo)
                        goto out_err;

                error = inet_csk_wait_for_connect(sk, timeo);
                if (error)
                        goto out_err;
        }
        req = reqsk_queue_remove(queue, sk);
        newsk = req->sk;

        if (sk->sk_protocol == IPPROTO_TCP &&
            tcp_rsk(req)->tfo_listener) {
                spin_lock_bh(&queue->fastopenq.lock);
                if (tcp_rsk(req)->tfo_listener) {
                        /* We are still waiting for the final ACK from 3WHS
                         * so can't free req now. Instead, we set req->sk to
                         * NULL to signify that the child socket is taken
                         * so reqsk_fastopen_remove() will free the req
                         * when 3WHS finishes (or is aborted).
                         */
                        req->sk = NULL;
                        req = NULL;
                }
                spin_unlock_bh(&queue->fastopenq.lock);
        }

out:
        release_sock(sk);
        if (newsk && mem_cgroup_sockets_enabled) {
                int amt;

                /* atomically get the memory usage, set and charge the
                 * newsk->sk_memcg.
                 */
                lock_sock(newsk);

                /* The socket has not been accepted yet, no need to look at
                 * newsk->sk_wmem_queued.
                 */
                amt = sk_mem_pages(newsk->sk_forward_alloc +
                                   atomic_read(&newsk->sk_rmem_alloc));
                mem_cgroup_sk_alloc(newsk);
                if (newsk->sk_memcg && amt)
                        mem_cgroup_charge_skmem(newsk->sk_memcg, amt);

                release_sock(newsk);
        }
        if (req)
                reqsk_put(req);
        return newsk;
out_err:
        newsk = NULL;
        req = NULL;
        *err = error;
        goto out;
}
EXPORT_SYMBOL(inet_csk_accept);

/*
 * Using different timers for retransmit, delayed acks and probes
 * We may wish use just one timer maintaining a list of expire jiffies
 * to optimize.
 */
void inet_csk_init_xmit_timers(struct sock *sk,
                               void (*retransmit_handler)(struct timer_list *t),
                               void (*delack_handler)(struct timer_list *t),
                               void (*keepalive_handler)(struct timer_list *t))
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
        timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
        timer_setup(&sk->sk_timer, keepalive_handler, 0);
        icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);

void inet_csk_clear_xmit_timers(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;

        sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
        sk_stop_timer(sk, &icsk->icsk_delack_timer);
        sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);

void inet_csk_delete_keepalive_timer(struct sock *sk)
{
        sk_stop_timer(sk, &sk->sk_timer);
}
EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
        sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

struct dst_entry *inet_csk_route_req(const struct sock *sk,
                                     struct flowi4 *fl4,
                                     const struct request_sock *req)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct net *net = read_pnet(&ireq->ireq_net);
        struct ip_options_rcu *opt;
        struct rtable *rt;

        rcu_read_lock();
        opt = rcu_dereference(ireq->ireq_opt);

        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
                           htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
        if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        rcu_read_unlock();
        return &rt->dst;

route_err:
        ip_rt_put(rt);
no_route:
        rcu_read_unlock();
        __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
        return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);

struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
                                            struct sock *newsk,
                                            const struct request_sock *req)
{
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct net *net = read_pnet(&ireq->ireq_net);
        struct inet_sock *newinet = inet_sk(newsk);
        struct ip_options_rcu *opt;
        struct flowi4 *fl4;
        struct rtable *rt;

        opt = rcu_dereference(ireq->ireq_opt);
        fl4 = &newinet->cork.fl.u.ip4;

        flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
                           htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
                goto no_route;
        if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
                goto route_err;
        return &rt->dst;

route_err:
        ip_rt_put(rt);
no_route:
        __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
        return NULL;
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);

#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) true
#endif

/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
                                  const int max_retries,
                                  const u8 rskq_defer_accept,
                                  int *expire, int *resend)
{
        if (!rskq_defer_accept) {
                *expire = req->num_timeout >= thresh;
                *resend = 1;
                return;
        }
        *expire = req->num_timeout >= thresh &&
                  (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
        /*
         * Do not resend while waiting for data after ACK,
         * start to resend on end of deferring period to give
         * last chance for data or ACK to create established socket.
         */
        *resend = !inet_rsk(req)->acked ||
                  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
        int err = req->rsk_ops->rtx_syn_ack(parent, req);

        if (!err)
                req->num_retrans++;
        return err;
}
EXPORT_SYMBOL(inet_rtx_syn_ack);

/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock *req)
{
        struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
        bool found = false;

        if (sk_hashed(req_to_sk(req))) {
                spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

                spin_lock(lock);
                found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
                spin_unlock(lock);
        }
        if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
                reqsk_put(req);
        return found;
}

bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
        bool unlinked = reqsk_queue_unlink(req);

        if (unlinked) {
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
                reqsk_put(req);
        }
        return unlinked;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);

void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
        inet_csk_reqsk_queue_drop(sk, req);
        reqsk_put(req);
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);

static void reqsk_timer_handler(struct timer_list *t)
{
        struct request_sock *req = from_timer(req, t, rsk_timer);
        struct sock *sk_listener = req->rsk_listener;
        struct net *net = sock_net(sk_listener);
        struct inet_connection_sock *icsk = inet_csk(sk_listener);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        int qlen, expire = 0, resend = 0;
        int max_retries, thresh;
        u8 defer_accept;

        if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
                goto drop;

        max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        thresh = max_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
         * If synack was not acknowledged for 1 second, it means
         * one of the following things: synack was lost, ack was lost,
         * rtt is high or nobody planned to ack (i.e. synflood).
         * When server is a bit loaded, queue is populated with old
         * open requests, reducing effective size of queue.
         * When server is well loaded, queue size reduces to zero
         * after several minutes of work. It is not synflood,
         * it is normal operation. The solution is pruning
         * too old entries overriding normal timeout, when
         * situation becomes dangerous.
         *
         * Essentially, we reserve half of room for young
         * embrions; and abort old ones without pity, if old
         * ones are about to clog our table.
         */
        qlen = reqsk_queue_len(queue);
        if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
                int young = reqsk_queue_len_young(queue) << 1;

                while (thresh > 2) {
                        if (qlen < young)
                                break;
                        thresh--;
                        young <<= 1;
                }
        }
        defer_accept = READ_ONCE(queue->rskq_defer_accept);
        if (defer_accept)
                max_retries = defer_accept;
        syn_ack_recalc(req, thresh, max_retries, defer_accept,
                       &expire, &resend);
        req->rsk_ops->syn_ack_timeout(req);
        if (!expire &&
            (!resend ||
             !inet_rtx_syn_ack(sk_listener, req) ||
             inet_rsk(req)->acked)) {
                unsigned long timeo;

                if (req->num_timeout++ == 0)
                        atomic_dec(&queue->young);
                timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
                mod_timer(&req->rsk_timer, jiffies + timeo);
                return;
        }
drop:
        inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}

static void reqsk_queue_hash_req(struct request_sock *req,
                                 unsigned long timeout)
{
        timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
        mod_timer(&req->rsk_timer, jiffies + timeout);

        inet_ehash_insert(req_to_sk(req), NULL);
        /* before letting lookups find us, make sure all req fields
         * are committed to memory and refcnt initialized.
         */
        smp_wmb();
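        /* Three references: one for the ehash table, one for the pending
         * rsk_timer, and one for the caller of
         * inet_csk_reqsk_queue_hash_add().
         */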
        refcount_set(&req->rsk_refcnt, 2 + 1);
}

void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
                                   unsigned long timeout)
{
        reqsk_queue_hash_req(req, timeout);
        inet_csk_reqsk_queue_added(sk);
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);

/**
 * inet_csk_clone_lock - clone an inet socket, and lock its clone
 * @sk: the socket to clone
 * @req: request_sock
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *inet_csk_clone_lock(const struct sock *sk,
                                 const struct request_sock *req,
                                 const gfp_t priority)
{
        struct sock *newsk = sk_clone_lock(sk, priority);

        if (newsk) {
                struct inet_connection_sock *newicsk = inet_csk(newsk);

                inet_sk_set_state(newsk, TCP_SYN_RECV);
                newicsk->icsk_bind_hash = NULL;

                inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
                inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
                inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);

                /* listeners have SOCK_RCU_FREE, not the children */
                sock_reset_flag(newsk, SOCK_RCU_FREE);

                inet_sk(newsk)->mc_list = NULL;

                newsk->sk_mark = inet_rsk(req)->ir_mark;
                atomic64_set(&newsk->sk_cookie,
                             atomic64_read(&inet_rsk(req)->ir_cookie));

                newicsk->icsk_retransmits = 0;
                newicsk->icsk_backoff = 0;
                newicsk->icsk_probes_out = 0;
                newicsk->icsk_probes_tstamp = 0;

                /* Deinitialize accept_queue to trap illegal accesses. */
                memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));

                security_inet_csk_clone(newsk, req);
        }
        return newsk;
}
EXPORT_SYMBOL_GPL(inet_csk_clone_lock);

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void inet_csk_destroy_sock(struct sock *sk)
{
        WARN_ON(sk->sk_state != TCP_CLOSE);
        WARN_ON(!sock_flag(sk, SOCK_DEAD));

        /* It cannot be in hash table! */
        WARN_ON(!sk_unhashed(sk));

        /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
        WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);

        sk->sk_prot->destroy(sk);

        sk_stream_kill_queues(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);

        percpu_counter_dec(sk->sk_prot->orphan_count);

        sock_put(sk);
}
EXPORT_SYMBOL(inet_csk_destroy_sock);

/* This function allows to force a closure of a socket after the call to
 * tcp/dccp_create_openreq_child().
 */
void inet_csk_prepare_forced_close(struct sock *sk)
        __releases(&sk->sk_lock.slock)
{
        /* sk_clone_lock locked the socket and set refcnt to 2 */
        bh_unlock_sock(sk);
        sock_put(sk);

        /* The below has to be done to allow calling inet_csk_destroy_sock */
        sock_set_flag(sk, SOCK_DEAD);
        percpu_counter_inc(sk->sk_prot->orphan_count);
        inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);

int inet_csk_listen_start(struct sock *sk, int backlog)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = -EADDRINUSE;

        reqsk_queue_alloc(&icsk->icsk_accept_queue);

        sk->sk_ack_backlog = 0;
        inet_csk_delack_init(sk);

        /* There is race window here: we announce ourselves listening,
         * but this transition is still not validated by get_port().
         * It is OK, because this socket enters to hash table only
         * after validation is complete.
         */
        inet_sk_state_store(sk, TCP_LISTEN);
        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
                inet->inet_sport = htons(inet->inet_num);

                sk_dst_reset(sk);
                err = sk->sk_prot->hash(sk);

                if (likely(!err))
                        return 0;
        }

        inet_sk_set_state(sk, TCP_CLOSE);
        return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);

static void inet_child_forget(struct sock *sk, struct request_sock *req,
                              struct sock *child)
{
        sk->sk_prot->disconnect(child, O_NONBLOCK);

        sock_orphan(child);

        percpu_counter_inc(sk->sk_prot->orphan_count);

        if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
                BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
                BUG_ON(sk != req->rsk_listener);

                /* Paranoid, to prevent race condition if
                 * an inbound pkt destined for child is
                 * blocked by sock lock in tcp_v4_rcv().
                 * Also to satisfy an assertion in
                 * tcp_v4_destroy_sock().
                 */
                RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
        }
        inet_csk_destroy_sock(child);
}

struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
                                      struct request_sock *req,
                                      struct sock *child)
{
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

        spin_lock(&queue->rskq_lock);
        if (unlikely(sk->sk_state != TCP_LISTEN)) {
                inet_child_forget(sk, req, child);
                child = NULL;
        } else {
                req->sk = child;
                req->dl_next = NULL;
                if (queue->rskq_accept_head == NULL)
                        WRITE_ONCE(queue->rskq_accept_head, req);
                else
                        queue->rskq_accept_tail->dl_next = req;
                queue->rskq_accept_tail = req;
                sk_acceptq_added(sk);
        }
        spin_unlock(&queue->rskq_lock);
        return child;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);

struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
                                         struct request_sock *req, bool own_req)
{
        if (own_req) {
                inet_csk_reqsk_queue_drop(sk, req);
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
                if (inet_csk_reqsk_queue_add(sk, req, child))
                        return child;
        }
        /* Too bad, another child took ownership of the request, undo. */
        bh_unlock_sock(child);
        sock_put(child);
        return NULL;
}
EXPORT_SYMBOL(inet_csk_complete_hashdance);

/*
 * This routine closes sockets which have been at least partially
 * opened, but not yet accepted.
 */
void inet_csk_listen_stop(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct request_sock *next, *req;

        /* Following specs, it would be better either to send FIN
         * (and enter FIN-WAIT-1, it is normal close)
         * or to send active reset (abort).
         * Certainly, it is pretty dangerous while synflood, but it is
         * bad justification for our negligence 8)
         * To be honest, we are not able to make either
         * of the variants now.			--ANK
         */
        while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
                struct sock *child = req->sk;

                local_bh_disable();
                bh_lock_sock(child);
                WARN_ON(sock_owned_by_user(child));
                sock_hold(child);

                inet_child_forget(sk, req, child);
                reqsk_put(req);
                bh_unlock_sock(child);
                local_bh_enable();
                sock_put(child);

                cond_resched();
        }
        if (queue->fastopenq.rskq_rst_head) {
                /* Free all the reqs queued in rskq_rst_head. */
                spin_lock_bh(&queue->fastopenq.lock);
                req = queue->fastopenq.rskq_rst_head;
                queue->fastopenq.rskq_rst_head = NULL;
                spin_unlock_bh(&queue->fastopenq.lock);
                while (req != NULL) {
                        next = req->dl_next;
                        reqsk_put(req);
                        req = next;
                }
        }
        WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);

void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
        const struct inet_sock *inet = inet_sk(sk);

        sin->sin_family		= AF_INET;
        sin->sin_addr.s_addr	= inet->inet_daddr;
        sin->sin_port		= inet->inet_dport;
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);

#ifdef CONFIG_COMPAT
int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
                               char __user *optval, int __user *optlen)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_af_ops->compat_getsockopt)
                return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
                                                            optval, optlen);
        return icsk->icsk_af_ops->getsockopt(sk, level, optname,
                                             optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);

int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
                               char __user *optval, unsigned int optlen)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_af_ops->compat_setsockopt)
                return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
                                                            optval, optlen);
        return icsk->icsk_af_ops->setsockopt(sk, level, optname,
                                             optval, optlen);
}
EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
#endif

static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ip_options_rcu *inet_opt;
        __be32 daddr = inet->inet_daddr;
        struct flowi4 *fl4;
        struct rtable *rt;

        rcu_read_lock();
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
        fl4 = &fl->u.ip4;
        rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
                                   inet->inet_saddr, inet->inet_dport,
                                   inet->inet_sport, sk->sk_protocol,
                                   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
        if (IS_ERR(rt))
                rt = NULL;
        if (rt)
                sk_setup_caps(sk, &rt->dst);
        rcu_read_unlock();

        return &rt->dst;
}

struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
{
        struct dst_entry *dst = __sk_dst_check(sk, 0);
        struct inet_sock *inet = inet_sk(sk);

        if (!dst) {
                dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
                if (!dst)
                        goto out;
        }
        dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

        dst = __sk_dst_check(sk, 0);
        if (!dst)
                dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
out:
        return dst;
}
EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);