// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support the IPV6_V6ONLY socket option,
 *	Alexey Kuznetsov		which allows both IPv4 and IPv6 sockets
 *					to bind a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

/* Helper returning the inet6 state (struct ipv6_pinfo) from a given TCP
 * socket. It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

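/* Derive the initial sequence number for an incoming connection from the
 * {daddr, saddr, dport, sport} tuple using the kernel's keyed hash
 * (secure_tcpv6_seq()), so ISNs cannot be predicted by off-path attackers.
 */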
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * beyond the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

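/* Active open: validate the destination, fall back to the IPv4 path for
 * v4-mapped addresses, resolve a route and source address, then transmit
 * the SYN via tcp_connect().
 */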
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If the interface was set while binding, the
			 * indices must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface. */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

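/* Deferred PMTU handler: apply a previously stored, smaller path MTU by
 * resyncing the MSS with the updated route and retransmitting anything
 * that no longer fits.
 */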
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * The check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

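/* ICMPv6 error handler: map the error back onto the socket it concerns,
 * then process redirects, Packet Too Big (PMTU) messages and hard errors,
 * which terminate a connection still in SYN_SENT/SYN_RECV.
 */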
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted, it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
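/* Build and send the SYN-ACK for a connection request, routing it via the
 * request's flow and attaching any IPv6 tx options inherited from the
 * listener.
 */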
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}

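/* Feed the IPv6 pseudo-header plus a zero-checksum copy of the TCP header
 * into the MD5 transform; this is the fixed part of an RFC 2385 TCP-MD5
 * signature.
 */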
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC 2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

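/* Verify the TCP-MD5 signature of an incoming segment against the key
 * configured for the peer address; returns true if the packet must be
 * dropped.
 */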
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

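/* Build and transmit a bare TCP control segment (RST or ACK) in reply to
 * @skb on the per-netns control socket, optionally carrying timestamps
 * and a TCP-MD5 signature.
 */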
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when this is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

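/* Send a RST in reply to an invalid or unwanted segment. When the segment
 * carries an MD5 option and no established socket is known, the key is
 * looked up via a matching listener so the RST can be signed.
 */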
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/* The active side is lost. Try to find the listening socket
		 * via the source port, and then the MD5 key via that socket.
		 * We do not lose security here: the incoming packet is
		 * checked against the MD5 hash of the key we find, and no
		 * RST is generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source),
					    tcp_v6_iif_l3_slave(skb),
					    tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

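/* Handle an incoming SYN on a listening socket: reject non-unicast and
 * v4-mapped sources, then hand off to the generic request machinery.
 */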
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move the header back to the beginning if
	 * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be called
	 * again. ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

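/* Create the child socket once the handshake completes, covering both the
 * native IPv6 case and the v4-mapped fallback, and clone listener state
 * (options, MD5 key, routing) into it.
 */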
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * Socket locking is here for SMP purposes, as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   may do it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options received in the last packet, enqueued
	   by tcp. Feel free to propose a better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* You may ask, what is this for?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which the user wants
	      to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB to its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure the compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

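/* Main receive entry point for IPv6 TCP: validate and checksum the
 * header, look up the owning socket, and dispatch to the request,
 * timewait, listen or established handling paths.
 */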
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

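/* Early demux: before routing, try to match the packet to an established
 * socket so its cached rx dst can short-circuit the input path.
 */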
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
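/* A socket running on ipv6_mapped speaks IPv4 on the wire.  The swap
 * happens at connect() time, not here; roughly (an abridged sketch of
 * the flow in tcp_v6_connect(), not a verbatim quote):
 *
 *	if (ipv6_addr_v4mapped(&usin->sin6_addr)) {
 *		icsk->icsk_af_ops = &ipv6_mapped;
 *		...
 *		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 *	}
 *
 * with the ops restored to ipv6_specific if the IPv4 connect fails.
 */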

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: sk_alloc() already zeroes a lot of fields explicitly, so they
 * need not be initialized here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
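/* One line per request/timewait/full socket, in the same fixed-width
 * layout as IPv4's /proc/net/tcp but with 128-bit hex-encoded
 * addresses; see the column header emitted by tcp6_seq_show() below.
 */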
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non-standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

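	/* Encode the pending timer for the "tr" column: 1 == retransmit /
	 * RACK / loss-probe timer, 2 == keepalive (sk_timer), 4 ==
	 * zero-window probe timer, 0 == nothing pending.
	 */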
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

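/* Hook up /proc/net/tcp6 for a netns.  A read walks the shared TCP hash
 * tables via tcp_seq_{start,next,stop}() and prints one line per IPv6
 * socket, e.g. (illustrative, truncated):
 *
 *	$ cat /proc/net/tcp6
 *	  sl  local_address ... st tx_queue rx_queue tr tm->when ...
 */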
int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

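/* The AF_INET6 struct proto.  Most hooks are shared with IPv4's
 * tcp_prot; the IPv6-specific pieces are connect/init/destroy, the
 * backlog receive path and the inet6 hashing.
 */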
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

/* Thinking of making this const?  Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
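/* This is the sysctl the comment above refers to: toggling
 * net.ipv4.tcp_early_demux (one knob covering both address families)
 * rewrites .early_demux between NULL and tcp_v6_early_demux at runtime.
 */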

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};
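/* Userspace illustration (not kernel code): an IPv6 stream socket
 * resolves to this protosw, and through it to tcpv6_prot and
 * inet6_stream_ops:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
 */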

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
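/* Note on the pernet ops above: the ctl socket created in
 * tcpv6_net_init() is the one tcp_v6_send_response() transmits RSTs and
 * timewait ACKs from, and the batched exit purges whatever IPv6
 * timewait sockets a dying netns still owns.
 */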

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}