1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
12 * A.N.Kuznetsov : arithmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57
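/* Last step before handing the packet to the device layer: make sure the
 * skb has enough headroom for the link-layer header, loop multicast copies
 * back to the local stack when needed, honour lightweight-tunnel redirects,
 * then resolve the nexthop neighbour and transmit via neigh_output().
 */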
58static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59{
60 struct dst_entry *dst = skb_dst(skb);
61 struct net_device *dev = dst->dev;
62 unsigned int hh_len = LL_RESERVED_SPACE(dev);
63 int delta = hh_len - skb_headroom(skb);
64 const struct in6_addr *nexthop;
65 struct neighbour *neigh;
66 int ret;
67
68 /* Be paranoid, rather than too clever. */
69 if (unlikely(delta > 0) && dev->header_ops) {
70 /* pskb_expand_head() might crash, if skb is shared */
71 if (skb_shared(skb)) {
72 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
73
74 if (likely(nskb)) {
75 if (skb->sk)
76 skb_set_owner_w(nskb, skb->sk);
77 consume_skb(skb);
78 } else {
79 kfree_skb(skb);
80 }
81 skb = nskb;
82 }
83 if (skb &&
84 pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
85 kfree_skb(skb);
86 skb = NULL;
87 }
88 if (!skb) {
89 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
90 return -ENOMEM;
91 }
92 }
93
94 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
95 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
96
97 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
98 ((mroute6_is_socket(net, skb) &&
99 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
100 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
101 &ipv6_hdr(skb)->saddr))) {
102 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
103
104 /* Do not check for IFF_ALLMULTI; multicast routing
105 is not supported in any case.
106 */
107 if (newskb)
108 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
109 net, sk, newskb, NULL, newskb->dev,
110 dev_loopback_xmit);
111
112 if (ipv6_hdr(skb)->hop_limit == 0) {
113 IP6_INC_STATS(net, idev,
114 IPSTATS_MIB_OUTDISCARDS);
115 kfree_skb(skb);
116 return 0;
117 }
118 }
119
120 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
121
122 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
123 IPV6_ADDR_SCOPE_NODELOCAL &&
124 !(dev->flags & IFF_LOOPBACK)) {
125 kfree_skb(skb);
126 return 0;
127 }
128 }
129
130 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
131 int res = lwtunnel_xmit(skb);
132
133 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
134 return res;
135 }
136
137 rcu_read_lock_bh();
138 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
139 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
140 if (unlikely(!neigh))
141 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
142 if (!IS_ERR(neigh)) {
143 sock_confirm_neigh(skb, neigh);
144 ret = neigh_output(neigh, skb, false);
145 rcu_read_unlock_bh();
146 return ret;
147 }
148 rcu_read_unlock_bh();
149
150 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
151 kfree_skb(skb);
152 return -EINVAL;
153}
154
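/* Software-segment a GSO packet whose segments would not fit the egress MTU,
 * then push every resulting segment through ip6_fragment(); the first error
 * seen is returned once all segments have been attempted.
 */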
155static int
156ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
157 struct sk_buff *skb, unsigned int mtu)
158{
159 struct sk_buff *segs, *nskb;
160 netdev_features_t features;
161 int ret = 0;
162
163 /* Please see corresponding comment in ip_finish_output_gso
164 * describing the cases where GSO segment length exceeds the
165 * egress MTU.
166 */
167 features = netif_skb_features(skb);
168 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
169 if (IS_ERR_OR_NULL(segs)) {
170 kfree_skb(skb);
171 return -ENOMEM;
172 }
173
174 consume_skb(skb);
175
176 skb_list_walk_safe(segs, segs, nskb) {
177 int err;
178
179 skb_mark_not_on_list(segs);
180 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
181 if (err && ret == 0)
182 ret = err;
183 }
184
185 return ret;
186}
187
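/* Choose the transmit strategy: re-route if an IPsec policy appeared after
 * SNAT, slow-path-segment oversized GSO packets, fragment anything else that
 * exceeds the MTU (or when allfrag/frag_max_size demands it), and otherwise
 * hand the packet straight to ip6_finish_output2().
 */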
188static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
189{
190 unsigned int mtu;
191
192#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
193 /* Policy lookup after SNAT yielded a new policy */
194 if (skb_dst(skb)->xfrm) {
195 IPCB(skb)->flags |= IPSKB_REROUTED;
196 return dst_output(net, sk, skb);
197 }
198#endif
199
200 mtu = ip6_skb_dst_mtu(skb);
201 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
202 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
203
204 if ((skb->len > mtu && !skb_is_gso(skb)) ||
205 dst_allfrag(skb_dst(skb)) ||
206 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
207 return ip6_fragment(net, sk, skb, ip6_finish_output2);
208 else
209 return ip6_finish_output2(net, sk, skb);
210}
211
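/* Run the egress cgroup-BPF program before doing the real output work;
 * NET_XMIT_CN still transmits but keeps the congestion-notification return
 * value, any other non-success verdict drops the packet.
 */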
212static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
213{
214 int ret;
215
216 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
217 switch (ret) {
218 case NET_XMIT_SUCCESS:
219 return __ip6_finish_output(net, sk, skb);
220 case NET_XMIT_CN:
221 return __ip6_finish_output(net, sk, skb) ? : ret;
222 default:
223 kfree_skb(skb);
224 return ret;
225 }
226}
227
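/* Output entry point for locally generated, already-routed packets: drop if
 * IPv6 is administratively disabled on the egress device, otherwise traverse
 * NF_INET_POST_ROUTING (skipped for rerouted skbs) into ip6_finish_output().
 */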
228int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
229{
230 struct net_device *dev = skb_dst(skb)->dev;
231 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
232
233 skb->protocol = htons(ETH_P_IPV6);
234 skb->dev = dev;
235
236 if (unlikely(idev->cnf.disable_ipv6)) {
237 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
238 kfree_skb(skb);
239 return 0;
240 }
241
242 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
243 net, sk, skb, NULL, dev,
244 ip6_finish_output,
245 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
246}
247
248bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
249{
250 if (!np->autoflowlabel_set)
251 return ip6_default_np_autolabel(net);
252 else
253 return np->autoflowlabel;
254}
255
256/*
257 * xmit an sk_buff (used by TCP, SCTP and DCCP)
258 * Note : socket lock is not held for SYNACK packets, but might be modified
259 * by calls to skb_set_owner_w() and ipv6_local_error(),
260 * which are using proper atomic operations or spinlocks.
261 */
262int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
263 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
264{
265 struct net *net = sock_net(sk);
266 const struct ipv6_pinfo *np = inet6_sk(sk);
267 struct in6_addr *first_hop = &fl6->daddr;
268 struct dst_entry *dst = skb_dst(skb);
269 unsigned int head_room;
270 struct ipv6hdr *hdr;
271 u8 proto = fl6->flowi6_proto;
272 int seg_len = skb->len;
273 int hlimit = -1;
274 u32 mtu;
275
276 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
277 if (opt)
278 head_room += opt->opt_nflen + opt->opt_flen;
279
280 if (unlikely(skb_headroom(skb) < head_room)) {
281 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
282 if (!skb2) {
283 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
284 IPSTATS_MIB_OUTDISCARDS);
285 kfree_skb(skb);
286 return -ENOBUFS;
287 }
288 if (skb->sk)
289 skb_set_owner_w(skb2, skb->sk);
290 consume_skb(skb);
291 skb = skb2;
292 }
293
294 if (opt) {
295 seg_len += opt->opt_nflen + opt->opt_flen;
296
297 if (opt->opt_flen)
298 ipv6_push_frag_opts(skb, opt, &proto);
299
300 if (opt->opt_nflen)
301 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
302 &fl6->saddr);
303 }
304
305 skb_push(skb, sizeof(struct ipv6hdr));
306 skb_reset_network_header(skb);
307 hdr = ipv6_hdr(skb);
308
309 /*
310 * Fill in the IPv6 header
311 */
312 if (np)
313 hlimit = np->hop_limit;
314 if (hlimit < 0)
315 hlimit = ip6_dst_hoplimit(dst);
316
317 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
318 ip6_autoflowlabel(net, np), fl6));
319
320 hdr->payload_len = htons(seg_len);
321 hdr->nexthdr = proto;
322 hdr->hop_limit = hlimit;
323
324 hdr->saddr = fl6->saddr;
325 hdr->daddr = *first_hop;
326
327 skb->protocol = htons(ETH_P_IPV6);
328 skb->priority = priority;
329 skb->mark = mark;
330
331 mtu = dst_mtu(dst);
332 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
333 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
334 IPSTATS_MIB_OUT, skb->len);
335
336 /* if egress device is enslaved to an L3 master device pass the
337 * skb to its handler for processing
338 */
339 skb = l3mdev_ip6_out((struct sock *)sk, skb);
340 if (unlikely(!skb))
341 return 0;
342
343 /* hooks should never assume socket lock is held.
344 * we promote our socket to non const
345 */
346 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
347 net, (struct sock *)sk, skb, NULL, dst->dev,
348 dst_output);
349 }
350
351 skb->dev = dst->dev;
352 /* ipv6_local_error() does not require socket lock,
353 * we promote our socket to non const
354 */
355 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
356
357 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
358 kfree_skb(skb);
359 return -EMSGSIZE;
360}
361EXPORT_SYMBOL(ip6_xmit);
362
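/* Hand a Router Alert packet to every raw socket registered with a matching
 * selector (and, where required, the same netns); returns 1 if a socket
 * consumed the skb, 0 if the caller still owns it.
 */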
363static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
364{
365 struct ip6_ra_chain *ra;
366 struct sock *last = NULL;
367
368 read_lock(&ip6_ra_lock);
369 for (ra = ip6_ra_chain; ra; ra = ra->next) {
370 struct sock *sk = ra->sk;
371 if (sk && ra->sel == sel &&
372 (!sk->sk_bound_dev_if ||
373 sk->sk_bound_dev_if == skb->dev->ifindex)) {
374 struct ipv6_pinfo *np = inet6_sk(sk);
375
376 if (np && np->rtalert_isolate &&
377 !net_eq(sock_net(sk), dev_net(skb->dev))) {
378 continue;
379 }
380 if (last) {
381 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
382 if (skb2)
383 rawv6_rcv(last, skb2);
384 }
385 last = sk;
386 }
387 }
388
389 if (last) {
390 rawv6_rcv(last, skb);
391 read_unlock(&ip6_ra_lock);
392 return 1;
393 }
394 read_unlock(&ip6_ra_lock);
395 return 0;
396}
397
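/* For proxied destinations, decide what to do with the packet: ICMPv6
 * neighbour-discovery aimed at the proxied address goes to the local input
 * path (1), link-local destinations trigger a link failure (-1), anything
 * else may be forwarded normally (0).
 */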
398static int ip6_forward_proxy_check(struct sk_buff *skb)
399{
400 struct ipv6hdr *hdr = ipv6_hdr(skb);
401 u8 nexthdr = hdr->nexthdr;
402 __be16 frag_off;
403 int offset;
404
405 if (ipv6_ext_hdr(nexthdr)) {
406 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
407 if (offset < 0)
408 return 0;
409 } else
410 offset = sizeof(struct ipv6hdr);
411
412 if (nexthdr == IPPROTO_ICMPV6) {
413 struct icmp6hdr *icmp6;
414
415 if (!pskb_may_pull(skb, (skb_network_header(skb) +
416 offset + 1 - skb->data)))
417 return 0;
418
419 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
420
421 switch (icmp6->icmp6_type) {
422 case NDISC_ROUTER_SOLICITATION:
423 case NDISC_ROUTER_ADVERTISEMENT:
424 case NDISC_NEIGHBOUR_SOLICITATION:
425 case NDISC_NEIGHBOUR_ADVERTISEMENT:
426 case NDISC_REDIRECT:
427 /* For reaction involving unicast neighbor discovery
428 * message destined to the proxied address, pass it to
429 * input function.
430 */
431 return 1;
432 default:
433 break;
434 }
435 }
436
437 /*
438 * The proxying router can't forward traffic sent to a link-local
439 * address, so signal the sender and discard the packet. This
440 * behavior is clarified by the MIPv6 specification.
441 */
442 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
443 dst_link_failure(skb);
444 return -1;
445 }
446
447 return 0;
448}
449
450static inline int ip6_forward_finish(struct net *net, struct sock *sk,
451 struct sk_buff *skb)
452{
453 struct dst_entry *dst = skb_dst(skb);
454
455 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
456 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
457
458#ifdef CONFIG_NET_SWITCHDEV
459 if (skb->offload_l3_fwd_mark) {
460 consume_skb(skb);
461 return 0;
462 }
463#endif
464
465 skb->tstamp = 0;
466 return dst_output(net, sk, skb);
467}
468
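/* True when the packet cannot be sent as-is: it exceeds the MTU and neither
 * a conntrack frag_max_size within the MTU, ignore_df, nor GSO segments that
 * would individually fit allow it through.
 */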
469static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
470{
471 if (skb->len <= mtu)
472 return false;
473
474 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
475 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
476 return true;
477
478 if (skb->ignore_df)
479 return false;
480
481 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
482 return false;
483
484 return true;
485}
486
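/* Forwarding path: check that forwarding is enabled and the packet is
 * eligible, hand Router Alert packets to interested sockets, enforce hop
 * limit and path MTU (emitting the corresponding ICMPv6 errors), send a
 * redirect when the packet leaves on the interface it arrived on, and
 * finally pass it through NF_INET_FORWARD to ip6_forward_finish().
 */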
487int ip6_forward(struct sk_buff *skb)
488{
489 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
490 struct dst_entry *dst = skb_dst(skb);
491 struct ipv6hdr *hdr = ipv6_hdr(skb);
492 struct inet6_skb_parm *opt = IP6CB(skb);
493 struct net *net = dev_net(dst->dev);
494 u32 mtu;
495
496 if (net->ipv6.devconf_all->forwarding == 0)
497 goto error;
498
499 if (skb->pkt_type != PACKET_HOST)
500 goto drop;
501
502 if (unlikely(skb->sk))
503 goto drop;
504
505 if (skb_warn_if_lro(skb))
506 goto drop;
507
508 if (!net->ipv6.devconf_all->disable_policy &&
509 !idev->cnf.disable_policy &&
510 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
511 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
512 goto drop;
513 }
514
515 skb_forward_csum(skb);
516
517 /*
518 * We DO NOT make any processing on
519 * RA packets, pushing them to user level AS IS
520 * without any WARRANTY that application will be able
521 * to interpret them. The reason is that we
522 * cannot make anything clever here.
523 *
524 * We are not end-node, so that if packet contains
525 * AH/ESP, we cannot make anything.
526 * Defragmentation also would be mistake, RA packets
527 * cannot be fragmented, because there is no warranty
528 * that different fragments will go along one path. --ANK
529 */
530 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
531 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
532 return 0;
533 }
534
535 /*
536 * check and decrement ttl
537 */
538 if (hdr->hop_limit <= 1) {
539 /* Force OUTPUT device used as source address */
540 skb->dev = dst->dev;
541 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
542 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
543
544 kfree_skb(skb);
545 return -ETIMEDOUT;
546 }
547
548 /* XXX: idev->cnf.proxy_ndp? */
549 if (net->ipv6.devconf_all->proxy_ndp &&
550 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
551 int proxied = ip6_forward_proxy_check(skb);
552 if (proxied > 0)
553 return ip6_input(skb);
554 else if (proxied < 0) {
555 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
556 goto drop;
557 }
558 }
559
560 if (!xfrm6_route_forward(skb)) {
561 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
562 goto drop;
563 }
564 dst = skb_dst(skb);
565
566 /* IPv6 specs say nothing about it, but it is clear that we cannot
567 send redirects to source routed frames.
568 We don't send redirects to frames decapsulated from IPsec.
569 */
570 if (IP6CB(skb)->iif == dst->dev->ifindex &&
571 opt->srcrt == 0 && !skb_sec_path(skb)) {
572 struct in6_addr *target = NULL;
573 struct inet_peer *peer;
574 struct rt6_info *rt;
575
576 /*
577 * incoming and outgoing devices are the same
578 * send a redirect.
579 */
580
581 rt = (struct rt6_info *) dst;
582 if (rt->rt6i_flags & RTF_GATEWAY)
583 target = &rt->rt6i_gateway;
584 else
585 target = &hdr->daddr;
586
587 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
588
589 /* Limit redirects both by destination (here)
590 and by source (inside ndisc_send_redirect)
591 */
592 if (inet_peer_xrlim_allow(peer, 1*HZ))
593 ndisc_send_redirect(skb, target);
594 if (peer)
595 inet_putpeer(peer);
596 } else {
597 int addrtype = ipv6_addr_type(&hdr->saddr);
598
599 /* This check is security critical. */
600 if (addrtype == IPV6_ADDR_ANY ||
601 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
602 goto error;
603 if (addrtype & IPV6_ADDR_LINKLOCAL) {
604 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
605 ICMPV6_NOT_NEIGHBOUR, 0);
606 goto error;
607 }
608 }
609
610 mtu = ip6_dst_mtu_forward(dst);
611 if (mtu < IPV6_MIN_MTU)
612 mtu = IPV6_MIN_MTU;
613
614 if (ip6_pkt_too_big(skb, mtu)) {
615 /* Again, force OUTPUT device used as source address */
616 skb->dev = dst->dev;
617 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
618 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
619 __IP6_INC_STATS(net, ip6_dst_idev(dst),
620 IPSTATS_MIB_FRAGFAILS);
621 kfree_skb(skb);
622 return -EMSGSIZE;
623 }
624
625 if (skb_cow(skb, dst->dev->hard_header_len)) {
626 __IP6_INC_STATS(net, ip6_dst_idev(dst),
627 IPSTATS_MIB_OUTDISCARDS);
628 goto drop;
629 }
630
631 hdr = ipv6_hdr(skb);
632
633 /* Mangling hops number delayed to point after skb COW */
634
635 hdr->hop_limit--;
636
637 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
638 net, NULL, skb, skb->dev, dst->dev,
639 ip6_forward_finish);
640
641error:
642 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
643drop:
644 kfree_skb(skb);
645 return -EINVAL;
646}
647
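/* Propagate per-packet metadata (packet type, priority, protocol, dst,
 * device, mark, hash, tc index, netfilter/extension/security state) from the
 * original skb to a freshly allocated fragment.
 */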
648static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
649{
650 to->pkt_type = from->pkt_type;
651 to->priority = from->priority;
652 to->protocol = from->protocol;
653 skb_dst_drop(to);
654 skb_dst_set(to, dst_clone(skb_dst(from)));
655 to->dev = from->dev;
656 to->mark = from->mark;
657
658 skb_copy_hash(to, from);
659
660#ifdef CONFIG_NET_SCHED
661 to->tc_index = from->tc_index;
662#endif
663 nf_copy(to, from);
664 skb_ext_copy(to, from);
665 skb_copy_secmark(to, from);
666}
667
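/* Fast-path fragmentation over an existing frag_list: keep a copy of the
 * network headers, detach the frag list and turn the head skb into the first
 * fragment by inserting a fragment header with IP6_MF set.
 */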
668int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
669 u8 nexthdr, __be32 frag_id,
670 struct ip6_fraglist_iter *iter)
671{
672 unsigned int first_len;
673 struct frag_hdr *fh;
674
675 /* BUILD HEADER */
676 *prevhdr = NEXTHDR_FRAGMENT;
677 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
678 if (!iter->tmp_hdr)
679 return -ENOMEM;
680
681 iter->frag = skb_shinfo(skb)->frag_list;
682 skb_frag_list_init(skb);
683
684 iter->offset = 0;
685 iter->hlen = hlen;
686 iter->frag_id = frag_id;
687 iter->nexthdr = nexthdr;
688
689 __skb_pull(skb, hlen);
690 fh = __skb_push(skb, sizeof(struct frag_hdr));
691 __skb_push(skb, hlen);
692 skb_reset_network_header(skb);
693 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
694
695 fh->nexthdr = nexthdr;
696 fh->reserved = 0;
697 fh->frag_off = htons(IP6_MF);
698 fh->identification = frag_id;
699
700 first_len = skb_pagelen(skb);
701 skb->data_len = first_len - skb_headlen(skb);
702 skb->len = first_len;
703 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
704
705 return 0;
706}
707EXPORT_SYMBOL(ip6_fraglist_init);
708
709void ip6_fraglist_prepare(struct sk_buff *skb,
710 struct ip6_fraglist_iter *iter)
711{
712 struct sk_buff *frag = iter->frag;
713 unsigned int hlen = iter->hlen;
714 struct frag_hdr *fh;
715
716 frag->ip_summed = CHECKSUM_NONE;
717 skb_reset_transport_header(frag);
718 fh = __skb_push(frag, sizeof(struct frag_hdr));
719 __skb_push(frag, hlen);
720 skb_reset_network_header(frag);
721 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
722 iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
723 fh->nexthdr = iter->nexthdr;
724 fh->reserved = 0;
725 fh->frag_off = htons(iter->offset);
726 if (frag->next)
727 fh->frag_off |= htons(IP6_MF);
728 fh->identification = iter->frag_id;
729 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
730 ip6_copy_metadata(frag, skb);
731}
732EXPORT_SYMBOL(ip6_fraglist_prepare);
733
734void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
735 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
736 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
737{
738 state->prevhdr = prevhdr;
739 state->nexthdr = nexthdr;
740 state->frag_id = frag_id;
741
742 state->hlen = hlen;
743 state->mtu = mtu;
744
745 state->left = skb->len - hlen; /* Space per frame */
746 state->ptr = hlen; /* Where to start from */
747
748 state->hroom = hdr_room;
749 state->troom = needed_tailroom;
750
751 state->offset = 0;
752}
753EXPORT_SYMBOL(ip6_frag_init);
754
755struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
756{
757 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
758 struct sk_buff *frag;
759 struct frag_hdr *fh;
760 unsigned int len;
761
762 len = state->left;
763 /* IF: it doesn't fit, use 'mtu' - the data space left */
764 if (len > state->mtu)
765 len = state->mtu;
766 /* IF: we are not sending up to and including the packet end
767 then align the next start on an eight byte boundary */
768 if (len < state->left)
769 len &= ~7;
770
771 /* Allocate buffer */
772 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
773 state->hroom + state->troom, GFP_ATOMIC);
774 if (!frag)
775 return ERR_PTR(-ENOMEM);
776
777 /*
778 * Set up data on packet
779 */
780
781 ip6_copy_metadata(frag, skb);
782 skb_reserve(frag, state->hroom);
783 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
784 skb_reset_network_header(frag);
785 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
786 frag->transport_header = (frag->network_header + state->hlen +
787 sizeof(struct frag_hdr));
788
789 /*
790 * Charge the memory for the fragment to any owner
791 * it might possess
792 */
793 if (skb->sk)
794 skb_set_owner_w(frag, skb->sk);
795
796 /*
797 * Copy the packet header into the new buffer.
798 */
799 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
800
801 fragnexthdr_offset = skb_network_header(frag);
802 fragnexthdr_offset += prevhdr - skb_network_header(skb);
803 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
804
805 /*
806 * Build fragment header.
807 */
808 fh->nexthdr = state->nexthdr;
809 fh->reserved = 0;
810 fh->identification = state->frag_id;
811
812 /*
813 * Copy a block of the IP datagram.
814 */
815 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
816 len));
817 state->left -= len;
818
819 fh->frag_off = htons(state->offset);
820 if (state->left > 0)
821 fh->frag_off |= htons(IP6_MF);
822 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
823
824 state->ptr += len;
825 state->offset += len;
826
827 return frag;
828}
829EXPORT_SYMBOL(ip6_frag_next);
830
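/* Fragment an IPv6 packet and feed every fragment to @output. The frag_list
 * fast path is used when the existing geometry allows it; otherwise the slow
 * path allocates and copies each fragment via ip6_frag_init()/ip6_frag_next().
 */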
831int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
832 int (*output)(struct net *, struct sock *, struct sk_buff *))
833{
834 struct sk_buff *frag;
835 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
836 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
837 inet6_sk(skb->sk) : NULL;
838 struct ip6_frag_state state;
839 unsigned int mtu, hlen, nexthdr_offset;
840 ktime_t tstamp = skb->tstamp;
841 int hroom, err = 0;
842 __be32 frag_id;
843 u8 *prevhdr, nexthdr = 0;
844
845 err = ip6_find_1stfragopt(skb, &prevhdr);
846 if (err < 0)
847 goto fail;
848 hlen = err;
849 nexthdr = *prevhdr;
850 nexthdr_offset = prevhdr - skb_network_header(skb);
851
852 mtu = ip6_skb_dst_mtu(skb);
853
854 /* We must not fragment if the socket is set to force MTU discovery
855 * or if the skb is not generated by a local socket.
856 */
857 if (unlikely(!skb->ignore_df && skb->len > mtu))
858 goto fail_toobig;
859
860 if (IP6CB(skb)->frag_max_size) {
861 if (IP6CB(skb)->frag_max_size > mtu)
862 goto fail_toobig;
863
864 /* don't send fragments larger than what we received */
865 mtu = IP6CB(skb)->frag_max_size;
866 if (mtu < IPV6_MIN_MTU)
867 mtu = IPV6_MIN_MTU;
868 }
869
870 if (np && np->frag_size < mtu) {
871 if (np->frag_size)
872 mtu = np->frag_size;
873 }
874 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
875 goto fail_toobig;
876 mtu -= hlen + sizeof(struct frag_hdr);
877
878 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
879 &ipv6_hdr(skb)->saddr);
880
881 if (skb->ip_summed == CHECKSUM_PARTIAL &&
882 (err = skb_checksum_help(skb)))
883 goto fail;
884
885 prevhdr = skb_network_header(skb) + nexthdr_offset;
886 hroom = LL_RESERVED_SPACE(rt->dst.dev);
887 if (skb_has_frag_list(skb)) {
888 unsigned int first_len = skb_pagelen(skb);
889 struct ip6_fraglist_iter iter;
890 struct sk_buff *frag2;
891
892 if (first_len - hlen > mtu ||
893 ((first_len - hlen) & 7) ||
894 skb_cloned(skb) ||
895 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
896 goto slow_path;
897
898 skb_walk_frags(skb, frag) {
899 /* Correct geometry. */
900 if (frag->len > mtu ||
901 ((frag->len & 7) && frag->next) ||
902 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
903 goto slow_path_clean;
904
905 /* Partially cloned skb? */
906 if (skb_shared(frag))
907 goto slow_path_clean;
908
909 BUG_ON(frag->sk);
910 if (skb->sk) {
911 frag->sk = skb->sk;
912 frag->destructor = sock_wfree;
913 }
914 skb->truesize -= frag->truesize;
915 }
916
917 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
918 &iter);
919 if (err < 0)
920 goto fail;
921
922 for (;;) {
923 /* Prepare header of the next frame,
924 * before previous one went down. */
925 if (iter.frag)
926 ip6_fraglist_prepare(skb, &iter);
927
928 skb->tstamp = tstamp;
929 err = output(net, sk, skb);
930 if (!err)
931 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
932 IPSTATS_MIB_FRAGCREATES);
933
934 if (err || !iter.frag)
935 break;
936
937 skb = ip6_fraglist_next(&iter);
938 }
939
940 kfree(iter.tmp_hdr);
941
942 if (err == 0) {
943 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
944 IPSTATS_MIB_FRAGOKS);
945 return 0;
946 }
947
948 kfree_skb_list(iter.frag);
949
950 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
951 IPSTATS_MIB_FRAGFAILS);
952 return err;
953
954slow_path_clean:
955 skb_walk_frags(skb, frag2) {
956 if (frag2 == frag)
957 break;
958 frag2->sk = NULL;
959 frag2->destructor = NULL;
960 skb->truesize += frag2->truesize;
961 }
962 }
963
964slow_path:
965 /*
966 * Fragment the datagram.
967 */
968
969 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
970 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
971 &state);
972
973 /*
974 * Keep copying data until we run out.
975 */
976
977 while (state.left > 0) {
978 frag = ip6_frag_next(skb, &state);
979 if (IS_ERR(frag)) {
980 err = PTR_ERR(frag);
981 goto fail;
982 }
983
984 /*
985 * Put this fragment into the sending queue.
986 */
987 frag->tstamp = tstamp;
988 err = output(net, sk, frag);
989 if (err)
990 goto fail;
991
992 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
993 IPSTATS_MIB_FRAGCREATES);
994 }
995 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
996 IPSTATS_MIB_FRAGOKS);
997 consume_skb(skb);
998 return err;
999
1000fail_toobig:
1001 if (skb->sk && dst_allfrag(skb_dst(skb)))
1002 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
1003
1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1005 err = -EMSGSIZE;
1006
1007fail:
1008 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1009 IPSTATS_MIB_FRAGFAILS);
1010 kfree_skb(skb);
1011 return err;
1012}
1013
1014static inline int ip6_rt_check(const struct rt6key *rt_key,
1015 const struct in6_addr *fl_addr,
1016 const struct in6_addr *addr_cache)
1017{
1018 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1019 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1020}
1021
1022static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1023 struct dst_entry *dst,
1024 const struct flowi6 *fl6)
1025{
1026 struct ipv6_pinfo *np = inet6_sk(sk);
1027 struct rt6_info *rt;
1028
1029 if (!dst)
1030 goto out;
1031
1032 if (dst->ops->family != AF_INET6) {
1033 dst_release(dst);
1034 return NULL;
1035 }
1036
1037 rt = (struct rt6_info *)dst;
1038 /* Yes, checking route validity in not connected
1039 * case is not very simple. Take into account,
1040 * that we do not support routing by source, TOS,
1041 * and MSG_DONTROUTE --ANK (980726)
1042 *
1043 * 1. ip6_rt_check(): If route was host route,
1044 * check that cached destination is current.
1045 * If it is network route, we still may
1046 * check its validity using saved pointer
1047 * to the last used address: daddr_cache.
1048 * We do not want to save whole address now,
1049 * (because main consumer of this service
1050 * is tcp, which has not this problem),
1051 * so that the last trick works only on connected
1052 * sockets.
1053 * 2. oif also should be the same.
1054 */
1055 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1056#ifdef CONFIG_IPV6_SUBTREES
1057 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1058#endif
1059 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1060 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1061 dst_release(dst);
1062 dst = NULL;
1063 }
1064
1065out:
1066 return dst;
1067}
1068
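/* Core of the route lookup: pick a source address when the flow leaves it
 * unspecified, perform the lookup, fall back to the default router when the
 * chosen (optimistic-DAD) source has no valid neighbour, and refuse
 * v4-mapped sources paired with non-v4-mapped destinations.
 */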
1069static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1070 struct dst_entry **dst, struct flowi6 *fl6)
1071{
1072#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1073 struct neighbour *n;
1074 struct rt6_info *rt;
1075#endif
1076 int err;
1077 int flags = 0;
1078
1079 /* The correct way to handle this would be to do
1080 * ip6_route_get_saddr, and then ip6_route_output; however,
1081 * the route-specific preferred source forces the
1082 * ip6_route_output call _before_ ip6_route_get_saddr.
1083 *
1084 * In source specific routing (no src=any default route),
1085 * ip6_route_output will fail given src=any saddr, though, so
1086 * that's why we try it again later.
1087 */
1088 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1089 struct fib6_info *from;
1090 struct rt6_info *rt;
1091 bool had_dst = *dst != NULL;
1092
1093 if (!had_dst)
1094 *dst = ip6_route_output(net, sk, fl6);
1095 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1096
1097 rcu_read_lock();
1098 from = rt ? rcu_dereference(rt->from) : NULL;
1099 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1100 sk ? inet6_sk(sk)->srcprefs : 0,
1101 &fl6->saddr);
1102 rcu_read_unlock();
1103
1104 if (err)
1105 goto out_err_release;
1106
1107 /* If we had an erroneous initial result, pretend it
1108 * never existed and let the SA-enabled version take
1109 * over.
1110 */
1111 if (!had_dst && (*dst)->error) {
1112 dst_release(*dst);
1113 *dst = NULL;
1114 }
1115
1116 if (fl6->flowi6_oif)
1117 flags |= RT6_LOOKUP_F_IFACE;
1118 }
1119
1120 if (!*dst)
1121 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1122
1123 err = (*dst)->error;
1124 if (err)
1125 goto out_err_release;
1126
1127#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1128 /*
1129 * Here if the dst entry we've looked up
1130 * has a neighbour entry that is in the INCOMPLETE
1131 * state and the src address from the flow is
1132 * marked as OPTIMISTIC, we release the found
1133 * dst entry and replace it instead with the
1134 * dst entry of the nexthop router
1135 */
1136 rt = (struct rt6_info *) *dst;
1137 rcu_read_lock_bh();
1138 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1139 rt6_nexthop(rt, &fl6->daddr));
1140 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1141 rcu_read_unlock_bh();
1142
1143 if (err) {
1144 struct inet6_ifaddr *ifp;
1145 struct flowi6 fl_gw6;
1146 int redirect;
1147
1148 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1149 (*dst)->dev, 1);
1150
1151 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1152 if (ifp)
1153 in6_ifa_put(ifp);
1154
1155 if (redirect) {
1156 /*
1157 * We need to get the dst entry for the
1158 * default router instead
1159 */
1160 dst_release(*dst);
1161 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1162 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1163 *dst = ip6_route_output(net, sk, &fl_gw6);
1164 err = (*dst)->error;
1165 if (err)
1166 goto out_err_release;
1167 }
1168 }
1169#endif
1170 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1171 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1172 err = -EAFNOSUPPORT;
1173 goto out_err_release;
1174 }
1175
1176 return 0;
1177
1178out_err_release:
1179 dst_release(*dst);
1180 *dst = NULL;
1181
1182 if (err == -ENETUNREACH)
1183 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1184 return err;
1185}
1186
1187/**
1188 * ip6_dst_lookup - perform route lookup on flow
1189 * @sk: socket which provides route info
1190 * @dst: pointer to dst_entry * for result
1191 * @fl6: flow to lookup
1192 *
1193 * This function performs a route lookup on the given flow.
1194 *
1195 * It returns zero on success, or a standard errno code on error.
1196 */
1197int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1198 struct flowi6 *fl6)
1199{
1200 *dst = NULL;
1201 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1202}
1203EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1204
1205/**
1206 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1207 * @sk: socket which provides route info
1208 * @fl6: flow to lookup
1209 * @final_dst: final destination address for ipsec lookup
1210 *
1211 * This function performs a route lookup on the given flow.
1212 *
1213 * It returns a valid dst pointer on success, or a pointer encoded
1214 * error code.
1215 */
1216struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1217 const struct in6_addr *final_dst)
1218{
1219 struct dst_entry *dst = NULL;
1220 int err;
1221
1222 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1223 if (err)
1224 return ERR_PTR(err);
1225 if (final_dst)
1226 fl6->daddr = *final_dst;
1227
1228 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1229}
1230EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1231
1232/**
1233 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1234 * @sk: socket which provides the dst cache and route info
1235 * @fl6: flow to lookup
1236 * @final_dst: final destination address for ipsec lookup
1237 * @connected: whether @sk is connected or not
1238 *
1239 * This function performs a route lookup on the given flow with the
1240 * possibility of using the cached route in the socket if it is valid.
1241 * It will take the socket dst lock when operating on the dst cache.
1242 * As a result, this function can only be used in process context.
1243 *
1244 * In addition, for a connected socket, cache the dst in the socket
1245 * if the current cache is not valid.
1246 *
1247 * It returns a valid dst pointer on success, or a pointer encoded
1248 * error code.
1249 */
1250struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1251 const struct in6_addr *final_dst,
1252 bool connected)
1253{
1254 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1255
1256 dst = ip6_sk_dst_check(sk, dst, fl6);
1257 if (dst)
1258 return dst;
1259
1260 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1261 if (connected && !IS_ERR(dst))
1262 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1263
1264 return dst;
1265}
1266EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1267
1268static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1269 gfp_t gfp)
1270{
1271 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1272}
1273
1274static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1275 gfp_t gfp)
1276{
1277 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1278}
1279
1280static void ip6_append_data_mtu(unsigned int *mtu,
1281 int *maxfraglen,
1282 unsigned int fragheaderlen,
1283 struct sk_buff *skb,
1284 struct rt6_info *rt,
1285 unsigned int orig_mtu)
1286{
1287 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1288 if (!skb) {
1289 /* first fragment, reserve header_len */
1290 *mtu = orig_mtu - rt->dst.header_len;
1291
1292 } else {
1293 /*
1294 * this fragment is not first, the headers
1295 * space is regarded as data space.
1296 */
1297 *mtu = orig_mtu;
1298 }
1299 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1300 + fragheaderlen - sizeof(struct frag_hdr);
1301 }
1302}
1303
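/* Prepare the cork for a corked send: duplicate the supplied extension
 * headers, pin the route, and record hop limit, traffic class, fragment
 * size/GSO size, mark, timestamping flags and transmit time for later use by
 * __ip6_append_data() and __ip6_make_skb().
 */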
1304static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1305 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1306 struct rt6_info *rt, struct flowi6 *fl6)
1307{
1308 struct ipv6_pinfo *np = inet6_sk(sk);
1309 unsigned int mtu;
1310 struct ipv6_txoptions *opt = ipc6->opt;
1311
1312 /*
1313 * setup for corking
1314 */
1315 if (opt) {
1316 if (WARN_ON(v6_cork->opt))
1317 return -EINVAL;
1318
1319 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1320 if (unlikely(!v6_cork->opt))
1321 return -ENOBUFS;
1322
1323 v6_cork->opt->tot_len = sizeof(*opt);
1324 v6_cork->opt->opt_flen = opt->opt_flen;
1325 v6_cork->opt->opt_nflen = opt->opt_nflen;
1326
1327 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1328 sk->sk_allocation);
1329 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1330 return -ENOBUFS;
1331
1332 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1333 sk->sk_allocation);
1334 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1335 return -ENOBUFS;
1336
1337 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1338 sk->sk_allocation);
1339 if (opt->hopopt && !v6_cork->opt->hopopt)
1340 return -ENOBUFS;
1341
1342 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1343 sk->sk_allocation);
1344 if (opt->srcrt && !v6_cork->opt->srcrt)
1345 return -ENOBUFS;
1346
1347 /* need source address above miyazawa*/
1348 }
1349 dst_hold(&rt->dst);
1350 cork->base.dst = &rt->dst;
1351 cork->fl.u.ip6 = *fl6;
1352 v6_cork->hop_limit = ipc6->hlimit;
1353 v6_cork->tclass = ipc6->tclass;
1354 if (rt->dst.flags & DST_XFRM_TUNNEL)
1355 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1356 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1357 else
1358 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1359 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1360 if (np->frag_size < mtu) {
1361 if (np->frag_size)
1362 mtu = np->frag_size;
1363 }
1364 if (mtu < IPV6_MIN_MTU)
1365 return -EINVAL;
1366 cork->base.fragsize = mtu;
1367 cork->base.gso_size = ipc6->gso_size;
1368 cork->base.tx_flags = 0;
1369 cork->base.mark = ipc6->sockc.mark;
1370 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1371
1372 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1373 cork->base.flags |= IPCORK_ALLFRAG;
1374 cork->base.length = 0;
1375
1376 cork->base.transmit_time = ipc6->sockc.transmit_time;
1377
1378 return 0;
1379}
1380
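/* Append user data to the pending queue of a corked socket, growing the
 * current tail skb or allocating new ones so each packet lines up with the
 * fragment boundary; also handles checksum offload, MSG_ZEROCOPY pages and
 * socket write-memory accounting.
 */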
1381static int __ip6_append_data(struct sock *sk,
1382 struct flowi6 *fl6,
1383 struct sk_buff_head *queue,
1384 struct inet_cork *cork,
1385 struct inet6_cork *v6_cork,
1386 struct page_frag *pfrag,
1387 int getfrag(void *from, char *to, int offset,
1388 int len, int odd, struct sk_buff *skb),
1389 void *from, int length, int transhdrlen,
1390 unsigned int flags, struct ipcm6_cookie *ipc6)
1391{
1392 struct sk_buff *skb, *skb_prev = NULL;
1393 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1394 struct ubuf_info *uarg = NULL;
1395 int exthdrlen = 0;
1396 int dst_exthdrlen = 0;
1397 int hh_len;
1398 int copy;
1399 int err;
1400 int offset = 0;
1401 u32 tskey = 0;
1402 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1403 struct ipv6_txoptions *opt = v6_cork->opt;
1404 int csummode = CHECKSUM_NONE;
1405 unsigned int maxnonfragsize, headersize;
1406 unsigned int wmem_alloc_delta = 0;
1407 bool paged, extra_uref = false;
1408
1409 skb = skb_peek_tail(queue);
1410 if (!skb) {
1411 exthdrlen = opt ? opt->opt_flen : 0;
1412 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1413 }
1414
1415 paged = !!cork->gso_size;
1416 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1417 orig_mtu = mtu;
1418
1419 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1420 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1421 tskey = sk->sk_tskey++;
1422
1423 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1424
1425 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1426 (opt ? opt->opt_nflen : 0);
1427 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1428 sizeof(struct frag_hdr);
1429
1430 headersize = sizeof(struct ipv6hdr) +
1431 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1432 (dst_allfrag(&rt->dst) ?
1433 sizeof(struct frag_hdr) : 0) +
1434 rt->rt6i_nfheader_len;
1435
1436 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1437 * the first fragment
1438 */
1439 if (headersize + transhdrlen > mtu)
1440 goto emsgsize;
1441
1442 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1443 (sk->sk_protocol == IPPROTO_UDP ||
1444 sk->sk_protocol == IPPROTO_RAW)) {
1445 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1446 sizeof(struct ipv6hdr));
1447 goto emsgsize;
1448 }
1449
1450 if (ip6_sk_ignore_df(sk))
1451 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1452 else
1453 maxnonfragsize = mtu;
1454
1455 if (cork->length + length > maxnonfragsize - headersize) {
1456emsgsize:
1457 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1458 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1459 return -EMSGSIZE;
1460 }
1461
1462 /* CHECKSUM_PARTIAL only with no extension headers and when
1463 * we are not going to fragment
1464 */
1465 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1466 headersize == sizeof(struct ipv6hdr) &&
1467 length <= mtu - headersize &&
1468 (!(flags & MSG_MORE) || cork->gso_size) &&
1469 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1470 csummode = CHECKSUM_PARTIAL;
1471
1472 if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1473 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1474 if (!uarg)
1475 return -ENOBUFS;
1476 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1477 if (rt->dst.dev->features & NETIF_F_SG &&
1478 csummode == CHECKSUM_PARTIAL) {
1479 paged = true;
1480 } else {
1481 uarg->zerocopy = 0;
1482 skb_zcopy_set(skb, uarg, &extra_uref);
1483 }
1484 }
1485
1486 /*
1487 * Let's try using as much space as possible.
1488 * Use MTU if total length of the message fits into the MTU.
1489 * Otherwise, we need to reserve fragment header and
1490 * fragment alignment (= 8-15 octets, in total).
1491 *
1492 * Note that we may need to "move" the data from the tail
1493 * of the buffer to the new fragment when we split
1494 * the message.
1495 *
1496 * FIXME: It may be fragmented into multiple chunks
1497 * at once if non-fragmentable extension headers
1498 * are too large.
1499 * --yoshfuji
1500 */
1501
1502 cork->length += length;
1503 if (!skb)
1504 goto alloc_new_skb;
1505
1506 while (length > 0) {
1507 /* Check if the remaining data fits into current packet. */
1508 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1509 if (copy < length)
1510 copy = maxfraglen - skb->len;
1511
1512 if (copy <= 0) {
1513 char *data;
1514 unsigned int datalen;
1515 unsigned int fraglen;
1516 unsigned int fraggap;
1517 unsigned int alloclen, alloc_extra;
1518 unsigned int pagedlen;
1519alloc_new_skb:
1520 /* There's no room in the current skb */
1521 if (skb)
1522 fraggap = skb->len - maxfraglen;
1523 else
1524 fraggap = 0;
1525 /* update mtu and maxfraglen if necessary */
1526 if (!skb || !skb_prev)
1527 ip6_append_data_mtu(&mtu, &maxfraglen,
1528 fragheaderlen, skb, rt,
1529 orig_mtu);
1530
1531 skb_prev = skb;
1532
1533 /*
1534 * If remaining data exceeds the mtu,
1535 * we know we need more fragment(s).
1536 */
1537 datalen = length + fraggap;
1538
1539 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1540 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1541 fraglen = datalen + fragheaderlen;
1542 pagedlen = 0;
1543
1544 alloc_extra = hh_len;
1545 alloc_extra += dst_exthdrlen;
1546 alloc_extra += rt->dst.trailer_len;
1547
1548 /* We just reserve space for fragment header.
1549 * Note: this may be overallocation if the message
1550 * (without MSG_MORE) fits into the MTU.
1551 */
1552 alloc_extra += sizeof(struct frag_hdr);
1553
1554 if ((flags & MSG_MORE) &&
1555 !(rt->dst.dev->features&NETIF_F_SG))
1556 alloclen = mtu;
1557 else if (!paged &&
1558 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1559 !(rt->dst.dev->features & NETIF_F_SG)))
1560 alloclen = fraglen;
1561 else {
1562 alloclen = min_t(int, fraglen, MAX_HEADER);
1563 pagedlen = fraglen - alloclen;
1564 }
1565 alloclen += alloc_extra;
1566
1567 if (datalen != length + fraggap) {
1568 /*
1569 * this is not the last fragment, the trailer
1570 * space is regarded as data space.
1571 */
1572 datalen += rt->dst.trailer_len;
1573 }
1574
1575 fraglen = datalen + fragheaderlen;
1576
1577 copy = datalen - transhdrlen - fraggap - pagedlen;
1578 if (copy < 0) {
1579 err = -EINVAL;
1580 goto error;
1581 }
1582 if (transhdrlen) {
1583 skb = sock_alloc_send_skb(sk, alloclen,
1584 (flags & MSG_DONTWAIT), &err);
1585 } else {
1586 skb = NULL;
1587 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1588 2 * sk->sk_sndbuf)
1589 skb = alloc_skb(alloclen,
1590 sk->sk_allocation);
1591 if (unlikely(!skb))
1592 err = -ENOBUFS;
1593 }
1594 if (!skb)
1595 goto error;
1596 /*
1597 * Fill in the control structures
1598 */
1599 skb->protocol = htons(ETH_P_IPV6);
1600 skb->ip_summed = csummode;
1601 skb->csum = 0;
1602 /* reserve for fragmentation and ipsec header */
1603 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1604 dst_exthdrlen);
1605
1606 /*
1607 * Find where to start putting bytes
1608 */
1609 data = skb_put(skb, fraglen - pagedlen);
1610 skb_set_network_header(skb, exthdrlen);
1611 data += fragheaderlen;
1612 skb->transport_header = (skb->network_header +
1613 fragheaderlen);
1614 if (fraggap) {
1615 skb->csum = skb_copy_and_csum_bits(
1616 skb_prev, maxfraglen,
1617 data + transhdrlen, fraggap, 0);
1618 skb_prev->csum = csum_sub(skb_prev->csum,
1619 skb->csum);
1620 data += fraggap;
1621 pskb_trim_unique(skb_prev, maxfraglen);
1622 }
1623 if (copy > 0 &&
1624 getfrag(from, data + transhdrlen, offset,
1625 copy, fraggap, skb) < 0) {
1626 err = -EFAULT;
1627 kfree_skb(skb);
1628 goto error;
1629 }
1630
1631 offset += copy;
1632 length -= copy + transhdrlen;
1633 transhdrlen = 0;
1634 exthdrlen = 0;
1635 dst_exthdrlen = 0;
1636
1637 /* Only the initial fragment is time stamped */
1638 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1639 cork->tx_flags = 0;
1640 skb_shinfo(skb)->tskey = tskey;
1641 tskey = 0;
1642 skb_zcopy_set(skb, uarg, &extra_uref);
1643
1644 if ((flags & MSG_CONFIRM) && !skb_prev)
1645 skb_set_dst_pending_confirm(skb, 1);
1646
1647 /*
1648 * Put the packet on the pending queue
1649 */
1650 if (!skb->destructor) {
1651 skb->destructor = sock_wfree;
1652 skb->sk = sk;
1653 wmem_alloc_delta += skb->truesize;
1654 }
1655 __skb_queue_tail(queue, skb);
1656 continue;
1657 }
1658
1659 if (copy > length)
1660 copy = length;
1661
1662 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1663 skb_tailroom(skb) >= copy) {
1664 unsigned int off;
1665
1666 off = skb->len;
1667 if (getfrag(from, skb_put(skb, copy),
1668 offset, copy, off, skb) < 0) {
1669 __skb_trim(skb, off);
1670 err = -EFAULT;
1671 goto error;
1672 }
1673 } else if (!uarg || !uarg->zerocopy) {
1674 int i = skb_shinfo(skb)->nr_frags;
1675
1676 err = -ENOMEM;
1677 if (!sk_page_frag_refill(sk, pfrag))
1678 goto error;
1679
1680 if (!skb_can_coalesce(skb, i, pfrag->page,
1681 pfrag->offset)) {
1682 err = -EMSGSIZE;
1683 if (i == MAX_SKB_FRAGS)
1684 goto error;
1685
1686 __skb_fill_page_desc(skb, i, pfrag->page,
1687 pfrag->offset, 0);
1688 skb_shinfo(skb)->nr_frags = ++i;
1689 get_page(pfrag->page);
1690 }
1691 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1692 if (getfrag(from,
1693 page_address(pfrag->page) + pfrag->offset,
1694 offset, copy, skb->len, skb) < 0)
1695 goto error_efault;
1696
1697 pfrag->offset += copy;
1698 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1699 skb->len += copy;
1700 skb->data_len += copy;
1701 skb->truesize += copy;
1702 wmem_alloc_delta += copy;
1703 } else {
1704 err = skb_zerocopy_iter_dgram(skb, from, copy);
1705 if (err < 0)
1706 goto error;
1707 }
1708 offset += copy;
1709 length -= copy;
1710 }
1711
1712 if (wmem_alloc_delta)
1713 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1714 return 0;
1715
1716error_efault:
1717 err = -EFAULT;
1718error:
1719 if (uarg)
1720 sock_zerocopy_put_abort(uarg, extra_uref);
1721 cork->length -= length;
1722 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1723 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1724 return err;
1725}
1726
1727int ip6_append_data(struct sock *sk,
1728 int getfrag(void *from, char *to, int offset, int len,
1729 int odd, struct sk_buff *skb),
1730 void *from, int length, int transhdrlen,
1731 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1732 struct rt6_info *rt, unsigned int flags)
1733{
1734 struct inet_sock *inet = inet_sk(sk);
1735 struct ipv6_pinfo *np = inet6_sk(sk);
1736 int exthdrlen;
1737 int err;
1738
1739 if (flags&MSG_PROBE)
1740 return 0;
1741 if (skb_queue_empty(&sk->sk_write_queue)) {
1742 /*
1743 * setup for corking
1744 */
1745 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1746 ipc6, rt, fl6);
1747 if (err)
1748 return err;
1749
1750 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1751 length += exthdrlen;
1752 transhdrlen += exthdrlen;
1753 } else {
1754 fl6 = &inet->cork.fl.u.ip6;
1755 transhdrlen = 0;
1756 }
1757
1758 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1759 &np->cork, sk_page_frag(sk), getfrag,
1760 from, length, transhdrlen, flags, ipc6);
1761}
1762EXPORT_SYMBOL_GPL(ip6_append_data);
1763
1764static void ip6_cork_release(struct inet_cork_full *cork,
1765 struct inet6_cork *v6_cork)
1766{
1767 if (v6_cork->opt) {
1768 kfree(v6_cork->opt->dst0opt);
1769 kfree(v6_cork->opt->dst1opt);
1770 kfree(v6_cork->opt->hopopt);
1771 kfree(v6_cork->opt->srcrt);
1772 kfree(v6_cork->opt);
1773 v6_cork->opt = NULL;
1774 }
1775
1776 if (cork->base.dst) {
1777 dst_release(cork->base.dst);
1778 cork->base.dst = NULL;
1779 cork->base.flags &= ~IPCORK_ALLFRAG;
1780 }
1781 memset(&cork->fl, 0, sizeof(cork->fl));
1782}
1783
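/* Collapse the queued skbs into one packet with a frag_list, push the saved
 * extension headers and the IPv6 header, attach the cached route and release
 * the cork before returning the finished skb.
 */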
1784struct sk_buff *__ip6_make_skb(struct sock *sk,
1785 struct sk_buff_head *queue,
1786 struct inet_cork_full *cork,
1787 struct inet6_cork *v6_cork)
1788{
1789 struct sk_buff *skb, *tmp_skb;
1790 struct sk_buff **tail_skb;
1791 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1792 struct ipv6_pinfo *np = inet6_sk(sk);
1793 struct net *net = sock_net(sk);
1794 struct ipv6hdr *hdr;
1795 struct ipv6_txoptions *opt = v6_cork->opt;
1796 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1797 struct flowi6 *fl6 = &cork->fl.u.ip6;
1798 unsigned char proto = fl6->flowi6_proto;
1799
1800 skb = __skb_dequeue(queue);
1801 if (!skb)
1802 goto out;
1803 tail_skb = &(skb_shinfo(skb)->frag_list);
1804
1805 /* move skb->data to ip header from ext header */
1806 if (skb->data < skb_network_header(skb))
1807 __skb_pull(skb, skb_network_offset(skb));
1808 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1809 __skb_pull(tmp_skb, skb_network_header_len(skb));
1810 *tail_skb = tmp_skb;
1811 tail_skb = &(tmp_skb->next);
1812 skb->len += tmp_skb->len;
1813 skb->data_len += tmp_skb->len;
1814 skb->truesize += tmp_skb->truesize;
1815 tmp_skb->destructor = NULL;
1816 tmp_skb->sk = NULL;
1817 }
1818
1819 /* Allow local fragmentation. */
1820 skb->ignore_df = ip6_sk_ignore_df(sk);
1821
1822 *final_dst = fl6->daddr;
1823 __skb_pull(skb, skb_network_header_len(skb));
1824 if (opt && opt->opt_flen)
1825 ipv6_push_frag_opts(skb, opt, &proto);
1826 if (opt && opt->opt_nflen)
1827 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1828
1829 skb_push(skb, sizeof(struct ipv6hdr));
1830 skb_reset_network_header(skb);
1831 hdr = ipv6_hdr(skb);
1832
1833 ip6_flow_hdr(hdr, v6_cork->tclass,
1834 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1835 ip6_autoflowlabel(net, np), fl6));
1836 hdr->hop_limit = v6_cork->hop_limit;
1837 hdr->nexthdr = proto;
1838 hdr->saddr = fl6->saddr;
1839 hdr->daddr = *final_dst;
1840
1841 skb->priority = sk->sk_priority;
1842 skb->mark = cork->base.mark;
1843
1844 skb->tstamp = cork->base.transmit_time;
1845
1846 skb_dst_set(skb, dst_clone(&rt->dst));
1847 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1848 if (proto == IPPROTO_ICMPV6) {
1849 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1850
1851 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1852 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1853 }
1854
1855 ip6_cork_release(cork, v6_cork);
1856out:
1857 return skb;
1858}
1859
1860int ip6_send_skb(struct sk_buff *skb)
1861{
1862 struct net *net = sock_net(skb->sk);
1863 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1864 int err;
1865
1866 err = ip6_local_out(net, skb->sk, skb);
1867 if (err) {
1868 if (err > 0)
1869 err = net_xmit_errno(err);
1870 if (err)
1871 IP6_INC_STATS(net, rt->rt6i_idev,
1872 IPSTATS_MIB_OUTDISCARDS);
1873 }
1874
1875 return err;
1876}
1877
1878int ip6_push_pending_frames(struct sock *sk)
1879{
1880 struct sk_buff *skb;
1881
1882 skb = ip6_finish_skb(sk);
1883 if (!skb)
1884 return 0;
1885
1886 return ip6_send_skb(skb);
1887}
1888EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1889
1890static void __ip6_flush_pending_frames(struct sock *sk,
1891 struct sk_buff_head *queue,
1892 struct inet_cork_full *cork,
1893 struct inet6_cork *v6_cork)
1894{
1895 struct sk_buff *skb;
1896
1897 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1898 if (skb_dst(skb))
1899 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1900 IPSTATS_MIB_OUTDISCARDS);
1901 kfree_skb(skb);
1902 }
1903
1904 ip6_cork_release(cork, v6_cork);
1905}
1906
1907void ip6_flush_pending_frames(struct sock *sk)
1908{
1909 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1910 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1911}
1912EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1913
1914struct sk_buff *ip6_make_skb(struct sock *sk,
1915 int getfrag(void *from, char *to, int offset,
1916 int len, int odd, struct sk_buff *skb),
1917 void *from, int length, int transhdrlen,
1918 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1919 struct rt6_info *rt, unsigned int flags,
1920 struct inet_cork_full *cork)
1921{
1922 struct inet6_cork v6_cork;
1923 struct sk_buff_head queue;
1924 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1925 int err;
1926
1927 if (flags & MSG_PROBE)
1928 return NULL;
1929
1930 __skb_queue_head_init(&queue);
1931
1932 cork->base.flags = 0;
1933 cork->base.addr = 0;
1934 cork->base.opt = NULL;
1935 cork->base.dst = NULL;
1936 v6_cork.opt = NULL;
1937 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1938 if (err) {
1939 ip6_cork_release(cork, &v6_cork);
1940 return ERR_PTR(err);
1941 }
1942 if (ipc6->dontfrag < 0)
1943 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1944
1945 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1946 &current->task_frag, getfrag, from,
1947 length + exthdrlen, transhdrlen + exthdrlen,
1948 flags, ipc6);
1949 if (err) {
1950 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1951 return ERR_PTR(err);
1952 }
1953
1954 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1955}