Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2694def..71827b5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPv6 output functions
* Linux INET6 implementation
@@ -7,11 +8,6 @@
*
* Based on linux/net/ipv4/ip_output.c
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Changes:
* A.N.Kuznetsov : airthmetics in fragmentation.
* extension headers are implemented.
@@ -63,8 +59,8 @@
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
@@ -117,7 +113,7 @@
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
@@ -128,16 +124,8 @@
return -EINVAL;
}
-static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- int ret;
-
- ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
- if (ret) {
- kfree_skb(skb);
- return ret;
- }
-
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
@@ -154,6 +142,22 @@
return ip6_finish_output2(net, sk, skb);
}
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ switch (ret) {
+ case NET_XMIT_SUCCESS:
+ return __ip6_finish_output(net, sk, skb);
+ case NET_XMIT_CN:
+ return __ip6_finish_output(net, sk, skb) ? : ret;
+ default:
+ kfree_skb(skb);
+ return ret;
+ }
+}
+
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
@@ -189,7 +193,7 @@
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -254,7 +258,7 @@
hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = priority;
skb->mark = mark;
mtu = dst_mtu(dst);
@@ -300,6 +304,12 @@
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (np && np->rtalert_isolate &&
+ !net_eq(sock_net(sk), dev_net(skb->dev))) {
+ continue;
+ }
if (last) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -378,6 +388,14 @@
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
+ skb->tstamp = 0;
return dst_output(net, sk, skb);
}
@@ -574,9 +592,173 @@
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
+ skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id,
+ struct ip6_fraglist_iter *iter)
+{
+ unsigned int first_len;
+ struct frag_hdr *fh;
+
+ /* BUILD HEADER */
+ *prevhdr = NEXTHDR_FRAGMENT;
+ iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!iter->tmp_hdr)
+ return -ENOMEM;
+
+ iter->frag = skb_shinfo(skb)->frag_list;
+ skb_frag_list_init(skb);
+
+ iter->offset = 0;
+ iter->hlen = hlen;
+ iter->frag_id = frag_id;
+ iter->nexthdr = nexthdr;
+
+ __skb_pull(skb, hlen);
+ fh = __skb_push(skb, sizeof(struct frag_hdr));
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(IP6_MF);
+ fh->identification = frag_id;
+
+ first_len = skb_pagelen(skb);
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+ struct ip6_fraglist_iter *iter)
+{
+ struct sk_buff *frag = iter->frag;
+ unsigned int hlen = iter->hlen;
+ struct frag_hdr *fh;
+
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ fh = __skb_push(frag, sizeof(struct frag_hdr));
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+ iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+ fh->nexthdr = iter->nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(iter->offset);
+ if (frag->next)
+ fh->frag_off |= htons(IP6_MF);
+ fh->identification = iter->frag_id;
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+ state->prevhdr = prevhdr;
+ state->nexthdr = nexthdr;
+ state->frag_id = frag_id;
+
+ state->hlen = hlen;
+ state->mtu = mtu;
+
+ state->left = skb->len - hlen; /* Space per frame */
+ state->ptr = hlen; /* Where to start from */
+
+ state->hroom = hdr_room;
+ state->troom = needed_tailroom;
+
+ state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+ u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+ struct sk_buff *frag;
+ struct frag_hdr *fh;
+ unsigned int len;
+
+ len = state->left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > state->mtu)
+ len = state->mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < state->left)
+ len &= ~7;
+
+ /* Allocate buffer */
+ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+ state->hroom + state->troom, GFP_ATOMIC);
+ if (!frag)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Set up data on packet
+ */
+
+ ip6_copy_metadata(frag, skb);
+ skb_reserve(frag, state->hroom);
+ skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+ frag->transport_header = (frag->network_header + state->hlen +
+ sizeof(struct frag_hdr));
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+ if (skb->sk)
+ skb_set_owner_w(frag, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+ /*
+ * Build fragment header.
+ */
+ fh->nexthdr = state->nexthdr;
+ fh->reserved = 0;
+ fh->identification = state->frag_id;
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+ len));
+ state->left -= len;
+
+ fh->frag_off = htons(state->offset);
+ if (state->left > 0)
+ fh->frag_off |= htons(IP6_MF);
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+ state->ptr += len;
+ state->offset += len;
+
+ return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
+
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
@@ -584,12 +766,11 @@
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- struct ipv6hdr *tmp_hdr;
- struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
- int hroom, troom;
+ struct ip6_frag_state state;
+ unsigned int mtu, hlen, nexthdr_offset;
+ ktime_t tstamp = skb->tstamp;
+ int hroom, err = 0;
__be32 frag_id;
- int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -597,6 +778,7 @@
goto fail;
hlen = err;
nexthdr = *prevhdr;
+ nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
@@ -631,9 +813,11 @@
(err = skb_checksum_help(skb)))
goto fail;
+ prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
@@ -661,74 +845,30 @@
skb->truesize -= frag->truesize;
}
- err = 0;
- offset = 0;
- /* BUILD HEADER */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
- if (!tmp_hdr) {
- err = -ENOMEM;
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
goto fail;
- }
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
-
- __skb_pull(skb, hlen);
- fh = __skb_push(skb, sizeof(struct frag_hdr));
- __skb_push(skb, hlen);
- skb_reset_network_header(skb);
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(IP6_MF);
- fh->identification = frag_id;
-
- first_len = skb_pagelen(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- ipv6_hdr(skb)->payload_len = htons(first_len -
- sizeof(struct ipv6hdr));
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- fh = __skb_push(frag, sizeof(struct frag_hdr));
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), tmp_hdr,
- hlen);
- offset += skb->len - hlen - sizeof(struct frag_hdr);
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(offset);
- if (frag->next)
- fh->frag_off |= htons(IP6_MF);
- fh->identification = frag_id;
- ipv6_hdr(frag)->payload_len =
- htons(frag->len -
- sizeof(struct ipv6hdr));
- ip6_copy_metadata(frag, skb);
- }
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
+ if (err || !iter.frag)
break;
- skb = frag;
- frag = skb->next;
- skb->next = NULL;
+ skb = ip6_fraglist_next(&iter);
}
- kfree(tmp_hdr);
+ kfree(iter.tmp_hdr);
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -736,7 +876,7 @@
return 0;
}
- kfree_skb_list(frag);
+ kfree_skb_list(iter.frag);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
@@ -753,93 +893,29 @@
}
slow_path:
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
/*
* Fragment the datagram.
*/
- troom = rt->dst.dev->needed_tailroom;
+ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+ LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+ &state);
/*
* Keep copying data until we run out.
*/
- while (left > 0) {
- u8 *fragnexthdr_offset;
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
-
- /* Allocate buffer */
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC);
- if (!frag) {
- err = -ENOMEM;
+ while (state.left > 0) {
+ frag = ip6_frag_next(skb, &state);
+ if (IS_ERR(frag)) {
+ err = PTR_ERR(frag);
goto fail;
}
/*
- * Set up data on packet
- */
-
- ip6_copy_metadata(frag, skb);
- skb_reserve(frag, hroom);
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- skb_reset_network_header(frag);
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
- frag->transport_header = (frag->network_header + hlen +
- sizeof(struct frag_hdr));
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
- if (skb->sk)
- skb_set_owner_w(frag, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
- fragnexthdr_offset = skb_network_header(frag);
- fragnexthdr_offset += prevhdr - skb_network_header(skb);
- *fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
- /*
- * Build fragment header.
- */
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->identification = frag_id;
-
- /*
- * Copy a block of the IP datagram.
- */
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
- len));
- left -= len;
-
- fh->frag_off = htons(offset);
- if (left > 0)
- fh->frag_off |= htons(IP6_MF);
- ipv6_hdr(frag)->payload_len = htons(frag->len -
- sizeof(struct ipv6hdr));
-
- ptr += len;
- offset += len;
-
- /*
* Put this fragment into the sending queue.
*/
+ frag->tstamp = tstamp;
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1221,6 +1297,7 @@
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
+ cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1245,6 +1322,7 @@
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
@@ -1257,7 +1335,7 @@
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged;
+ bool paged, extra_uref = false;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1322,6 +1400,20 @@
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1354,7 +1446,7 @@
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1378,6 +1470,7 @@
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
@@ -1439,12 +1532,6 @@
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- /* Only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes
*/
@@ -1476,6 +1563,13 @@
exthdrlen = 0;
dst_exthdrlen = 0;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1505,7 +1599,7 @@
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1535,6 +1629,10 @@
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1547,6 +1645,8 @@
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
@@ -1668,7 +1768,7 @@
hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;