Update Linux to v5.10.157
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.157.tar.xz
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
Change-Id: I7b30d9e98d8c465d6b44de8e7433b4a40b3289ba
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index e7ce719..edba74a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -405,6 +405,9 @@
{
const struct inet6_dev *idev = __in6_dev_get(dev);
+ if (unlikely(!idev))
+ return true;
+
return !!idev->cnf.ignore_routes_with_linkdown;
}
diff --git a/include/net/arp.h b/include/net/arp.h
index 4950191..4a23a97 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -71,6 +71,7 @@
const unsigned char *src_hw, const unsigned char *th);
int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir);
void arp_ifdown(struct net_device *dev);
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force);
struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct net_device *dev, __be32 src_ip,
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 8b7eb46..aadff55 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -236,6 +236,7 @@
#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
ax25_dama_info dama;
#endif
+ refcount_t refcount;
} ax25_dev;
typedef struct ax25_cb {
@@ -290,6 +291,17 @@
}
}
+static inline void ax25_dev_hold(ax25_dev *ax25_dev)
+{
+ refcount_inc(&ax25_dev->refcount);
+}
+
+static inline void ax25_dev_put(ax25_dev *ax25_dev)
+{
+ if (refcount_dec_and_test(&ax25_dev->refcount)) {
+ kfree(ax25_dev);
+ }
+}
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
skb->dev = dev;
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 9125eff..3558356 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -180,19 +180,21 @@
#define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__)
#endif
+#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null")
+
#define bt_dev_info(hdev, fmt, ...) \
- BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_warn(hdev, fmt, ...) \
- BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_err(hdev, fmt, ...) \
- BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_dbg(hdev, fmt, ...) \
- BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_warn_ratelimited(hdev, fmt, ...) \
- bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
#define bt_dev_err_ratelimited(hdev, fmt, ...) \
- bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
+ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
/* Connection and socket states */
enum {
@@ -420,6 +422,71 @@
return NULL;
}
+/* Shall not be called with lock_sock held */
+static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len, size_t mtu,
+ size_t headroom, size_t tailroom)
+{
+ struct sk_buff *skb;
+ size_t size = min_t(size_t, len, mtu);
+ int err;
+
+ skb = bt_skb_send_alloc(sk, size + headroom + tailroom,
+ msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb)
+ return ERR_PTR(err);
+
+ skb_reserve(skb, headroom);
+ skb_tailroom_reserve(skb, mtu, tailroom);
+
+ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) {
+ kfree_skb(skb);
+ return ERR_PTR(-EFAULT);
+ }
+
+ skb->priority = sk->sk_priority;
+
+ return skb;
+}
+
+/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments
+ * accourding to the MTU.
+ */
+static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
+ struct msghdr *msg,
+ size_t len, size_t mtu,
+ size_t headroom, size_t tailroom)
+{
+ struct sk_buff *skb, **frag;
+
+ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
+ if (IS_ERR_OR_NULL(skb))
+ return skb;
+
+ len -= skb->len;
+ if (!len)
+ return skb;
+
+ /* Add remaining data over MTU as continuation fragments */
+ frag = &skb_shinfo(skb)->frag_list;
+ while (len) {
+ struct sk_buff *tmp;
+
+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
+ if (IS_ERR(tmp)) {
+ return skb;
+ }
+
+ len -= tmp->len;
+
+ *frag = tmp;
+ frag = &(*frag)->next;
+ }
+
+ return skb;
+}
+
int bt_to_errno(u16 code);
void hci_sock_set_flag(struct sock *sk, int nr);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 243de74..ede7a15 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1503,7 +1503,7 @@
} __packed;
#define HCI_LE_USE_PEER_ADDR 0x00
-#define HCI_LE_USE_WHITELIST 0x01
+#define HCI_LE_USE_ACCEPT_LIST 0x01
#define HCI_OP_LE_CREATE_CONN 0x200d
struct hci_cp_le_create_conn {
@@ -1523,22 +1523,22 @@
#define HCI_OP_LE_CREATE_CONN_CANCEL 0x200e
-#define HCI_OP_LE_READ_WHITE_LIST_SIZE 0x200f
-struct hci_rp_le_read_white_list_size {
+#define HCI_OP_LE_READ_ACCEPT_LIST_SIZE 0x200f
+struct hci_rp_le_read_accept_list_size {
__u8 status;
__u8 size;
} __packed;
-#define HCI_OP_LE_CLEAR_WHITE_LIST 0x2010
+#define HCI_OP_LE_CLEAR_ACCEPT_LIST 0x2010
-#define HCI_OP_LE_ADD_TO_WHITE_LIST 0x2011
-struct hci_cp_le_add_to_white_list {
+#define HCI_OP_LE_ADD_TO_ACCEPT_LIST 0x2011
+struct hci_cp_le_add_to_accept_list {
__u8 bdaddr_type;
bdaddr_t bdaddr;
} __packed;
-#define HCI_OP_LE_DEL_FROM_WHITE_LIST 0x2012
-struct hci_cp_le_del_from_white_list {
+#define HCI_OP_LE_DEL_FROM_ACCEPT_LIST 0x2012
+struct hci_cp_le_del_from_accept_list {
__u8 bdaddr_type;
bdaddr_t bdaddr;
} __packed;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a592a82..11a92bb 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -35,6 +35,9 @@
/* HCI priority */
#define HCI_PRIO_MAX 7
+/* HCI maximum id value */
+#define HCI_MAX_ID 10000
+
/* HCI Core structures */
struct inquiry_data {
bdaddr_t bdaddr;
@@ -305,7 +308,7 @@
__u8 max_page;
__u8 features[HCI_MAX_PAGES][8];
__u8 le_features[8];
- __u8 le_white_list_size;
+ __u8 le_accept_list_size;
__u8 le_resolv_list_size;
__u8 le_num_of_adv_sets;
__u8 le_states[8];
@@ -361,6 +364,8 @@
__u8 ssp_debug_mode;
__u8 hw_error_code;
__u32 clock;
+ __u16 advmon_allowlist_duration;
+ __u16 advmon_no_filter_duration;
__u16 devid_source;
__u16 devid_vendor;
@@ -494,14 +499,14 @@
struct hci_conn_hash conn_hash;
struct list_head mgmt_pending;
- struct list_head blacklist;
- struct list_head whitelist;
+ struct list_head reject_list;
+ struct list_head accept_list;
struct list_head uuids;
struct list_head link_keys;
struct list_head long_term_keys;
struct list_head identity_resolving_keys;
struct list_head remote_oob_data;
- struct list_head le_white_list;
+ struct list_head le_accept_list;
struct list_head le_resolv_list;
struct list_head le_conn_params;
struct list_head pend_le_conns;
@@ -542,6 +547,14 @@
struct delayed_work rpa_expired;
bdaddr_t rpa;
+ enum {
+ INTERLEAVE_SCAN_NONE,
+ INTERLEAVE_SCAN_NO_FILTER,
+ INTERLEAVE_SCAN_ALLOWLIST
+ } interleave_scan_state;
+
+ struct delayed_work interleave_scan;
+
#if IS_ENABLED(CONFIG_BT_LEDS)
struct led_trigger *power_led;
#endif
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 1d12329..9b80008 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -845,6 +845,7 @@
};
void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
void l2cap_chan_put(struct l2cap_chan *c);
static inline void l2cap_chan_lock(struct l2cap_chan *chan)
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 1a28f29..895eae1 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -15,8 +15,6 @@
#define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW)
#define AD_TIMER_INTERVAL 100 /*msec*/
-#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
-
#define AD_LACP_SLOW 0
#define AD_LACP_FAST 1
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 67d6760..d9cc3f5 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -763,6 +763,9 @@
/* exported from bond_sysfs_slave.c */
extern const struct sysfs_ops slave_sysfs_ops;
+/* exported from bond_3ad.c */
+extern const u8 lacpdu_mcast_addr[];
+
static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
{
atomic_long_inc(&dev->tx_dropped);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 716b7c5..36e5e75 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -31,7 +31,7 @@
static inline bool net_busy_loop_on(void)
{
- return sysctl_net_busy_poll;
+ return READ_ONCE(sysctl_net_busy_poll);
}
static inline bool sk_can_busy_loop(const struct sock *sk)
diff --git a/include/net/esp.h b/include/net/esp.h
index 90cd02f..9c5637d 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -4,8 +4,6 @@
#include <linux/skbuff.h>
-#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
-
struct ip_esp_hdr;
static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/include/net/flow.h b/include/net/flow.h
index b2531df..39d0ced 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -195,11 +195,21 @@
return container_of(fl4, struct flowi, u.ip4);
}
+static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4)
+{
+ return &(flowi4_to_flowi(fl4)->u.__fl_common);
+}
+
static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
{
return container_of(fl6, struct flowi, u.ip6);
}
+static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
+{
+ return &(flowi6_to_flowi(fl6)->u.__fl_common);
+}
+
static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
{
return container_of(fldn, struct flowi, u.dn);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index cc10b10..5eecf44 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -59,6 +59,8 @@
__be16 vlan_tci;
};
__be16 vlan_tpid;
+ __be16 vlan_eth_type;
+ u16 padding;
};
struct flow_dissector_mpls_lse {
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 010d581..9a58274 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -568,5 +568,6 @@
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb));
+bool flow_indr_dev_exists(void);
#endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index d0d188c..03b64bf 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -15,6 +15,22 @@
#ifndef IEEE802154_NETDEVICE_H
#define IEEE802154_NETDEVICE_H
+#define IEEE802154_REQUIRED_SIZE(struct_type, member) \
+ (offsetof(typeof(struct_type), member) + \
+ sizeof(((typeof(struct_type) *)(NULL))->member))
+
+#define IEEE802154_ADDR_OFFSET \
+ offsetof(typeof(struct sockaddr_ieee802154), addr)
+
+#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type))
+
+#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr))
+
+#define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \
+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr))
+
#include <net/af_ieee802154.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
@@ -165,6 +181,33 @@
memcpy(raw, &temp, IEEE802154_ADDR_LEN);
}
+static inline int
+ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len)
+{
+ struct ieee802154_addr_sa *sa;
+ int ret = 0;
+
+ sa = &daddr->addr;
+ if (len < IEEE802154_MIN_NAMELEN)
+ return -EINVAL;
+ switch (sa->addr_type) {
+ case IEEE802154_ADDR_NONE:
+ break;
+ case IEEE802154_ADDR_SHORT:
+ if (len < IEEE802154_NAMELEN_SHORT)
+ ret = -EINVAL;
+ break;
+ case IEEE802154_ADDR_LONG:
+ if (len < IEEE802154_NAMELEN_LONG)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a,
const struct ieee802154_addr_sa *sa)
{
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 8bf5906..e03ba8e 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,6 +64,14 @@
struct hlist_node addr_lst;
struct list_head if_list;
+ /*
+ * Used to safely traverse idev->addr_list in process context
+ * if the idev->lock needed to protect idev->addr_list cannot be held.
+ * In that case, add the items to this list temporarily and iterate
+ * without holding idev->lock.
+ * See addrconf_ifdown and dev_forward_change.
+ */
+ struct list_head if_list_aux;
struct list_head tmp_list;
struct inet6_ifaddr *ifpub;
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 81b9659..56f1286 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -103,15 +103,24 @@
const int dif);
int inet6_hash(struct sock *sk);
-#endif /* IS_ENABLED(CONFIG_IPV6) */
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_family == AF_INET6) && \
- ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
- ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
+static inline bool inet6_match(struct net *net, const struct sock *sk,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ const __portpair ports,
+ const int dif, const int sdif)
+{
+ if (!net_eq(sock_net(sk), net) ||
+ sk->sk_family != AF_INET6 ||
+ sk->sk_portpair != ports ||
+ !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return false;
+
+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
+ sdif);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 0b1864a..ff901aa 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -317,7 +317,7 @@
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 3
+#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
@@ -334,14 +334,6 @@
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ack.pingpong < U8_MAX)
- icsk->icsk_ack.pingpong++;
-}
-
static inline bool inet_csk_has_ulp(struct sock *sk)
{
return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ca6a3ea..c9e387d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -197,17 +197,6 @@
hashinfo->ehash_locks = NULL;
}
-static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
- int dif, int sdif)
-{
-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
- bound_dev_if, dif, sdif);
-#else
- return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
-#endif
-}
-
struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
@@ -289,7 +278,6 @@
((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif
-#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const __addrpair __name = (__force __addrpair) ( \
@@ -301,24 +289,20 @@
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_addrpair == (__cookie)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#else /* 32-bit arch */
-#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
- const int __name __deprecated __attribute__((unused))
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_daddr == (__saddr)) && \
- ((__sk)->sk_rcv_saddr == (__daddr)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#endif /* 64-bit arch */
+static inline bool INET_MATCH(struct net *net, const struct sock *sk,
+ const __addrpair cookie, const __portpair ports,
+ int dif, int sdif)
+{
+ if (!net_eq(sock_net(sk), net) ||
+ sk->sk_portpair != ports ||
+ sk->sk_addrpair != cookie)
+ return false;
+
+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */
+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif,
+ sdif);
+}
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
@@ -419,7 +403,7 @@
}
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
struct inet_timewait_sock **));
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 89163ef..f0faf9d 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,7 +107,8 @@
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
@@ -116,14 +117,15 @@
static inline int inet_request_bound_dev_if(const struct sock *sk,
struct sk_buff *skb)
{
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
- return sk->sk_bound_dev_if;
+ return bound_dev_if;
}
static inline int inet_sk_bound_l3mdev(const struct sock *sk)
@@ -131,7 +133,7 @@
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
@@ -147,6 +149,17 @@
return bound_dev_if == dif || bound_dev_if == sdif;
}
+static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+ int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
+ bound_dev_if, dif, sdif);
+#else
+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
+#endif
+}
+
struct inet_cork {
unsigned int flags;
__be32 addr;
@@ -369,7 +382,7 @@
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
diff --git a/include/net/ip.h b/include/net/ip.h
index de2dc22..4b775af 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -55,6 +55,7 @@
#define IPSKB_DOREDIRECT BIT(5)
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_L3SLAVE BIT(7)
+#define IPSKB_NOPOLICY BIT(8)
u16 frag_max_size;
};
@@ -351,7 +352,7 @@
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
@@ -378,7 +379,7 @@
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
@@ -439,7 +440,7 @@
struct net *net = dev_net(dst->dev);
unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding)
return dst_mtu(dst);
@@ -544,7 +545,7 @@
BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) !=
offsetof(typeof(flow->addrs), v4addrs.src) +
sizeof(flow->addrs.v4addrs.src));
- memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));
+ memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs));
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 028eaea..42d5085 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -57,7 +57,7 @@
/* These fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int hlen; /* tun_hlen + encap_hlen */
int tun_hlen; /* Precalculated header length */
int encap_hlen; /* Encap header length (FOU,GUE) */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6162067..c3e55a9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -113,7 +113,7 @@
/* These four fields used only by GRE */
u32 i_seqno; /* The last seen seqno */
- u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
/* These four fields used only by ERSPAN */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6060189..89ce8a5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -842,7 +842,7 @@
BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) !=
offsetof(typeof(flow->addrs), v6addrs.src) +
sizeof(flow->addrs.v6addrs.src));
- memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs));
+ memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs));
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 09f2efe..5805fe4 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -59,8 +59,13 @@
int ret = NF_ACCEPT;
if (ct) {
- if (!nf_ct_is_confirmed(ct))
+ if (!nf_ct_is_confirmed(ct)) {
ret = __nf_conntrack_confirm(skb);
+
+ if (ret == NF_ACCEPT)
+ ct = (struct nf_conn *)skb_nfct(skb);
+ }
+
if (likely(ret == NF_ACCEPT))
nf_ct_deliver_cached_events(ct);
}
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 76bfb6c..e66fee9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -176,13 +176,18 @@
bool report;
};
-struct nft_data_desc {
- enum nft_data_types type;
- unsigned int len;
+enum nft_data_desc_flags {
+ NFT_DATA_DESC_SETELEM = (1 << 0),
};
-int nft_data_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
+struct nft_data_desc {
+ enum nft_data_types type;
+ unsigned int size;
+ unsigned int len;
+ unsigned int flags;
+};
+
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla);
void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
void nft_data_release(const struct nft_data *data, enum nft_data_types type);
@@ -203,11 +208,11 @@
unsigned int nft_parse_register(const struct nlattr *attr);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
-int nft_validate_register_load(enum nft_registers reg, unsigned int len);
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len);
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len);
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len);
/**
* struct nft_userdata - user defined data associated with an object
@@ -1013,7 +1018,6 @@
struct nft_hook {
struct list_head list;
- bool inactive;
struct nf_hook_ops ops;
struct rcu_head rcu;
};
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 8657e68..ce75121 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -26,21 +26,29 @@
struct nft_bitwise_fast_expr {
u32 mask;
u32 xor;
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
};
struct nft_cmp_fast_expr {
u32 data;
u32 mask;
- enum nft_registers sreg:8;
+ u8 sreg;
+ u8 len;
+ bool inv;
+};
+
+struct nft_cmp16_fast_expr {
+ struct nft_data data;
+ struct nft_data mask;
+ u8 sreg;
u8 len;
bool inv;
};
struct nft_immediate_expr {
struct nft_data data;
- enum nft_registers dreg:8;
+ u8 dreg;
u8 dlen;
};
@@ -55,19 +63,20 @@
}
extern const struct nft_expr_ops nft_cmp_fast_ops;
+extern const struct nft_expr_ops nft_cmp16_fast_ops;
struct nft_payload {
enum nft_payload_bases base:8;
u8 offset;
u8 len;
- enum nft_registers dreg:8;
+ u8 dreg;
};
struct nft_payload_set {
enum nft_payload_bases base:8;
u8 offset;
u8 len;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 csum_type;
u8 csum_offset;
u8 csum_flags;
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 7a453a3..1058f38 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -91,7 +91,7 @@
NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
memset(&(__reg)->mask, 0xff, (__reg)->len);
-int nft_chain_offload_priority(struct nft_base_chain *basechain);
+bool nft_chain_offload_support(const struct nft_base_chain *basechain);
int nft_offload_init(void);
void nft_offload_exit(void);
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 628b6fa..237f375 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -5,7 +5,7 @@
#include <net/netfilter/nf_tables.h>
struct nft_fib {
- enum nft_registers dreg:8;
+ u8 dreg;
u8 result;
u32 flags;
};
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index 07e2fd5..2dce55c 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -7,8 +7,8 @@
struct nft_meta {
enum nft_meta_keys key:8;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 1c0fbe3..f179996 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -78,8 +78,8 @@
struct dst_ops ip6_dst_ops;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
- unsigned int ip6_rt_gc_expire;
- unsigned long ip6_rt_last_gc;
+ atomic_t ip6_rt_gc_expire;
+ unsigned long ip6_rt_last_gc;
unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
bool fib6_has_custom_rules;
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 22e1bc7..69e4161 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -64,6 +64,9 @@
u32 sysctl_aevent_rseqth;
int sysctl_larval_drop;
u32 sysctl_acq_expires;
+
+ u8 policy_default[XFRM_POLICY_MAX];
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_hdr;
#endif
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 2b778e1..0fd2df8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,8 +35,6 @@
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
@@ -53,8 +51,6 @@
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/raw.h b/include/net/raw.h
index 8ad8df5..c51a635 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -75,7 +75,7 @@
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/route.h b/include/net/route.h
index a07c277..2551f3f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -165,7 +165,7 @@
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
if (sk)
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk);
}
@@ -322,7 +322,7 @@
ip_rt_put(rt);
flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr);
}
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk);
}
@@ -338,7 +338,7 @@
flowi4_update_output(fl4, sk->sk_bound_dev_if,
RT_CONN_FLAGS(sk), fl4->daddr,
fl4->saddr);
- security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(sock_net(sk), fl4, sk);
}
return rt;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1042c44..e7e8c31 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -163,37 +163,17 @@
if (spin_trylock(&qdisc->seqlock))
goto nolock_empty;
- /* Paired with smp_mb__after_atomic() to make sure
- * STATE_MISSED checking is synchronized with clearing
- * in pfifo_fast_dequeue().
+ /* No need to insist if the MISSED flag was already set.
+ * Note that test_and_set_bit() also gives us memory ordering
+ * guarantees wrt potential earlier enqueue() and below
+ * spin_trylock(), both of which are necessary to prevent races
*/
- smp_mb__before_atomic();
-
- /* If the MISSED flag is set, it means other thread has
- * set the MISSED flag before second spin_trylock(), so
- * we can return false here to avoid multi cpus doing
- * the set_bit() and second spin_trylock() concurrently.
- */
- if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
return false;
- /* Set the MISSED flag before the second spin_trylock(),
- * if the second spin_trylock() return false, it means
- * other cpu holding the lock will do dequeuing for us
- * or it will see the MISSED flag set after releasing
- * lock and reschedule the net_tx_action() to do the
- * dequeuing.
- */
- set_bit(__QDISC_STATE_MISSED, &qdisc->state);
-
- /* spin_trylock() only has load-acquire semantic, so use
- * smp_mb__after_atomic() to ensure STATE_MISSED is set
- * before doing the second spin_trylock().
- */
- smp_mb__after_atomic();
-
- /* Retry again in case other CPU may not see the new flag
- * after it releases the lock at the end of qdisc_run_end().
+ /* Try to take the lock again to make sure that we will either
+ * grab it or the CPU that still has it will see MISSED set
+ * when testing it in qdisc_run_end()
*/
if (!spin_trylock(&qdisc->seqlock))
return false;
@@ -217,6 +197,12 @@
if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
+ /* spin_unlock() only has store-release semantic. The unlock
+ * and test_bit() ordering is a store-load ordering, so a full
+ * memory barrier is needed here.
+ */
+ smp_mb();
+
if (unlikely(test_bit(__QDISC_STATE_MISSED,
&qdisc->state))) {
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
@@ -1192,7 +1178,6 @@
static inline void qdisc_reset_queue(struct Qdisc *sch)
{
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
}
static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d7d2495..dac91aa 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -4,8 +4,8 @@
#include <linux/types.h>
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c11eb4..90a8b8b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -160,9 +160,6 @@
* for struct sock and struct inet_timewait_sock.
*/
struct sock_common {
- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
- * address on 64bit arches : cf INET_MATCH()
- */
union {
__addrpair skc_addrpair;
struct {
@@ -424,7 +421,7 @@
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
- struct dst_entry *sk_rx_dst;
+ struct dst_entry __rcu *sk_rx_dst;
struct dst_entry __rcu *sk_dst_cache;
atomic_t sk_omem_alloc;
int sk_sndbuf;
@@ -530,14 +527,26 @@
SK_PACING_FQ = 2,
};
-/* Pointer stored in sk_user_data might not be suitable for copying
- * when cloning the socket. For instance, it can point to a reference
- * counted object. sk_user_data bottom bit is set if pointer must not
- * be copied.
+/* flag bits in sk_user_data
+ *
+ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might
+ * not be suitable for copying when cloning the socket. For instance,
+ * it can point to a reference counted object. sk_user_data bottom
+ * bit is set if pointer must not be copied.
+ *
+ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is
+ * managed/owned by a BPF reuseport array. This bit should be set
+ * when sk_user_data's sk is added to the bpf's reuseport_array.
+ *
+ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in
+ * sk_user_data points to psock type. This bit should be set
+ * when sk_user_data is assigned to a psock object.
*/
#define SK_USER_DATA_NOCOPY 1UL
-#define SK_USER_DATA_BPF 2UL /* Managed by BPF */
-#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF)
+#define SK_USER_DATA_BPF 2UL
+#define SK_USER_DATA_PSOCK 4UL
+#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
+ SK_USER_DATA_PSOCK)
/**
* sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied
@@ -550,24 +559,40 @@
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+/**
+ * __rcu_dereference_sk_user_data_with_flags - return the pointer
+ * only if argument flags all has been set in sk_user_data. Otherwise
+ * return NULL
+ *
+ * @sk: socket
+ * @flags: flag bits
+ */
+static inline void *
+__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
+ uintptr_t flags)
+{
+ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
+
+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
+
+ if ((sk_user_data & flags) == flags)
+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
+ return NULL;
+}
+
#define rcu_dereference_sk_user_data(sk) \
+ __rcu_dereference_sk_user_data_with_flags(sk, 0)
+#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
({ \
- void *__tmp = rcu_dereference(__sk_user_data((sk))); \
- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \
+ uintptr_t __tmp1 = (uintptr_t)(ptr), \
+ __tmp2 = (uintptr_t)(flags); \
+ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
+ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
+ rcu_assign_pointer(__sk_user_data((sk)), \
+ __tmp1 | __tmp2); \
})
#define rcu_assign_sk_user_data(sk, ptr) \
-({ \
- uintptr_t __tmp = (uintptr_t)(ptr); \
- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \
-})
-#define rcu_assign_sk_user_data_nocopy(sk, ptr) \
-({ \
- uintptr_t __tmp = (uintptr_t)(ptr); \
- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
- rcu_assign_pointer(__sk_user_data((sk)), \
- __tmp | SK_USER_DATA_NOCOPY); \
-})
+ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
@@ -1445,7 +1470,7 @@
/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long val = sk->sk_prot->sysctl_mem[index];
+ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
#if PAGE_SIZE > SK_MEM_QUANTUM
val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
@@ -1468,19 +1493,23 @@
static inline bool sk_wmem_schedule(struct sock *sk, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_SEND);
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND);
}
static inline bool
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
{
+ int delta;
+
if (!sk_has_account(sk))
return true;
- return size <= sk->sk_forward_alloc ||
- __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
+ delta = size - sk->sk_forward_alloc;
+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
skb_pfmemalloc(skb);
}
@@ -2677,18 +2706,18 @@
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 505f1e1..3eac185 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -38,21 +38,20 @@
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
extern int reuseport_detach_prog(struct sock *sk);
-static inline bool reuseport_has_conns(struct sock *sk, bool set)
+static inline bool reuseport_has_conns(struct sock *sk)
{
struct sock_reuseport *reuse;
bool ret = false;
rcu_read_lock();
reuse = rcu_dereference(sk->sk_reuseport_cb);
- if (reuse) {
- if (set)
- reuse->has_conns = 1;
- ret = reuse->has_conns;
- }
+ if (reuse && reuse->has_conns)
+ ret = true;
rcu_read_unlock();
return ret;
}
+void reuseport_has_conns_set(struct sock *sk);
+
#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8528015..afdf8bd 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -38,6 +38,7 @@
SWITCHDEV_ATTR_ID_PORT_MROUTER,
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
+ SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
@@ -57,6 +58,7 @@
bool mrouter; /* PORT_MROUTER */
clock_t ageing_time; /* BRIDGE_AGEING_TIME */
bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */
+ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */
bool mc_disabled; /* MC_DISABLED */
#if IS_ENABLED(CONFIG_BRIDGE_MRP)
u8 mrp_port_role; /* MRP_PORT_ROLE */
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 748cf87..3e02709 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -14,6 +14,7 @@
struct tc_action common;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
+ u32 tcfp_off_max_hint;
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
};
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 334b8d1..9a8d986 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,6 +460,7 @@
u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
@@ -598,6 +599,7 @@
void tcp_reset(struct sock *sk);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -932,7 +934,7 @@
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
#endif
@@ -1041,6 +1043,7 @@
int losses; /* number of packets marked lost upon ACK */
u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
u32 prior_in_flight; /* in flight before this ACK */
+ u32 last_end_seq; /* end_seq of most recently ACKed packet */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
@@ -1151,6 +1154,11 @@
bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
+static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
+{
+ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
+}
+
/* These functions determine how the current flow behaves in respect of SACK
* handling. SACK is negotiated with the peer, and therefore it can vary
* between different flows.
@@ -1263,11 +1271,14 @@
{
const struct tcp_sock *tp = tcp_sk(sk);
+ if (tp->is_cwnd_limited)
+ return true;
+
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
if (tcp_in_slow_start(tp))
return tp->snd_cwnd < 2 * tp->max_packets_out;
- return tp->is_cwnd_limited;
+ return false;
}
/* BBR congestion control needs pacing.
@@ -1372,8 +1383,8 @@
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1388,7 +1399,7 @@
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1439,21 +1450,24 @@
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1466,7 +1480,8 @@
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -1961,7 +1976,7 @@
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
/* @wake is one when sk_stream_write_space() calls us.
diff --git a/include/net/tls.h b/include/net/tls.h
index 745b3bc..d9cb597 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -707,7 +707,7 @@
struct tls_crypto_info *crypto_info);
#ifdef CONFIG_TLS_DEVICE
-void tls_device_init(void);
+int tls_device_init(void);
void tls_device_cleanup(void);
void tls_device_sk_destruct(struct sock *sk);
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
@@ -727,7 +727,7 @@
return tls_get_ctx(sk)->rx_conf == TLS_HW;
}
#else
-static inline void tls_device_init(void) {}
+static inline int tls_device_init(void) { return 0; }
static inline void tls_device_cleanup(void) {}
static inline int
diff --git a/include/net/udp.h b/include/net/udp.h
index 435cc00..388e68c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -176,6 +176,7 @@
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
+void udp_v6_early_demux(struct sk_buff *skb);
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features, bool is_ipv6);
@@ -259,7 +260,7 @@
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
@@ -467,6 +468,7 @@
DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void);
+void udp_encap_disable(void);
#if IS_ENABLED(CONFIG_IPV6)
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 2ea453d..24ece06 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -177,9 +177,8 @@
#if IS_ENABLED(CONFIG_IPV6)
if (sock->sk->sk_family == PF_INET6)
ipv6_stub->udpv6_encap_enable();
- else
#endif
- udp_encap_enable();
+ udp_encap_enable();
}
#define UDP_TUNNEL_NIC_MAX_TABLES 4
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0049a74..8a9943d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1091,6 +1091,27 @@
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
unsigned short family);
+static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
+ int dir)
+{
+ if (!net->xfrm.policy_count[dir] && !secpath_exists(skb))
+ return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT;
+
+ return false;
+}
+
+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
+ int dir, unsigned short family)
+{
+ if (dir != XFRM_POLICY_OUT && family == AF_INET) {
+ /* same dst may be used for traffic originating from
+ * devices with different policy settings.
+ */
+ return IPCB(skb)->flags & IPSKB_NOPOLICY;
+ }
+ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
+}
+
static inline int __xfrm_policy_check2(struct sock *sk, int dir,
struct sk_buff *skb,
unsigned int family, int reverse)
@@ -1101,9 +1122,9 @@
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
- __xfrm_policy_check(sk, ndir, skb, family);
+ return __xfrm_check_nopolicy(net, skb, dir) ||
+ __xfrm_check_dev_nopolicy(skb, dir, family) ||
+ __xfrm_policy_check(sk, ndir, skb, family);
}
static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
@@ -1155,9 +1176,12 @@
{
struct net *net = dev_net(skb->dev);
- return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
- (skb_dst(skb)->flags & DST_NOXFRM) ||
- __xfrm_route_forward(skb, family);
+ if (!net->xfrm.policy_count[XFRM_POLICY_OUT] &&
+ net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT)
+ return true;
+
+ return (skb_dst(skb)->flags & DST_NOXFRM) ||
+ __xfrm_route_forward(skb, family);
}
static inline int xfrm4_route_forward(struct sk_buff *skb)
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 7a9a23e..c9a47d3 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -86,7 +86,7 @@
struct xdp_umem *umem);
int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
u16 queue_id, u16 flags);
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
struct net_device *dev, u16 queue_id);
void xp_destroy(struct xsk_buff_pool *pool);
void xp_release(struct xdp_buff_xsk *xskb);