Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/include/net/act_api.h b/include/net/act_api.h
index b18c699..4dabe47 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -69,7 +69,8 @@
{
dtm->install = jiffies_to_clock_t(jiffies - stm->install);
dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse);
- dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse);
+ dtm->firstuse = stm->firstuse ?
+ jiffies_to_clock_t(jiffies - stm->firstuse) : 0;
dtm->expires = jiffies_to_clock_t(stm->expires);
}
@@ -155,8 +156,7 @@
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats);
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
-
+void tcf_idr_insert_many(struct tc_action *actions[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 3f62b34..8d90fb9 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -229,7 +229,6 @@
void ipv6_mc_remap(struct inet6_dev *idev);
void ipv6_mc_init_dev(struct inet6_dev *idev);
void ipv6_mc_destroy_dev(struct inet6_dev *idev);
-int ipv6_mc_check_icmpv6(struct sk_buff *skb);
int ipv6_mc_check_mld(struct sk_buff *skb);
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
@@ -273,6 +272,7 @@
const struct in6_addr *addr);
int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+void __ipv6_sock_ac_close(struct sock *sk);
void ipv6_sock_ac_close(struct sock *sk);
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 1abae3c..55b980b 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,12 @@
struct socket;
struct rxrpc_call;
+enum rxrpc_interruptibility {
+ RXRPC_INTERRUPTIBLE, /* Call is interruptible */
+ RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */
+ RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
+};
+
/*
* Debug ID counter for tracing.
*/
@@ -41,7 +47,7 @@
gfp_t,
rxrpc_notify_rx_t,
bool,
- bool,
+ enum rxrpc_interruptibility,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
@@ -53,14 +59,12 @@
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
-u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
- u32 *);
-void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
diff --git a/include/net/arp.h b/include/net/arp.h
index c8f580a..4950191 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -57,8 +57,8 @@
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b689ace..14d00cc 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -540,6 +540,7 @@
struct sk_buff_head data_q;
unsigned int sent;
__u8 state;
+ bool amp;
};
struct hci_conn_params {
@@ -1055,6 +1056,7 @@
void hci_free_dev(struct hci_dev *hdev);
int hci_register_dev(struct hci_dev *hdev);
void hci_unregister_dev(struct hci_dev *hdev);
+void hci_cleanup_dev(struct hci_dev *hdev);
int hci_suspend_dev(struct hci_dev *hdev);
int hci_resume_dev(struct hci_dev *hdev);
int hci_reset_dev(struct hci_dev *hdev);
@@ -1308,16 +1310,34 @@
conn->security_cfm_cb(conn, status);
}
-static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
+static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
struct hci_cb *cb;
+ __u8 encrypt;
- if (conn->sec_level == BT_SECURITY_SDP)
- conn->sec_level = BT_SECURITY_LOW;
+ if (conn->state == BT_CONFIG) {
+ if (!status)
+ conn->state = BT_CONNECTED;
- if (conn->pending_sec_level > conn->sec_level)
- conn->sec_level = conn->pending_sec_level;
+ hci_connect_cfm(conn, status);
+ hci_conn_drop(conn);
+ return;
+ }
+
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+ encrypt = 0x00;
+ else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
+ encrypt = 0x02;
+ else
+ encrypt = 0x01;
+
+ if (!status) {
+ if (conn->sec_level == BT_SECURITY_SDP)
+ conn->sec_level = BT_SECURITY_LOW;
+
+ if (conn->pending_sec_level > conn->sec_level)
+ conn->sec_level = conn->pending_sec_level;
+ }
mutex_lock(&hci_cb_list_lock);
list_for_each_entry(cb, &hci_cb_list, list) {
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 093aede..8efc241 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -623,6 +623,8 @@
struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan,
unsigned long hdr_len,
unsigned long len, int nb);
+ int (*filter) (struct l2cap_chan * chan,
+ struct sk_buff *skb);
};
struct l2cap_conn {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 3d56b02..1bee8fd 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -180,6 +180,11 @@
struct rtnl_link_stats64 slave_stats;
};
+static inline struct slave *to_slave(struct kobject *kobj)
+{
+ return container_of(kobj, struct slave, kobj);
+}
+
struct bond_up_slave {
unsigned int count;
struct rcu_head rcu;
@@ -743,6 +748,9 @@
/* exported from bond_netlink.c */
extern struct rtnl_link_ops bond_link_ops;
+/* exported from bond_sysfs_slave.c */
+extern const struct sysfs_ops slave_sysfs_ops;
+
static inline void bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
{
atomic_long_inc(&dev->tx_dropped);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 86e0283..9899b9a 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -36,7 +36,7 @@
static inline bool sk_can_busy_loop(const struct sock *sk)
{
- return sk->sk_ll_usec && !signal_pending(current);
+ return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
}
bool sk_busy_loop_end(void *p, unsigned long start_time);
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 48ecca8..b655d86 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -119,7 +119,7 @@
* The link_support layer is used to add any Link Layer specific
* framing.
*/
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
struct cflayer *link_support, int head_room,
struct cflayer **layer, int (**rcv_func)(
struct sk_buff *, struct net_device *,
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 2aa5e91..8819ff4 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -62,7 +62,7 @@
* @fcs: Specify if checksum is used in CAIF Framing Layer.
* @head_room: Head space needed by link specific protocol.
*/
-void
+int
cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h
index 14a55e0..67cce87 100644
--- a/include/net/caif/cfserl.h
+++ b/include/net/caif/cfserl.h
@@ -9,4 +9,5 @@
#include <net/caif/caif_layer.h>
struct cflayer *cfserl_create(int instance, bool use_stx);
+void cfserl_release(struct cflayer *layer);
#endif
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4ab2c49..69b9ccb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3537,6 +3537,9 @@
*
* @start_radar_detection: Start radar detection in the driver.
*
+ * @end_cac: End running CAC, probably because a related CAC
+ * was finished on another phy.
+ *
* @update_ft_ies: Provide updated Fast BSS Transition information to the
* driver. If the SME is in the driver/firmware, this information can be
* used in building Authentication and Reassociation Request frames.
@@ -3863,6 +3866,8 @@
struct net_device *dev,
struct cfg80211_chan_def *chandef,
u32 cac_time_ms);
+ void (*end_cac)(struct wiphy *wiphy,
+ struct net_device *dev);
int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_update_ft_ies_params *ftie);
int (*crit_proto_start)(struct wiphy *wiphy,
@@ -5189,7 +5194,7 @@
*/
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
const u8 *addr, enum nl80211_iftype iftype,
- u8 data_offset);
+ u8 data_offset, bool is_amsdu);
/**
* ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -5201,7 +5206,7 @@
static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype)
{
- return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
+ return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
}
/**
diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h
index 2ab6684..f68bc37 100644
--- a/include/net/drop_monitor.h
+++ b/include/net/drop_monitor.h
@@ -19,7 +19,7 @@
struct net_device *input_dev;
};
-#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+#if IS_REACHABLE(CONFIG_NET_DROP_MONITOR)
void net_dm_hw_report(struct sk_buff *skb,
const struct net_dm_hw_metadata *hw_metadata);
#else
diff --git a/include/net/dst.h b/include/net/dst.h
index fe62fe2..433f7c1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -82,7 +82,7 @@
struct dst_metrics {
u32 metrics[RTAX_MAX];
refcount_t refcnt;
-};
+} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
@@ -401,7 +401,15 @@
static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
struct sk_buff *skb)
{
- struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL);
+ struct neighbour *n = NULL;
+
+ /* The packets from tunnel devices (eg bareudp) may have only
+ * metadata in the dst pointer of skb. Hence a pointer check of
+ * neigh_lookup is needed.
+ */
+ if (dst->ops->neigh_lookup)
+ n = dst->ops->neigh_lookup(dst, skb, NULL);
+
return IS_ERR(n) ? NULL : n;
}
@@ -516,7 +524,16 @@
struct dst_entry *dst = skb_dst(skb);
if (dst && dst->ops->update_pmtu)
- dst->ops->update_pmtu(dst, NULL, skb, mtu);
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+}
+
+/* update dst pmtu but not do neighbor confirm */
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
@@ -526,7 +543,7 @@
u32 encap_mtu = dst_mtu(encap_dst);
if (skb->len > encap_mtu - headroom)
- skb_dst_update_pmtu(skb, encap_mtu - headroom);
+ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
}
#endif /* _NET_DST_H */
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 56cb3c3..14efa0d 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -45,7 +45,9 @@
return &md_dst->u.tun_info;
dst = skb_dst(skb);
- if (dst && dst->lwtstate)
+ if (dst && dst->lwtstate &&
+ (dst->lwtstate->type == LWTUNNEL_ENCAP_IP ||
+ dst->lwtstate->type == LWTUNNEL_ENCAP_IP6))
return lwt_tun_info(dst->lwtstate);
return NULL;
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 5ec645f..443863c 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -27,7 +27,8 @@
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
void (*redirect)(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 20dcadd..7fed319 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -108,6 +108,7 @@
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_PRIORITY] = { .type = NLA_U32 }, \
[FRA_FWMARK] = { .type = NLA_U32 }, \
+ [FRA_TUN_ID] = { .type = NLA_U64 }, \
[FRA_FWMASK] = { .type = NLA_U32 }, \
[FRA_TABLE] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
diff --git a/include/net/flow.h b/include/net/flow.h
index a50fb77..d058e63 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -116,6 +116,7 @@
fl4->saddr = saddr;
fl4->fl4_dport = dport;
fl4->fl4_sport = sport;
+ fl4->flowi4_multipath_hash = 0;
}
/* Reset some input parameters after previous lookup */
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 5cd1227..78f6437 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/in6.h>
#include <linux/siphash.h>
+#include <linux/string.h>
#include <uapi/linux/if_ether.h>
/**
@@ -229,6 +230,7 @@
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+ FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
@@ -337,4 +339,12 @@
void *data_end;
};
+static inline void
+flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
+ struct flow_dissector_key_basic *key_basic)
+{
+ memset(key_control, 0, sizeof(*key_control));
+ memset(key_basic, 0, sizeof(*key_basic));
+}
+
#endif
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 86c567f..c6f7bd2 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -380,19 +380,18 @@
typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
enum tc_setup_type type, void *type_data);
-typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
- flow_indr_block_bind_cb_t *cb,
- void *cb_priv,
- enum flow_block_command command);
+typedef void flow_indr_block_cmd_t(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb, void *cb_priv,
+ enum flow_block_command command);
-struct flow_indr_block_ing_entry {
- flow_indr_block_ing_cmd_t *cb;
+struct flow_indr_block_entry {
+ flow_indr_block_cmd_t *cb;
struct list_head list;
};
-void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+void flow_indr_add_block_cb(struct flow_indr_block_entry *entry);
-void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+void flow_indr_del_block_cb(struct flow_indr_block_entry *entry);
int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
flow_indr_block_bind_cb_t *cb,
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 9292f1c..2d9e67a 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -35,12 +35,6 @@
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
- * @mcast_bind: a socket bound to the given multicast group (which
- * is given as the offset into the groups array)
- * @mcast_unbind: a socket was unbound from the given multicast group.
- * Note that unbind() will not be called symmetrically if the
- * generic netlink family is removed while there are still open
- * sockets.
* @attrbuf: buffer to store parsed attributes (private)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
@@ -64,8 +58,6 @@
void (*post_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
- int (*mcast_bind)(struct net *net, int group);
- void (*mcast_unbind)(struct net *net, int group);
struct nlattr ** attrbuf; /* private */
const struct genl_ops * ops;
const struct genl_multicast_group *mcgrps;
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5d4bfdb..fd84adc 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -43,6 +43,16 @@
__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
}
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+#else
+static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct ip_options opts = { 0 };
+ __icmp_send(skb_in, type, code, info, &opts);
+}
+#endif
+
int icmp_rcv(struct sk_buff *skb);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 8955460..13792c0 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -83,6 +83,8 @@
* @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options)
* @icsk_ack: Delayed ACK control data
* @icsk_mtup; MTU probing control data
+ * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout: TCP_USER_TIMEOUT value
*/
struct inet_connection_sock {
/* inet_sock has to be the first member! */
@@ -133,6 +135,7 @@
u32 probe_timestamp;
} icsk_mtup;
+ u32 icsk_probes_tstamp;
u32 icsk_user_timeout;
u64 icsk_ca_priv[104 / sizeof(u64)];
@@ -284,7 +287,7 @@
return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog;
}
-void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
void inet_csk_destroy_sock(struct sock *sk);
@@ -309,6 +312,10 @@
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
+/* update the fast reuse flag when adding a socket */
+void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct sock *sk);
+
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#define TCP_PINGPONG_THRESH 3
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index c8e2beb..563457f 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -4,6 +4,7 @@
#include <linux/ip.h>
#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
#include <net/inet_sock.h>
#include <net/dsfield.h>
@@ -99,6 +100,20 @@
return 1;
}
+static inline int IP_ECN_set_ect1(struct iphdr *iph)
+{
+ u32 check = (__force u32)iph->check;
+
+ if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ check += (__force u16)htons(0x1);
+
+ iph->check = (__force __sum16)(check + (check>=0xFFFF));
+ iph->tos ^= INET_ECN_MASK;
+ return 1;
+}
+
static inline void IP_ECN_clear(struct iphdr *iph)
{
iph->tos &= ~INET_ECN_MASK;
@@ -134,6 +149,22 @@
return 1;
}
+static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
+{
+ __be32 from, to;
+
+ if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ from = *(__be32 *)iph;
+ to = from ^ htonl(INET_ECN_MASK << 20);
+ *(__be32 *)iph = to;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+ (__force __wsum)to);
+ return 1;
+}
+
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
@@ -142,7 +173,7 @@
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
- switch (skb->protocol) {
+ switch (skb_protocol(skb, true)) {
case cpu_to_be16(ETH_P_IP):
if (skb_network_header(skb) + sizeof(struct iphdr) <=
skb_tail_pointer(skb))
@@ -159,6 +190,25 @@
return 0;
}
+static inline int INET_ECN_set_ect1(struct sk_buff *skb)
+{
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ if (skb_network_header(skb) + sizeof(struct iphdr) <=
+ skb_tail_pointer(skb))
+ return IP_ECN_set_ect1(ip_hdr(skb));
+ break;
+
+ case cpu_to_be16(ETH_P_IPV6):
+ if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+ skb_tail_pointer(skb))
+ return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
+ break;
+ }
+
+ return 0;
+}
+
/*
* RFC 6040 4.2
* To decapsulate the inner header at the tunnel egress, a compliant
@@ -208,8 +258,12 @@
int rc;
rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
- if (!rc && set_ce)
- INET_ECN_set_ce(skb);
+ if (!rc) {
+ if (set_ce)
+ INET_ECN_set_ce(skb);
+ else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
+ INET_ECN_set_ect1(skb);
+ }
return rc;
}
@@ -219,12 +273,16 @@
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, oiph->tos, inner);
}
@@ -234,12 +292,16 @@
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index af2b4c0..59802eb 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -103,13 +103,19 @@
struct hlist_head chain;
};
-/*
- * Sockets can be hashed in established or listening table
+/* Sockets can be hashed in established or listening table.
+ * We must use different 'nulls' end-of-chain value for all hash buckets :
+ * A socket might transition from ESTABLISH to LISTEN state without
+ * RCU grace period. A lookup in ehash table needs to handle this case.
*/
+#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
unsigned int count;
- struct hlist_head head;
+ union {
+ struct hlist_head head;
+ struct hlist_nulls_head nulls_head;
+ };
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -179,6 +185,12 @@
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
+static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
+{
+ kfree(h->lhash2);
+ h->lhash2 = NULL;
+}
+
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
kvfree(hashinfo->ehash_locks);
diff --git a/include/net/ip.h b/include/net/ip.h
index a2c61c3..52abfc0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -30,6 +30,7 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
#include <net/netns/hash.h>
+#include <net/lwtunnel.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
@@ -439,25 +440,34 @@
bool forwarding)
{
struct net *net = dev_net(dst->dev);
+ unsigned int mtu;
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
ip_mtu_locked(dst) ||
!forwarding)
return dst_mtu(dst);
- return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+ /* 'forwarding = true' case should always honour route mtu */
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (!mtu)
+ mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
const struct sk_buff *skb)
{
+ unsigned int mtu;
+
if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
}
- return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+ mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+ return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
}
struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
@@ -760,4 +770,9 @@
int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
struct netlink_ext_ack *extack);
+static inline bool inetdev_valid_mtu(unsigned int mtu)
+{
+ return likely(mtu >= IPV4_MIN_MTU);
+}
+
#endif /* _IP_H */
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4b5656c..bd0f159 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -177,6 +177,7 @@
struct rt6_info {
struct dst_entry dst;
struct fib6_info __rcu *from;
+ int sernum;
struct rt6key rt6i_dst;
struct rt6key rt6i_src;
@@ -260,6 +261,9 @@
struct fib6_info *from;
u32 cookie = 0;
+ if (rt->sernum)
+ return rt->sernum;
+
rcu_read_lock();
from = rcu_dereference(rt->from);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b69c16c..2c739fc 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -254,19 +254,27 @@
return rt->rt6i_flags & RTF_ANYCAST ||
(rt->rt6i_dst.plen < 127 &&
+ !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
}
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
{
+ unsigned int mtu;
+
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+ if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+ mtu = READ_ONCE(skb_dst(skb)->dev->mtu);
+ mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
+ } else
+ mtu = dst_mtu(skb_dst(skb));
+
+ return mtu;
}
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
@@ -314,7 +322,7 @@
if (dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
}
mtu = IPV6_MIN_MTU;
@@ -324,7 +332,8 @@
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
u32 ip6_mtu_from_fib6(const struct fib6_result *res,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ab1ca9e..ffbae76 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -244,7 +244,6 @@
u32 table_id;
/* filter_set is an optimization that an entry is set */
bool filter_set;
- bool dump_all_families;
bool dump_routes;
bool dump_exceptions;
unsigned char protocol;
@@ -423,6 +422,16 @@
#endif
int fib_unmerge(struct net *net);
+static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc,
+const struct net_device *dev)
+{
+ if (nhc->nhc_dev == dev ||
+ l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex)
+ return true;
+
+ return false;
+}
+
/* Exported by fib_semantics.c */
int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index af64560..56deb25 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -472,9 +472,11 @@
const void *from, int len,
__be16 flags)
{
- memcpy(ip_tunnel_info_opts(info), from, len);
info->options_len = len;
- info->key.tun_flags |= flags;
+ if (len > 0) {
+ memcpy(ip_tunnel_info_opts(info), from, len);
+ info->key.tun_flags |= flags;
+ }
}
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -520,7 +522,6 @@
__be16 flags)
{
info->options_len = 0;
- info->key.tun_flags |= flags;
}
#endif /* CONFIG_INET */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 078887c..7c37e3c 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1624,18 +1624,16 @@
}
#endif /* CONFIG_IP_VS_NFCT */
-/* Really using conntrack? */
-static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
- struct sk_buff *skb)
+/* Using old conntrack that can not be redirected to another real server? */
+static inline bool ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp,
+ struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_NFCT
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- if (!(cp->flags & IP_VS_CONN_F_NFCT))
- return false;
ct = nf_ct_get(skb, &ctinfo);
- if (ct)
+ if (ct && nf_ct_is_confirmed(ct))
return true;
#endif
return false;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 009605c..b59b3da 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1017,7 +1017,7 @@
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst);
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 5c93e94..3e7d2c0 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -24,8 +24,10 @@
const struct in6_addr *addr);
int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
const struct in6_addr *addr);
- int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
- struct dst_entry **dst, struct flowi6 *fl6);
+ struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ const struct in6_addr *final_dst);
int (*ipv6_route_input)(struct sk_buff *skb);
struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
diff --git a/include/net/ipx.h b/include/net/ipx.h
index baf0903..9d13428 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -47,11 +47,6 @@
/* From af_ipx.c */
extern int sysctl_ipx_pprop_broadcasting;
-static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
-{
- return (struct ipxhdr *)skb_transport_header(skb);
-}
-
struct ipx_interface {
/* IPX address */
__be32 if_netnum;
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index c0f0a13..49aa79c 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -15,9 +15,11 @@
#include <linux/if_ether.h>
/* Lengths of frame formats */
-#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
-#define LLC_PDU_LEN_S 4
-#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
+#define LLC_PDU_LEN_S 4
+#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
/* Known SAP addresses */
#define LLC_GLOBAL_SAP 0xFF
#define LLC_NULL_SAP 0x00 /* not network-layer visible */
@@ -50,9 +52,10 @@
#define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */
#define LLC_PDU_TYPE_MASK 0x03
-#define LLC_PDU_TYPE_I 0 /* first bit */
-#define LLC_PDU_TYPE_S 1 /* first two bits */
-#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_I 0 /* first bit */
+#define LLC_PDU_TYPE_S 1 /* first two bits */
+#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */
#define LLC_PDU_TYPE_IS_I(pdu) \
((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
@@ -230,9 +233,18 @@
static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
u8 ssap, u8 dsap, u8 cr)
{
- const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+ int hlen = 4; /* default value for I and S types */
struct llc_pdu_un *pdu;
+ switch (type) {
+ case LLC_PDU_TYPE_U:
+ hlen = 3;
+ break;
+ case LLC_PDU_TYPE_U_XID:
+ hlen = 6;
+ break;
+ }
+
skb_push(skb, hlen);
skb_reset_network_header(skb);
pdu = llc_pdu_un_hdr(skb);
@@ -374,7 +386,10 @@
xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */
xid_info->type = svcs_supported;
xid_info->rw = rx_window << 1; /* size of receive window */
- skb_put(skb, sizeof(struct llc_xid_info));
+
+ /* no need to push/put since llc_pdu_header_init() has already
+ * pushed 3 + 3 bytes
+ */
}
/**
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 523c6a0..d9ba9a7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5933,7 +5933,9 @@
struct rate_control_ops {
unsigned long capa;
const char *name;
- void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
+ void *(*alloc)(struct ieee80211_hw *hw);
+ void (*add_debugfs)(struct ieee80211_hw *hw, void *priv,
+ struct dentry *debugfsdir);
void (*free)(void *priv);
void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index b2f715c..b5ebeb3 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -414,8 +414,8 @@
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
@@ -431,8 +431,8 @@
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index b8452cc..2be8d6b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -72,7 +72,6 @@
struct net_device *dev;
struct list_head list;
int (*neigh_setup)(struct neighbour *);
- void (*neigh_cleanup)(struct neighbour *);
struct neigh_table *tbl;
void *sysctl_table;
@@ -205,6 +204,7 @@
int (*pconstructor)(struct pneigh_entry *);
void (*pdestructor)(struct pneigh_entry *);
void (*proxy_redo)(struct sk_buff *skb);
+ int (*is_multicast)(const void *pkey);
bool (*allow_add)(const struct net_device *dev,
struct netlink_ext_ack *extack);
char *id;
@@ -468,7 +468,7 @@
do {
seq = read_seqbegin(&hh->hh_lock);
- hh_len = hh->hh_len;
+ hh_len = READ_ONCE(hh->hh_len);
if (likely(hh_len <= HH_DATA_MOD)) {
hh_alen = HH_DATA_MOD;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c7e15a2..167e390 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -195,6 +195,8 @@
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
@@ -214,6 +216,11 @@
}
static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif /* CONFIG_NET_NS */
@@ -428,6 +435,13 @@
return atomic_read(&net->ipv4.rt_genid);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(const struct net *net)
+{
+ return atomic_read(&net->ipv6.fib6_sernum);
+}
+#endif
+
static inline void rt_genid_bump_ipv4(struct net *net)
{
atomic_inc(&net->ipv4.rt_genid);
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 9f551f3..90690e3 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -87,7 +87,7 @@
struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
- u8 __nfct_init_offset[0];
+ struct { } __nfct_init_offset;
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 0d39208..716db4a 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -108,6 +108,7 @@
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
struct sock *sk);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2d0275f..f694f08 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -143,6 +143,8 @@
static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
unsigned int len)
{
+ if (len % NFT_REG32_SIZE)
+ dst[len / NFT_REG32_SIZE] = 0;
memcpy(dst, src, len);
}
@@ -870,6 +872,12 @@
return (struct nft_expr *)&rule->data[rule->dlen];
}
+static inline bool nft_expr_more(const struct nft_rule *rule,
+ const struct nft_expr *expr)
+{
+ return expr != nft_expr_last(rule) && expr->ops;
+}
+
static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
{
return (void *)&rule->data[rule->dlen];
@@ -1454,4 +1462,10 @@
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
+
+void nf_tables_trans_destroy_flush_work(void);
+
+int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
+__be64 nf_jiffies64_to_msecs(u64 input);
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 03cf585..d0bb9e3 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -37,6 +37,7 @@
struct nft_flow_key {
struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_control control;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
@@ -61,6 +62,9 @@
#define NFT_OFFLOAD_F_ACTION (1 << 0)
+void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+ enum flow_dissector_key_id addr_type);
+
struct nft_rule;
struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 286fd96..a1a8d45 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
+ struct list_head module_list;
struct mutex commit_mutex;
unsigned int base_seq;
u8 gencursor;
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 59f45b1..b59d73d 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -72,7 +72,9 @@
#if IS_ENABLED(CONFIG_IPV6)
struct dst_ops xfrm6_dst_ops;
#endif
- spinlock_t xfrm_state_lock;
+ spinlock_t xfrm_state_lock;
+ seqcount_t xfrm_state_hash_generation;
+
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
};
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 331ebbc..18a5aca 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -70,6 +70,7 @@
};
struct nh_group {
+ struct nh_group *spare; /* spare group for removals */
u16 num_nh;
bool mpath;
bool has_v4;
@@ -136,21 +137,20 @@
{
unsigned int rc = 1;
- if (nexthop_is_multipath(nh)) {
+ if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- rc = nh_grp->num_nh;
+ if (nh_grp->mpath)
+ rc = nh_grp->num_nh;
}
return rc;
}
static inline
-struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
+struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
{
- const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
-
/* for_nexthops macros in fib_semantics.c grabs a pointer to
* the nexthop before checking nhsel
*/
@@ -185,12 +185,14 @@
{
const struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- if (nexthop_num_path(nh) > 1)
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->num_nh > 1)
return false;
- nh = nexthop_mpath_select(nh, 0);
- if (!nh)
- return false;
+
+ nh = nh_grp->nh_entries[0].nh;
}
nhi = rcu_dereference_rtnl(nh->nh_info);
@@ -216,16 +218,46 @@
BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, nhsel);
- if (!nh)
- return NULL;
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->mpath) {
+ nh = nexthop_mpath_select(nh_grp, nhsel);
+ if (!nh)
+ return NULL;
+ }
}
nhi = rcu_dereference_rtnl(nh->nh_info);
return &nhi->fib_nhc;
}
+static inline bool nexthop_uses_dev(const struct nexthop *nh,
+ const struct net_device *dev)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rcu_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+
+ nhi = rcu_dereference(nhe->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+ } else {
+ nhi = rcu_dereference(nh->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+
+ return false;
+}
+
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
{
if (unlikely(fi->nh))
@@ -259,12 +291,16 @@
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack);
+/* Caller should either hold rcu_read_lock(), or RTNL. */
static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, 0);
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ nh = nexthop_mpath_select(nh_grp, 0);
if (!nh)
return NULL;
}
@@ -276,6 +312,29 @@
return NULL;
}
+/* Variant of nexthop_fib6_nh().
+ * Caller should either hold rcu_read_lock_bh(), or RTNL.
+ */
+static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp);
+ nh = nexthop_mpath_select(nh_grp, 0);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_bh_rtnl(nh->nh_info);
+ if (nhi->family == AF_INET6)
+ return &nhi->fib6_nh;
+
+ return NULL;
+}
+
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 43c9c5d..3397901 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -298,6 +298,7 @@
struct sk_buff **resp);
struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
+void nci_hci_deallocate(struct nci_dev *ndev);
int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event,
const u8 *param, size_t param_len);
int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate,
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 2cbcdbd..cf086e1 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -70,7 +70,12 @@
struct page_pool {
struct page_pool_params p;
- u32 pages_state_hold_cnt;
+ struct delayed_work release_dw;
+ void (*disconnect)(void *);
+ unsigned long defer_start;
+ unsigned long defer_warn;
+
+ u32 pages_state_hold_cnt;
/*
* Data structure for allocation side
@@ -129,26 +134,20 @@
struct page_pool *page_pool_create(const struct page_pool_params *params);
-void __page_pool_free(struct page_pool *pool);
-static inline void page_pool_free(struct page_pool *pool)
-{
- /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
- * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */
#ifdef CONFIG_PAGE_POOL
- __page_pool_free(pool);
-#endif
-}
-
-/* Drivers use this instead of page_pool_free */
+void page_pool_destroy(struct page_pool *pool);
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+#else
static inline void page_pool_destroy(struct page_pool *pool)
{
- if (!pool)
- return;
-
- page_pool_free(pool);
}
+static inline void page_pool_use_xdp_mem(struct page_pool *pool,
+ void (*disconnect)(void *))
+{
+}
+#endif
+
/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);
@@ -170,24 +169,6 @@
__page_pool_put_page(pool, page, true);
}
-/* API user MUST have disconnected alloc-side (not allowed to call
- * page_pool_alloc_pages()) before calling this. The free-side can
- * still run concurrently, to handle in-flight packet-pages.
- *
- * A request to shutdown can fail (with false) if there are still
- * in-flight packet-pages.
- */
-bool __page_pool_request_shutdown(struct page_pool *pool);
-static inline bool page_pool_request_shutdown(struct page_pool *pool)
-{
- bool safe_to_remove = false;
-
-#ifdef CONFIG_PAGE_POOL
- safe_to_remove = __page_pool_request_shutdown(pool);
-#endif
- return safe_to_remove;
-}
-
/* Disconnects a page (from a page_pool). API users can have a need
* to disconnect a page (from a page_pool), to allow it to be used as
* a regular page (that will eventually be returned to the normal
@@ -204,7 +185,17 @@
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
- return page->dma_addr;
+ dma_addr_t ret = page->dma_addr[0];
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+ return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ page->dma_addr[0] = addr;
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ page->dma_addr[1] = upper_32_bits(addr);
}
static inline bool is_page_pool_compiled_in(void)
@@ -216,11 +207,6 @@
#endif
}
-static inline void page_pool_get(struct page_pool *pool)
-{
- refcount_inc(&pool->user_cnt);
-}
-
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e553fc8..9976ad2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -141,31 +141,38 @@
return xchg(clp, cl);
}
-static inline unsigned long
-cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl)
+static inline void
+__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
- unsigned long old_cl;
+ unsigned long cl;
- sch_tree_lock(q);
- old_cl = __cls_set_class(clp, cl);
- sch_tree_unlock(q);
- return old_cl;
+ cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
+ cl = __cls_set_class(&r->class, cl);
+ if (cl)
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
static inline void
tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
{
struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
/* Check q as it is not set for shared blocks. In that case,
* setting class is not supported.
*/
if (!q)
return;
- cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
- cl = cls_set_class(q, &r->class, cl);
- if (cl)
+ sch_tree_lock(q);
+ __tcf_bind_filter(q, r, base);
+ sch_tree_unlock(q);
+}
+
+static inline void
+__tcf_unbind_filter(struct Qdisc *q, struct tcf_result *r)
+{
+ unsigned long cl;
+
+ if ((cl = __cls_set_class(&r->class, 0)) != 0)
q->ops->cl_ops->unbind_tcf(q, cl);
}
@@ -173,12 +180,10 @@
tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
{
struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
if (!q)
return;
- if ((cl = __cls_set_class(&r->class, 0)) != 0)
- q->ops->cl_ops->unbind_tcf(q, cl);
+ __tcf_unbind_filter(q, r);
}
struct tcf_exts {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 6a70845..b16f923 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -118,27 +118,11 @@
static inline void qdisc_run(struct Qdisc *q)
{
if (qdisc_run_begin(q)) {
- /* NOLOCK qdisc must check 'state' under the qdisc seqlock
- * to avoid racing with dev_qdisc_reset()
- */
- if (!(q->flags & TCQ_F_NOLOCK) ||
- likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
- __qdisc_run(q);
+ __qdisc_run(q);
qdisc_run_end(q);
}
}
-static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
-{
- /* We need to take extra care in case the skb came via
- * vlan accelerated path. In that case, use skb->vlan_proto
- * as the original vlan header was already stripped.
- */
- if (skb_vlan_tag_present(skb))
- return skb->vlan_proto;
- return skb->protocol;
-}
-
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
diff --git a/include/net/psample.h b/include/net/psample.h
index 68ae16b..20a1755 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -18,6 +18,8 @@
void psample_group_take(struct psample_group *group);
void psample_group_put(struct psample_group *group);
+struct sk_buff;
+
#if IS_ENABLED(CONFIG_PSAMPLE)
void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
diff --git a/include/net/red.h b/include/net/red.h
index 9665582..ff07a7c 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -168,14 +168,24 @@
v->qcount = -1;
}
-static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog,
+ u8 Scell_log, u8 *stab)
{
- if (fls(qth_min) + Wlog > 32)
+ if (fls(qth_min) + Wlog >= 32)
return false;
- if (fls(qth_max) + Wlog > 32)
+ if (fls(qth_max) + Wlog >= 32)
+ return false;
+ if (Scell_log >= 32)
return false;
if (qth_max < qth_min)
return false;
+ if (stab) {
+ int i;
+
+ for (i = 0; i < RED_STAB_SIZE; i++)
+ if (stab[i] >= 32)
+ return false;
+ }
return true;
}
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e2091bb..4da61c9 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -33,6 +33,7 @@
*
* @list: Used internally
* @kind: Identifier
+ * @netns_refund: Physical device, move to init_net on netns exit
* @maxtype: Highest device specific netlink attribute number
* @policy: Netlink policy for device specific attribute validation
* @validate: Optional validation function for netlink/changelink parameters
@@ -64,6 +65,7 @@
size_t priv_size;
void (*setup)(struct net_device *dev);
+ bool netns_refund;
unsigned int maxtype;
const struct nla_policy *policy;
int (*validate)(struct nlattr *tb[],
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d80acda..0cb0a4b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@
enum qdisc_state_t {
__QDISC_STATE_SCHED,
__QDISC_STATE_DEACTIVATED,
+ __QDISC_STATE_MISSED,
};
struct qdisc_size_table {
@@ -149,16 +150,53 @@
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
if (qdisc_is_percpu_stats(qdisc))
- return qdisc->empty;
- return !qdisc->q.qlen;
+ return READ_ONCE(qdisc->empty);
+ return !READ_ONCE(qdisc->q.qlen);
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
+ if (spin_trylock(&qdisc->seqlock))
+ goto nolock_empty;
+
+ /* Paired with smp_mb__after_atomic() to make sure
+ * STATE_MISSED checking is synchronized with clearing
+ * in pfifo_fast_dequeue().
+ */
+ smp_mb__before_atomic();
+
+ /* If the MISSED flag is set, it means other thread has
+ * set the MISSED flag before second spin_trylock(), so
+ * we can return false here to avoid multi cpus doing
+ * the set_bit() and second spin_trylock() concurrently.
+ */
+ if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+ return false;
+
+ /* Set the MISSED flag before the second spin_trylock(),
+ * if the second spin_trylock() return false, it means
+ * other cpu holding the lock will do dequeuing for us
+ * or it will see the MISSED flag set after releasing
+ * lock and reschedule the net_tx_action() to do the
+ * dequeuing.
+ */
+ set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+ /* spin_trylock() only has load-acquire semantic, so use
+ * smp_mb__after_atomic() to ensure STATE_MISSED is set
+ * before doing the second spin_trylock().
+ */
+ smp_mb__after_atomic();
+
+ /* Retry again in case other CPU may not see the new flag
+ * after it releases the lock at the end of qdisc_run_end().
+ */
if (!spin_trylock(&qdisc->seqlock))
return false;
- qdisc->empty = false;
+
+nolock_empty:
+ WRITE_ONCE(qdisc->empty, false);
} else if (qdisc_is_running(qdisc)) {
return false;
}
@@ -173,8 +211,15 @@
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
write_seqcount_end(&qdisc->running);
- if (qdisc->flags & TCQ_F_NOLOCK)
+ if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
+
+ if (unlikely(test_bit(__QDISC_STATE_MISSED,
+ &qdisc->state))) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ __netif_schedule(qdisc);
+ }
+ }
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -308,6 +353,7 @@
int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held,
struct netlink_ext_ack *);
+ bool (*delete_empty)(struct tcf_proto *tp);
void (*walk)(struct tcf_proto *tp,
struct tcf_walker *arg, bool rtnl_held);
int (*reoffload)(struct tcf_proto *tp, bool add,
@@ -317,7 +363,8 @@
void *type_data);
void (*hw_del)(struct tcf_proto *tp,
void *type_data);
- void (*bind_class)(void *, u32, unsigned long);
+ void (*bind_class)(void *, u32, unsigned long,
+ void *, unsigned long);
void * (*tmplt_create)(struct net *net,
struct tcf_chain *chain,
struct nlattr **tca,
@@ -336,6 +383,10 @@
int flags;
};
+/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
+ * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
+ * conditions can occur when filters are inserted/deleted simultaneously.
+ */
enum tcf_proto_ops_flags {
TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};
@@ -401,6 +452,7 @@
struct mutex lock;
struct list_head chain_list;
u32 index; /* block index for shared blocks */
+ u32 classid; /* which class this block belongs to */
refcount_t refcnt;
struct net *net;
struct Qdisc *q;
@@ -669,22 +721,6 @@
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
-static inline void skb_reset_tc(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_redirected = 0;
-#endif
-}
-
-static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return skb->tc_redirected;
-#else
- return false;
-#endif
-}
-
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
@@ -1167,7 +1203,7 @@
old = *pold;
*pold = new;
if (old != NULL)
- qdisc_tree_flush_backlog(old);
+ qdisc_purge_queue(old);
sch_tree_unlock(sch);
return old;
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 823afc4..8c6b04f 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -328,8 +328,7 @@
#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK
/* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>,
- * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24,
- * 192.88.99.0/24.
+ * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24.
* Also, RFC 8.4, non-unicast addresses are not considered valid SCTP
* addresses.
*/
@@ -337,15 +336,16 @@
((htonl(INADDR_BROADCAST) == a) || \
ipv4_is_multicast(a) || \
ipv4_is_zeronet(a) || \
- ipv4_is_test_198(a) || \
ipv4_is_anycast_6to4(a))
/* Flags used for the bind address copy functions. */
-#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by
+#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by
local sock family */
-#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by
+#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by
+ local sock family */
+#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by
peer */
-#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by
+#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by
peer */
/* Reasons to retransmit. */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2b6f3f1..fd7c3f7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -224,12 +224,14 @@
data_ready_signalled:1;
atomic_t pd_mode;
+
+ /* Fields after this point will be skipped on copies, like on accept
+ * and peeloff operations
+ */
+
/* Receive to here while partial delivery is in effect. */
struct sk_buff_head pd_lobby;
- /* These must be the last fields, as they will skipped on copies,
- * like on accept and peeloff operations
- */
struct list_head auto_asconf_list;
int do_auto_asconf;
};
@@ -464,7 +466,7 @@
int saddr);
void (*from_sk) (union sctp_addr *,
struct sock *sk);
- void (*from_addr_param) (union sctp_addr *,
+ bool (*from_addr_param) (union sctp_addr *,
union sctp_addr_param *,
__be16 port, int iif);
int (*to_addr_param) (const union sctp_addr *,
diff --git a/include/net/sock.h b/include/net/sock.h
index 718e62f..d3dd89b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -723,6 +723,11 @@
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
+static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
+{
+ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+}
+
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
sock_hold(sk);
@@ -844,6 +849,8 @@
{
return static_branch_unlikely(&memalloc_socks_key);
}
+
+void __receive_sock(struct file *file);
#else
static inline int sk_memalloc_socks(void)
@@ -851,6 +858,8 @@
return 0;
}
+static inline void __receive_sock(struct file *file)
+{ }
#endif
static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
@@ -900,11 +909,11 @@
skb_dst_force(skb);
if (!sk->sk_backlog.tail)
- sk->sk_backlog.head = skb;
+ WRITE_ONCE(sk->sk_backlog.head, skb);
else
sk->sk_backlog.tail->next = skb;
- sk->sk_backlog.tail = skb;
+ WRITE_ONCE(sk->sk_backlog.tail, skb);
skb->next = NULL;
}
@@ -1798,7 +1807,6 @@
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
- sk_tx_queue_clear(sk);
sk->sk_socket = sock;
}
@@ -1852,7 +1860,8 @@
static inline void sk_set_txhash(struct sock *sk)
{
- sk->sk_txhash = net_tx_rndhash();
+ /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
}
static inline void sk_rethink_txhash(struct sock *sk)
@@ -1940,8 +1949,8 @@
static inline void sk_dst_confirm(struct sock *sk)
{
- if (!sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 1;
+ if (!READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
}
static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
@@ -1951,10 +1960,10 @@
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
- if (sk && sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 0;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
+ if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
}
}
@@ -2117,9 +2126,12 @@
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
- if (sk->sk_txhash) {
+ /* This pairs with WRITE_ONCE() in sk_set_txhash() */
+ u32 txhash = READ_ONCE(sk->sk_txhash);
+
+ if (txhash) {
skb->l4_hash = 1;
- skb->hash = sk->sk_txhash;
+ skb->hash = txhash;
}
}
@@ -2142,6 +2154,17 @@
sk_mem_charge(sk, skb->truesize);
}
+static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
+{
+ if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
+ skb_orphan(skb);
+ skb->destructor = sock_efree;
+ skb->sk = sk;
+ return true;
+ }
+ return false;
+}
+
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
unsigned long expires);
@@ -2584,9 +2607,9 @@
*/
static inline void sk_pacing_shift_update(struct sock *sk, int val)
{
- if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
return;
- sk->sk_pacing_shift = val;
+ WRITE_ONCE(sk->sk_pacing_shift, val);
}
/* if a socket is bound to a device, check that the given device
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 4e25024..add6fb5 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -14,6 +14,7 @@
u16 tcfv_push_vid;
__be16 tcfv_push_proto;
u8 tcfv_push_prio;
+ bool tcfv_push_prio_exists;
struct rcu_head rcu;
};
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ab4eb5e..b914959 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -50,7 +50,7 @@
extern struct percpu_counter tcp_orphan_count;
void tcp_time_wait(struct sock *sk, int state, int timeo);
-#define MAX_TCP_HEADER (128 + MAX_HEADER)
+#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
#define TCP_MIN_SND_MSS 48
#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
@@ -494,15 +494,16 @@
reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
- if (time_after32(now, last_overflow + HZ))
+ if (!time_between32(now, last_overflow,
+ last_overflow + HZ))
WRITE_ONCE(reuse->synq_overflow_ts, now);
return;
}
}
- last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- if (time_after32(now, last_overflow + HZ))
- tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+ last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+ if (!time_between32(now, last_overflow, last_overflow + HZ))
+ WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
}
/* syncookies: no recent synqueue overflow on this listening socket? */
@@ -517,13 +518,23 @@
reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
- return time_after32(now, last_overflow +
- TCP_SYNCOOKIE_VALID);
+ return !time_between32(now, last_overflow - HZ,
+ last_overflow +
+ TCP_SYNCOOKIE_VALID);
}
}
- last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
+ last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+
+ /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
+ * then we're under synflood. However, we have to use
+ * 'last_overflow - HZ' as lower bound. That's because a concurrent
+ * tcp_synq_overflow() could update .ts_recent_stamp after we read
+ * jiffies but before we store .ts_recent_stamp into last_overflow,
+ * which could lead to rejecting a valid syncookie.
+ */
+ return !time_between32(now, last_overflow - HZ,
+ last_overflow + TCP_SYNCOOKIE_VALID);
}
static inline u32 tcp_cookie_time(void)
@@ -608,6 +619,7 @@
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when);
/* Bound MSS / TSO packet size with the half of the window */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
@@ -1390,6 +1402,24 @@
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
+/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
+ * If 87.5 % (7/8) of the space has been consumed, we want to override
+ * SO_RCVLOWAT constraint, since we are receiving skbs with too small
+ * len/truesize ratio.
+ */
+static inline bool tcp_rmem_pressure(const struct sock *sk)
+{
+ int rcvbuf, threshold;
+
+ if (tcp_under_memory_pressure(sk))
+ return true;
+
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ threshold = rcvbuf - (rcvbuf >> 3);
+
+ return atomic_read(&sk->sk_rmem_alloc) > threshold;
+}
+
extern void tcp_openreq_init_rwin(struct request_sock *req,
const struct sock *sk_listener,
const struct dst_entry *dst);
@@ -1626,6 +1656,8 @@
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
void *primary_key, void *backup_key);
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+ u64 *key);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -2004,7 +2036,7 @@
void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
u32 reo_wnd);
-extern void tcp_rack_mark_lost(struct sock *sk);
+extern bool tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
@@ -2121,7 +2153,8 @@
/* initialize ulp */
int (*init)(struct sock *sk);
/* update ulp */
- void (*update)(struct sock *sk, struct proto *p);
+ void (*update)(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
@@ -2136,7 +2169,8 @@
int tcp_set_ulp(struct sock *sk, const char *name);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
-void tcp_update_ulp(struct sock *sk, struct proto *p);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
diff --git a/include/net/tls.h b/include/net/tls.h
index 093abb5..697df45 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -157,6 +157,8 @@
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
+ /* protect crypto_wait with encrypt_pending */
+ spinlock_t encrypt_compl_lock;
int async_notify;
int async_capable;
@@ -177,6 +179,8 @@
int async_capable;
bool decrypted;
atomic_t decrypt_pending;
+ /* protect crypto_wait with decrypt_pending*/
+ spinlock_t decrypt_compl_lock;
bool async_notify;
};
@@ -217,6 +221,12 @@
* to be atomic.
*/
TLS_TX_SYNC_SCHED = 1,
+ /* tls_dev_del was called for the RX side, device state was released,
+ * but tls_ctx->netdev might still be kept, because TX-side driver
+ * resources might not be released yet. Used to prevent the second
+ * tls_dev_del call in tls_device_down if it happens simultaneously.
+ */
+ TLS_RX_DEV_CLOSED = 2,
};
struct cipher_context {
@@ -586,6 +596,15 @@
return !!tls_sw_ctx_tx(ctx);
}
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ if (!ctx)
+ return false;
+ return !!tls_sw_ctx_rx(ctx);
+}
+
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
diff --git a/include/net/udp.h b/include/net/udp.h
index bad74f7..fabf507 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -476,6 +476,16 @@
if (!inet_get_convert_csum(sk))
features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or
+ * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
+ * packets in udp_gro_complete_segment. As does UDP GSO, verified by
+ * udp_send_skb. But when those packets are looped in dev_loopback_xmit
+ * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
+ * specific case, where PARTIAL is both correct and required.
+ */
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
/* the GSO CB lays after the UDP one, no need to save and restore any
* CB fragment
*/
diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index 6a8cba6..a9d5b76 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -12,12 +12,8 @@
struct page_pool *page_pool;
struct zero_copy_allocator *zc_alloc;
};
- int disconnect_cnt;
- unsigned long defer_start;
struct rhash_head node;
struct rcu_head rcu;
- struct delayed_work defer_wq;
- unsigned long defer_warn;
};
#endif /* __LINUX_NET_XDP_PRIV_H__ */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa08a7a..8ce6385 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -945,7 +945,7 @@
static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
return xdst->path;
@@ -957,7 +957,7 @@
static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
return xdst->child;
}
@@ -1012,6 +1012,7 @@
#define XFRM_GRO 32
#define XFRM_ESP_NO_TRAILER 64
#define XFRM_DEV_RESUME 128
+#define XFRM_XMIT 256
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1097,7 +1098,7 @@
return __xfrm_policy_check(sk, ndir, skb, family);
return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
- (skb_dst(skb)->flags & DST_NOPOLICY) ||
+ (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
__xfrm_policy_check(sk, ndir, skb, family);
}
@@ -1542,6 +1543,7 @@
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
+u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
@@ -1635,13 +1637,16 @@
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
+ const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
- int dir, u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net,
+ const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete,
+ int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
@@ -1769,21 +1774,17 @@
static inline int xfrm_replay_clone(struct xfrm_state *x,
struct xfrm_state *orig)
{
- x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn),
+
+ x->replay_esn = kmemdup(orig->replay_esn,
+ xfrm_replay_state_esn_len(orig->replay_esn),
GFP_KERNEL);
if (!x->replay_esn)
return -ENOMEM;
-
- x->replay_esn->bmp_len = orig->replay_esn->bmp_len;
- x->replay_esn->replay_window = orig->replay_esn->replay_window;
-
- x->preplay_esn = kmemdup(x->replay_esn,
- xfrm_replay_state_esn_len(x->replay_esn),
+ x->preplay_esn = kmemdup(orig->preplay_esn,
+ xfrm_replay_state_esn_len(orig->preplay_esn),
GFP_KERNEL);
- if (!x->preplay_esn) {
- kfree(x->replay_esn);
+ if (!x->preplay_esn)
return -ENOMEM;
- }
return 0;
}